In [15]:
import requests
import pandas as pd
from bs4 import BeautifulSoup as BS

#constants
#=========

#the page holds a single tbody (table body) element: the incident results table
RESULT_TABLE_TAG = 'tbody'

#each fire name (a link to that fire's incident page) sits inside a th element
FIRE_NAMES_TAG = 'th'

#request headers sent with every request so the website doesn't 403 us and replies with xml
headers = {
    'Connection': 'close',
    'Accept': 'application/xml',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0',
}

#urls, page text markers and csv layout
CURRENT_YEAR_URL = 'https://www.fire.ca.gov/incidents'
PREV_YEAR_URL = 'https://www.fire.ca.gov/incidents/2022/'
#TODO:: change to CURRENT_YEAR_URL when done testing
INCIDENT_URL_BASE = PREV_YEAR_URL
#label text that precedes the coordinates on an incident page
LAT_LONG_TEXT = 'Latitude / Longitude'
#column order of fire_data.csv
CSV_COLUMN_NAMES = [
    'name',
    'county',
    'date_started',
    'latitude',
    'longitude',
]

#/constants
#==========

#seconds to wait on the website before giving up on a single request,
#so one stalled connection cannot hang the whole scrape
_REQUEST_TIMEOUT_SECONDS = 30

#csv file that accumulates every incident seen so far
_FIRE_DATA_CSV = 'fire_data.csv'


def _build_incident_url(fire_name, date_started):
    """Build the url of a single fire's incident page.

    The url is INCIDENT_URL_BASE followed by <month>/<day>/<name>, where month
    and day come from the first two '/'-separated parts of date_started with
    leading zeros removed, and name is the fire name with parentheses removed
    and spaces replaced by dashes.

    Raises ValueError when date_started has fewer than two '/'-separated parts.
    """
    #strip first: surrounding whitespace would otherwise stop lstrip('0') from removing the zeros
    date_parts = date_started.strip().split('/')
    if len(date_parts) < 2:
        raise ValueError("unexpected start date %r for fire %r" % (date_started, fire_name))
    #TODO:: use a regex to remove all alphanumeric characters in the fire name (to use in url - maybe only replace chars not permitted in urls?)
    url_name = fire_name.replace("(","").replace(")","").replace(" ", "-")
    #normalise the base so it joins correctly with or without a trailing slash
    base_url = INCIDENT_URL_BASE.rstrip('/') + "/"
    return base_url + date_parts[0].lstrip('0') + "/" + date_parts[1].lstrip('0') + "/" + url_name


def _parse_lat_long(lat_long_text):
    """Turn text such as '[38.5, -120.1]' into a (latitude, longitude) tuple of floats.

    Raises ValueError when a value is not numeric or fewer than two values are present.
    """
    parts = lat_long_text.replace("[", "").replace("]", "").replace(" ", "").split(",")
    values = [float(part) for part in parts]
    if len(values) < 2:
        raise ValueError("expected a latitude and a longitude, got %r" % lat_long_text)
    return values[0], values[1]


def _fetch_lat_long(session, incident_url):
    """Download one incident page and return its (latitude, longitude) as floats.

    Raises requests.HTTPError on an error status and ValueError when the page
    has no LAT_LONG_TEXT label or the coordinates cannot be parsed.
    """
    response = session.get(incident_url, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    incident_soup = BS(response.text, 'lxml')
    lat_long_title_element = incident_soup.find(string=LAT_LONG_TEXT)
    if lat_long_title_element is None:
        raise ValueError("no '%s' entry found on %s" % (LAT_LONG_TEXT, incident_url))
    #the value lives two siblings after the label's parent
    #(presumably the first sibling is a whitespace text node - confirm against the page markup)
    lat_long_element = lat_long_title_element.parent.next_sibling.next_sibling
    return _parse_lat_long(lat_long_element.text)


def update_data_file():
    """Scrape the incident list and append fires not yet in fire_data.csv.

    Every fire in the results table of INCIDENT_URL_BASE is read (name, county,
    start date), its incident page is fetched for the latitude / longitude, and
    every fire whose name is not already in the csv's name column is appended.
    The csv is created with a header row when it does not exist or is empty.

    Returns a tuple (new_incidents_available, new_incidents_fire_names): a bool
    telling whether anything was appended and the list of appended fire names
    (a name appears once per appended row).

    Raises requests.HTTPError on an error status and ValueError when an incident
    page or start date does not have the expected shape.
    """
    scraped_rows = []
    #one session for the whole scrape instead of a new one per incident
    with requests.Session() as session:
        response = session.get(INCIDENT_URL_BASE, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        #parse xml using lxml parser
        soup = BS(response.text, 'lxml')

        #find data table in page
        data_table = soup.find(RESULT_TABLE_TAG)
        if data_table is None:
            #no results table on the page: nothing to record
            return False, []

        #iterate through the fire name elements (hypertext) and collect one row per fire
        #TODO:: use forking to make the following faster
        for fire_name_element in data_table.find_all(FIRE_NAMES_TAG):
            #contents[1] because fire names have an extra parent wrapper
            fire_name_text = fire_name_element.contents[1].text.strip()
            print(fire_name_text)
            #two hops per column (presumably the first sibling is a whitespace text node - confirm)
            fire_county_name_element = fire_name_element.next_sibling.next_sibling
            fire_date_started_element = fire_county_name_element.next_sibling.next_sibling
            county_name = fire_county_name_element.text.strip()
            date_started = fire_date_started_element.text.strip()
            incident_url = _build_incident_url(fire_name_text, date_started)
            latitude, longitude = _fetch_lat_long(session, incident_url)
            scraped_rows.append((fire_name_text, county_name, date_started, latitude, longitude))

    #load the names already recorded; a missing or empty csv simply means none yet
    try:
        known_fire_names = set(pd.read_csv(_FIRE_DATA_CSV)[CSV_COLUMN_NAMES[0]])
        csv_has_header = True
    except (FileNotFoundError, pd.errors.EmptyDataError):
        known_fire_names = set()
        csv_has_header = False

    #find which fires are new (matched on name only, against what was stored before this run)
    new_rows = [row for row in scraped_rows if row[0] not in known_fire_names]
    new_incidents_fire_names = [row[0] for row in new_rows]
    if new_rows:
        new_fire_df = pd.DataFrame(new_rows, columns=CSV_COLUMN_NAMES)
        new_fire_df.to_csv(_FIRE_DATA_CSV, mode='a', header=not csv_has_header, index=False)

    #note: returns a tuple of two vars
    return bool(new_rows), new_incidents_fire_names

In [16]:
import tweepy
#api_secrets.py
import api_secrets

#constants
#=========

#character limit for a single tweet (presumably Twitter's standard limit - confirm)
MAX_TWEET_LENGTH = 280

#\constants
#==========

def tweet(tweet_text_list):
    """Post every text in tweet_text_list as its own tweet.

    tweet_text_list is a list because several new fires may appear between
    checks. Each text is cut to MAX_TWEET_LENGTH characters so that one
    over-long message cannot make the request fail and block the remaining
    tweets. Nothing is done (no client is created) when the list is empty.
    """
    if not tweet_text_list:
        return

    # creates the tweepy Client object
    client = tweepy.Client(
        consumer_key=api_secrets.consumer_key,
        consumer_secret=api_secrets.consumer_secret,
        access_token=api_secrets.access_token,
        access_token_secret=api_secrets.access_token_secret,
    )

    # tweets the fire alerts
    for tweet_text in tweet_text_list:
        # NOTE(review): this counts Python characters; Twitter may weight some
        # characters differently - confirm if non-ASCII text is ever tweeted
        client.create_tweet(text=tweet_text[:MAX_TWEET_LENGTH])


In [17]:
#webscraper is webscraper.py
import webscraper
#twitter_bot is twitter_bot.py
import twitter_bot
import time
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon

#globals
#=======

gdf_list = [] #a list of all geodataframes

#=======
#globals

#constants
#=========

#seconds in one hour (the previous value, 86400, is the number of seconds in a day)
HOUR_IN_SECONDS = 60 * 60
#column order of fire_data.csv
CSV_COLUMN_NAMES = ['name','county','date_started','latitude','longitude']
#shapefiles whose polygons are loaded into gdf_list
SHAPEFILE_NAMES = ['./shapefiles/full_california_fhsz/fhszs06_3.shp']

#\constants
#==========


def main():
    """Run one check: load the shapefiles, scrape for new fires and tweet about them.

    Tweets are only compiled and sent when the scraper reports new incidents.
    """
    read_shapefiles()

    #one-off bootstrap: uncomment to create a fire_data.csv holding only the header row
    #blank_dict = {key: [] for key in CSV_COLUMN_NAMES}
    #new_fire_df = pd.DataFrame.from_dict(blank_dict)
    #new_fire_df.to_csv('fire_data.csv', header=CSV_COLUMN_NAMES, index=False)

    #TODO:: main server loop - wrap the check below in `while True:` and wait
    #an hour between webscraping checks with time.sleep(HOUR_IN_SECONDS)
    new_incidents_available, new_fire_names = webscraper.update_data_file()
    if new_incidents_available:
        twitter_bot.tweet(compile_tweet_text(new_fire_names))

def compile_tweet_text(new_fire_names):
    """Build the alert text for every newly recorded fire.

    new_fire_names is the list of fire names to announce. Each name is looked
    up in the name column of fire_data.csv and one alert string is produced per
    matching row, so two fires sharing a name each get their own alert. A name
    listed more than once is only looked up once, otherwise its rows would be
    announced repeatedly.

    Returns a list of plain strings, one per alert.
    """
    tweet_text_list = []
    df = pd.read_csv('fire_data.csv')
    name_column, county_column, date_column, latitude_column, longitude_column = CSV_COLUMN_NAMES
    #dict.fromkeys drops repeated names while keeping their original order
    for new_fire_name in dict.fromkeys(new_fire_names):
        fire_rows = df[df[name_column] == new_fire_name]
        #go row by row so every value is a scalar; selecting a column from the
        #filtered frame yields a pandas Series, which cannot be joined into one string
        for fire_row in fire_rows.to_dict('records'):
            fhsz_text = ""
            if fhsz(fire_row[latitude_column], fire_row[longitude_column]):
                fhsz_text = "The fire is in an area designated as a fire hazard zone."
            #str() guards against non-string cells (e.g. an empty county read back as NaN)
            tweet_text = (
                "ALERT\n\nNew fire: " + str(new_fire_name)
                + "\nCounty: " + str(fire_row[county_column])
                + "\nStarted on: " + str(fire_row[date_column])
                + "\n" + fhsz_text
            )
            tweet_text_list.append(tweet_text)
    return tweet_text_list

#loads the fhsz shapefiles into the global gdf_list
def read_shapefiles():
    """Read every shapefile in SHAPEFILE_NAMES into the global gdf_list.

    The list is replaced in place rather than appended to, so calling this
    again (e.g. re-running the cell) does not load each shapefile twice.
    """
    #gdf is geodataframe
    gdf_list[:] = [gpd.read_file(shapefile_name) for shapefile_name in SHAPEFILE_NAMES]

#returns whether a location is inside a fire hazard severity zone
def fhsz(lat,long):
    """Return True when the location lies within any polygon of the loaded geodataframes.

    lat and long are the latitude and longitude of the location.
    Returns False when no polygon contains the point or nothing is loaded.
    """
    #shapely points are (x, y), i.e. (longitude, latitude)
    point = Point(long, lat)
    for gdf in gdf_list:
        zone_point = point
        if gdf.crs is not None:
            #NOTE(review): assumes lat/long are WGS84 degrees (EPSG:4326) - confirm.
            #The point is reprojected because the shapefile may use a different
            #coordinate reference system, in which case it would never match.
            zone_point = gpd.GeoSeries([point], crs="EPSG:4326").to_crs(gdf.crs).iloc[0]
        #polygon.contains(point) is the same test as point.within(polygon), done for all polygons at once
        if gdf.geometry.contains(zone_point).any():
            return True
    return False

#run a check only when executed as a script (or notebook cell), not when imported
if __name__ == "__main__":
    main()

Pleasant Fire
Howard Fire
97 Fire
Manzanita Fire
Irie Fire
Garden Fire
Dutch Fire
Forward Fire
Eliza Fire
Barnes Fire
Fork Fire
Coyote Fire
Mosquito Fire
Power Fire
Rosa Fire
Radford Fire
Hill Fire
Fairview Fire
Caesar Fire
Sandia Fire
Red Fire
Tower Fire
Mountain Fire
Mill Fire
Woods Fire
Walker Fire
Branstetter Fire
Border 32
Route Fire
McCovey Fire
Gulch Fire
Ranch Fire
Still Fire
East Fire
Valley Fire
Point Fire
Pleasant Fire
Quail Fire
Los Bueyes
Wishon Fire
Eden Fire
Oak Fire
Rail Fire
Sam Fire
S-2 Fire
Rodgers Fire
3-8 Fire
Six Rivers (SRF) Lightning Complex
Smokey Fire
Springs Fire
Marmot Fire
Meamber Fire
Pebble Fire
Kelsey Fire
Shackleford Fire
Mesa Fire
Yeti and Alex Fire
Highway Fire
McKinney Fire
Apple Fire
Casner Fire
Cable Fire
Oak Fire
Anzar Fire
Slate Fire
Flynn Fire
Meadow Fire
Winding Fire
Agua Fire
Bell Fire
Riosa Fire
Border 27 Fire
Rainbow Fire
Grant Fire
Peter Fire
Herman Fire
Harbison
Porter Fire
Bay Fire
Washburn Fire
Nome Fire
Jan-Dar Fire
Electra Fire
Garriso

KeyboardInterrupt: 