# Webscraping: California wildfires 

In [1]:
from bs4 import BeautifulSoup
import requests
import time, os
import re
import pandas as pd

In [14]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager

from sqlalchemy import create_engine
from sqlalchemy import inspect

## Webscraping CAL FIRE 

In [4]:
# functions for parsing/processing 'multi' page (10 fires per page)

def get_fire_name(fire):
    try:
        name = fire.find(text=re.compile('Name')).findNext().text.strip()
        return name
    except:
        return None

def get_start_date(fire):
    try:
        start_date = fire.find_all('div', class_='responsive-table-label')[0] \
            .findNext().findNext().text
        return start_date.split('\n')[1].strip()
    except:
        return None

def get_counties(fire):
    try:
        counties = fire.find_all('div', class_='responsive-table-label')[1] \
            .findNext().text
        return counties.split('\n')[1].strip()
    except:
        return None
    
def get_acres(fire):
    try:
        acres = fire.find_all('div', class_='responsive-table-label')[2] \
            .findNext().text
        return int(acres.split('\n')[1].strip().replace(',', ''))
    except:
        return None

In [5]:
# functions for parsing/processing single/individual fire page 

def get_table_value(table, text):
    obj = table.find(text=text)
    
    if not obj:
        return None
    
    return obj.findNext().text.strip()

def get_section_value(loss_section, loss_type):
    '''Parsing 'Damages and Losses' section of single fire page'''
    
    obj = loss_section.find('h4',text=re.compile(loss_type))
    
    if not obj:
        return 0
    
    return int(obj.text.split()[0].strip().replace(',', ''))

In [7]:
def get_fires(year, pg_start, pg_end):
    '''Gets all wildfires in CA for a given year and page range'''
    
    # opening website with selenium driver
    driver = webdriver.Chrome(ChromeDriverManager().install())
    driver.get('https://www.fire.ca.gov/incidents/')
    time.sleep(1)

    headers = ['name', 'start_date', 'counties', 'acres',
        'last_updated', 'date_started', 'date_contained', 
        'lat_long', 'cause', 'destroyed', 'fatalities']
    fire_data = []
    
    # navigating to annual incident page 1
    driver.find_element_by_link_text(str(year)).click()
    time.sleep(1)
    
    # determine the number of 'multi' pages for a given year
    incident_table = driver.find_element_by_id("incidentListTable")
    num_pages = incident_table.find_elements_by_tag_name("a")[-1].text
    
    # navigate through 'multi' pages by page number - each has 10 fires
    for page_num in range(pg_start, pg_end+1):
        if page_num > 1:
            driver.find_element_by_link_text(str(page_num)).click()

        # capturing html of multiple fire page 
        soup_multi = BeautifulSoup(driver.page_source)
        
        # finding table of 10 fires using Beautiful Soup
        fire_table = soup_multi.find('div', id='incidentListTable') \
            .find_all('div', class_='responsive-table-row')
        
        if page_num == num_pages: # last page may have fewer than 10 fires
            num_fires = len(fire_table)-1 # subtract 1 because of header row
        else:
            num_fires = 10
        
        # looping through the 10 fires 
        for i in range(1, num_fires+1): # first row is header
            fire = fire_table[i]

            # parsing data points 
            name = get_fire_name(fire)
            start_date = get_start_date(fire)
            counties = get_counties(fire)
            acres = get_acres(fire)
        
            # navigate to individual (single) fire page - fire name is the link
            driver.find_element_by_link_text(name).click()
            time.sleep(1)

            # capturing html of single fire page 
            soup_single = BeautifulSoup(driver.page_source)

            # parsing a table on single fire page
            table = soup_single.find('table', class_='table table-striped')
            
            # parsing data points on single fire page
            last_updated = get_table_value(table, text='Last Updated')
            date_started = get_table_value(table, text='Date Started')
            date_contained = get_table_value(table, text='Date Contained')
            lat_long = get_table_value(table, text='Lat/Long')
            cause = get_table_value(table, text='Cause')

            # parsing 'Damages and Losses' section of single fire page
            loss_section = soup_single.find('div', class_='section incident-damages-and-losses')
            if loss_section:
                destroyed = get_section_value(loss_section, 'Structures Destroyed')
                fatalities = get_section_value(loss_section, 'Fatalities')
            else:
                destroyed, fatalities = 0,0

            # navigate back to 'multi' page
            driver.back()
            time.sleep(1)
            if page_num > 1: # 'back' goes to page 1 so need to navigate back to page of interest
                driver.find_element_by_link_text(str(page_num)).click()
                time.sleep(1)
            
            # putting scraped data into a dictionary
            fire_dict = dict(zip(headers, [name, start_date, counties, acres, 
                            last_updated, date_started, date_contained,
                            lat_long, cause, destroyed, fatalities]))
            fire_data.append(fire_dict)
    driver.quit()
    return fire_data

## Scraped fires 

### 2020 

In [8]:
# 2020 first 5 pages

fires_2020_1 = get_fires(2020, 1, 5)

26


In [34]:
fires_2020_1 = pd.DataFrame(fires_2020_1)
fires_2020_1.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Slink Fire,9/1/2020,Mono,,11/10/20 2:33 PM,09/01/20 12:38 PM,,"[38.568, -119.568]",Under Investigation,0,0
46,Hobo Fire,9/1/2020,Trinity,,09/16/20 11:23 AM,09/01/20 9:37 AM,09/10/20 11:23AM,"[40.82126, -123.12461]",,0,0
47,Hensley Fire,8/30/2020,Madera,688.0,08/30/20 7:11 PM,08/30/20 11:11 AM,08/30/20 7:11PM,"[37.08053, -119.88673]",,0,0
48,Johnson Fire,8/28/2020,Los Angeles,290.0,08/30/20 7:27 PM,08/28/20 4:21 PM,08/30/20 7:27PM,"[34.65979, -118.36411]",,0,0
49,Perry Fire,8/26/2020,Placer,9.0,08/26/20 7:35 PM,08/26/20 1:37 PM,,"[38.8825, -121.10411]",Under Investigation,0,0


In [12]:
fires_2020_1.shape

(50, 11)

In [17]:
fires_2020_2 = get_fires(2020, 6, 10)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [35]:
fires_2020_2 = pd.DataFrame(fires_2020_2)
fires_2020_2.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Stagecoach Fire,8/3/2020,Kern,7760.0,08/18/20 5:51 PM,08/03/20 5:33 PM,08/18/20 5:50PM,"[35.43044, -118.53361]",Under Investigation,0,0
46,Elsmere Fire,8/3/2020,Los Angeles,200.0,08/24/20 2:47 PM,08/03/20 3:45 PM,08/06/20 1:32PM,"[34.3428, -118.49731]",Under Investigation,0,0
47,Beale Fire,8/2/2020,Yuba,600.0,08/13/20 11:22 AM,08/02/20 10:24 PM,08/03/20 7:22AM,"[39.11307, -121.38178]",,0,0
48,Post Fire,8/2/2020,Los Angeles,120.0,08/24/20 2:47 PM,08/02/20 8:49 PM,08/04/20 2:34PM,"[34.04447, -118.21608]",,0,0
49,Sites Fire,8/2/2020,Colusa,560.0,08/05/20 7:16 AM,08/02/20 5:13 PM,08/05/20 7:16AM,"[39.31313, -122.48525]",,0,0


In [20]:
fires_2020_3 = get_fires(2020, 11, 15)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [36]:
fires_2020_3 = pd.DataFrame(fires_2020_3)
fires_2020_3.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Byron Fire,4/27/2020,Contra Costa,47,04/27/20 8:34 PM,04/27/20 2:59 PM,04/27/20 8:33PM,"[37.82066, -121.595225]",,0,0
46,Bar Fire,4/25/2020,Trinity,55,05/01/20 4:49 PM,04/25/20 2:59 PM,,"[40.63442, -122.97693]",Under Investigation,0,0
47,Red Fire,4/23/2020,Tehama,26,04/26/20 9:53 AM,04/23/20 8:52 PM,04/24/20 9:53AM,"[40.09554, -122.43905]",,0,0
48,Carter Fire,4/4/2020,Imperial,275,04/20/20 1:51 PM,04/04/20 8:22 PM,04/12/20 1:51PM,"[32.93432, -115.5865]",Under Investigation,0,0
49,South Main Fire,3/6/2020,Riverside,20,03/09/20 11:20 AM,03/06/20 12:44 PM,,"[33.65263, -117.40906]",Under Investigation,0,0


In [23]:
fires_2020_4 = get_fires(2020, 16, 20)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [37]:
fires_2020_4 = pd.DataFrame(fires_2020_4)
fires_2020_4.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Riata Fire,6/16/2020,San Luis Obispo,18,06/17/20 2:59 PM,06/16/20 2:44 PM,06/17/20 2:59PM,"[35.40953, -120.56823]",Under Investigation,0,0
46,Bitter Fire,6/16/2020,San Benito,895,06/21/20 7:39 PM,06/16/20 2:11 PM,06/21/20 7:27PM,"[36.3011, -120.92925]",Under Investigation,0,0
47,Avila Fire,6/15/2020,San Luis Obispo,445,07/24/20 3:34 PM,06/15/20 4:44 PM,06/19/20 7:27AM,"[35.17977, -120.69959]",Under Investigation,0,0
48,Drum Fire,6/14/2020,Santa Barbara,696,06/16/20 7:22 PM,06/14/20 3:03 PM,,"[34.63309, -120.28867]",Under Investigation,0,0
49,Hopland Fire,6/13/2020,Mendocino,34,06/14/20 7:40 AM,06/13/20 6:52 PM,06/14/20 7:36AM,"[39.14783, -80.49163]",,0,0


In [24]:
fires_2020_5 = get_fires(2020, 21, 25)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [38]:
fires_2020_5 = pd.DataFrame(fires_2020_5)
fires_2020_5.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Byron Fire,4/27/2020,Contra Costa,47,04/27/20 8:34 PM,04/27/20 2:59 PM,04/27/20 8:33PM,"[37.82066, -121.595225]",,0,0
46,Bar Fire,4/25/2020,Trinity,55,05/01/20 4:49 PM,04/25/20 2:59 PM,,"[40.63442, -122.97693]",Under Investigation,0,0
47,Red Fire,4/23/2020,Tehama,26,04/26/20 9:53 AM,04/23/20 8:52 PM,04/24/20 9:53AM,"[40.09554, -122.43905]",,0,0
48,Carter Fire,4/4/2020,Imperial,275,04/20/20 1:51 PM,04/04/20 8:22 PM,04/12/20 1:51PM,"[32.93432, -115.5865]",Under Investigation,0,0
49,South Main Fire,3/6/2020,Riverside,20,03/09/20 11:20 AM,03/06/20 12:44 PM,,"[33.65263, -117.40906]",Under Investigation,0,0


In [54]:
# last 8 fires

fires_2020_6 = get_fires(2020, 26, 26, 8)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [55]:
fires_2020_6 = pd.DataFrame(fires_2020_6)
fires_2020_6.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
3,Baseball Fire,2/25/2020,Glenn and Mendocino,211.0,03/09/20 11:22 AM,02/25/20 2:17 PM,03/03/20 11:21AM,"[39.75638899, -122.94111111]",Under Investigation,0,0
4,Wood Fire,2/23/2020,Lassen,57.0,02/24/20 2:45 PM,02/23/20 5:50 PM,02/23/20 2:45PM,"[40.62155, -120.49392]",Under Investigation,0,0
5,Beegum Fire,2/23/2020,Tehama,75.0,02/24/20 2:32 PM,02/23/20 8:35 AM,02/24/20 2:32PM,"[40.33583333, -122.84722222]",Under Investigation,0,0
6,Antelope Fire,2/17/2020,Sierra,102.0,02/24/20 2:41 PM,02/17/20 3:04 PM,02/20/20 2:40PM,"[39.6923001, -120.3291855]",,0,0
7,Bear Fire,2/15/2020,Humboldt,15.0,02/16/20 9:24 AM,02/15/20 5:48 PM,,"[40.511092, -123.6378411]",,0,0


In [59]:
# combining 2020 data

fires_2020 = pd.concat([fires_2020_1, fires_2020_2, fires_2020_3, fires_2020_4,
           fires_2020_5, fires_2020_6])
fires_2020.head()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
0,Sanderson Fire,12/12/2020,Riverside,1933.0,12/14/20 5:50 PM,12/13/20 1:00 AM,12/14/20 5:50PM,"[33.93244, -117.03459]",Under Investigation,0,0
1,Cornell Fire,12/7/2020,Ventura,174.0,12/17/20 11:18 AM,12/07/20 11:44 AM,12/08/20 11:17AM,"[34.33622, -119.078]",,0,0
2,Thomas Fire,12/3/2020,Lassen,24.0,12/03/20 6:36 PM,12/03/20 1:32 PM,12/03/20 5:40PM,"[41.591948, -120.374514]",,0,0
3,Cerritos Fire,12/2/2020,Riverside,200.0,12/04/20 6:45 PM,12/03/20 2:06 AM,,"[33.773754, -117.051463]",Under Investigation,0,0
4,Bond Fire,12/2/2020,Orange,6686.0,12/17/20 1:15 PM,12/02/20 10:14 PM,12/10/20 6:59PM,"[33.743842, -117.674967]",,31,0


In [60]:
fires_2020.shape

(258, 11)

In [61]:
fires_2020.to_csv('fires_2020.csv', index=False)

#### Fixing 2020 

In [8]:
# coming back to re-run fires_2020_3 because it read the wrong pages

fires_2020_3_fix = get_fires(2020, 11, 15)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [9]:
fires_2020_3_fix = pd.DataFrame(fires_2020_3_fix)
fires_2020_3_fix.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Tripp Fire,7/9/2020,Riverside,10.0,07/11/20 7:44 AM,07/09/20 2:27 PM,07/09/20 7:44AM,"[33.551838, -116.7464168]",Under Investigation,0,0
46,Nail Fire,7/7/2020,Colusa,25.0,07/28/20 3:55 PM,07/07/20 5:32 PM,07/09/20 3:20PM,"[39.35498, -122.65813]",Under Investigation,0,0
47,Brandon Fire,7/7/2020,El Dorado,32.0,07/08/20 8:33 PM,07/07/20 4:06 PM,,"[38.58488, -120.941267]",Under Investigation,0,0
48,Mountain Meadow Fire,7/6/2020,Solano,75.0,07/28/20 11:50 AM,07/06/20 6:22 PM,07/09/20 11:50AM,"[38.21614, -122.12447]",,0,0
49,Patterson Fire,7/6/2020,San Joaquin,150.0,07/06/20 7:04 PM,07/06/20 4:04 PM,07/06/20 7:02PM,"[37.71984, -121.53524]",Under Investigation,0,0


In [16]:
fires_2020_3_fix.head()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
0,North Fire,8/2/2020,Lassen,6882.0,08/13/20 11:27 AM,08/02/20 4:51 PM,08/10/20 11:27AM,"[40.36764, -120.44811]",Under Investigation,0,0
1,Water Fire,8/2/2020,Riverside,52.0,08/04/20 3:13 PM,08/02/20 3:11 PM,08/04/20 3:12PM,"[33.924086, -116.641846]",,0,0
2,Sophia Fire,8/2/2020,El Dorado,36.0,08/02/20 9:12 AM,08/02/20 9:12 AM,08/02/20 9:11AM,"[38.686478, -121.103299]",,0,0
3,Pond Fire,8/1/2020,San Luis Obispo,1962.0,08/09/20 7:17 PM,08/01/20 6:44 PM,08/09/20 7:17PM,"[35.43128, -120.47346]",,0,0
4,Castaic Fire,8/1/2020,Los Angeles,178.0,11/10/20 2:25 PM,08/01/20 6:04 PM,09/14/20 2:25PM,"[34.51853, -118.59699]",,0,0


In [30]:
# looks like 2020_3 ran the wrong pages (leading to duplicates) - so coming back to fix that

fires_2020 = pd.read_csv('fires_2020_old.csv')
fires_2020.head()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
0,Sanderson Fire,12/12/2020,Riverside,1933.0,12/14/20 5:50 PM,12/13/20 1:00 AM,12/14/20 5:50PM,"[33.93244, -117.03459]",Under Investigation,0,0
1,Cornell Fire,12/7/2020,Ventura,174.0,12/17/20 11:18 AM,12/07/20 11:44 AM,12/08/20 11:17AM,"[34.33622, -119.078]",,0,0
2,Thomas Fire,12/3/2020,Lassen,24.0,12/03/20 6:36 PM,12/03/20 1:32 PM,12/03/20 5:40PM,"[41.591948, -120.374514]",,0,0
3,Cerritos Fire,12/2/2020,Riverside,200.0,12/04/20 6:45 PM,12/03/20 2:06 AM,,"[33.773754, -117.051463]",Under Investigation,0,0
4,Bond Fire,12/2/2020,Orange,6686.0,12/17/20 1:15 PM,12/02/20 10:14 PM,12/10/20 6:59PM,"[33.743842, -117.674967]",,31,0


In [31]:
fires_2020.shape

(258, 11)

In [33]:
fires_2020 = pd.concat([fires_2020[:100], fires_2020_3_fix, fires_2020[150:]]) \
    .reset_index(drop=True) 
fires_2020.shape

(258, 11)

In [37]:
fires_2020.to_csv('fires_2020.csv', index=False)

### 2019 

In [25]:
fires_2019_1 = get_fires(2019, 1, 5)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [39]:
fires_2019_1 = pd.DataFrame(fires_2019_1)
fires_2019_1.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Sandalwood Fire,10/10/2019,Riverside,1011.0,11/01/19 3:14 PM,10/10/19 3:38 PM,,"[33.9925, -117.059167]",,74,2
46,Reche Fire,10/10/2019,Riverside,350.0,10/14/19 10:56 AM,10/10/19 2:18 PM,10/13/19 7:30AM,"[33.975405, -117.207068]",Under Investigation,0,0
47,Eagle Fire,10/10/2019,Riverside,9.0,10/10/19 6:11 PM,10/10/19 12:08 PM,10/10/19 6:11PM,"[33.827979, -117.499619]",Under Investigation,0,0
48,Merrill Fire,10/9/2019,Contra Costa,40.0,10/12/19 2:00 PM,10/10/19 6:42 AM,10/10/19 12:00PM,"[37.828267, -122.103867]",,0,0
49,Inghram Fire,10/9/2019,Tehama,10.0,10/09/19 9:52 AM,10/09/19 9:52 AM,10/08/19 9:52AM,"[39.847708, -122.160416]",,0,0


In [27]:
fires_2019_2 = get_fires(2019, 6, 10)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [40]:
fires_2019_2 = pd.DataFrame(fires_2019_2)
fires_2019_2.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Cow Fire,9/6/2019,Inyo,1975,01/10/20 4:08 PM,09/06/19 12:23 PM,11/21/19 4:08PM,"[36.284, -118.228]",Lightning,0,0
46,Lone Fire,9/6/2019,Modoc,5737,09/26/19 2:46 PM,09/06/19 11:34 AM,09/13/19 7:19AM,"[41.748, -121.056]",Lightning,0,0
47,Springs Fire,9/6/2019,Mono,4840,10/24/19 11:18 AM,09/06/19 11:24 AM,,"[37.826, -118.872]",Lightning,0,0
48,South Fire,9/5/2019,Tehama,5332,01/10/20 4:13 PM,09/05/19 7:59 PM,12/02/19 4:12PM,"[40.109, -122.789]",Lightning,0,0
49,Flood Fire,9/5/2019,San Joaquin,44,09/08/19 7:56 PM,09/05/19 2:33 PM,09/05/19 7:56PM,"[38.017138, -120.945044]",Under Investigation,0,0


In [28]:
fires_2019_3 = get_fires(2019, 11, 15)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [41]:
fires_2019_3 = pd.DataFrame(fires_2019_3)
fires_2019_3.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,3-2 Willow (Previously H-1 Fire),8/7/2019,Lassen,107,10/02/19 4:07 PM,08/07/19 6:48 PM,,"[40.900458, -121.320838]",Under Investigation,0,0
46,R-1 Fire,8/7/2019,Lassen,130,09/26/19 2:47 PM,08/07/19 5:21 PM,,"[40.886677, -120.336977]",Under Investigation,0,0
47,Sellers Fire,8/7/2019,Contra Costa,58,09/26/19 2:48 PM,08/07/19 3:13 PM,,"[37.968494, -121.678172]",Under Investigation,0,0
48,Toro Fire,8/5/2019,Riverside,94,08/06/19 6:42 PM,08/05/19 3:36 PM,08/06/19 6:42PM,"[33.740219, -117.333728]",Under Investigation,0,0
49,Caliente Fire,8/5/2019,Kern,29,08/06/19 6:43 PM,08/05/19 1:56 PM,08/06/19 6:43PM,"[35.287373, -118.623651]",Under Investigation,0,0


In [29]:
fires_2019_4 = get_fires(2019, 16, 20)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [42]:
fires_2019_4 = pd.DataFrame(fires_2019_4)
fires_2019_4.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Sargent Fire,7/9/2019,Monterey,190,07/10/19 4:05 PM,07/09/19 9:43 PM,07/10/19 4:04PM,"[35.95191, -120.8092]",Under Investigation,0,0
46,Gillis Fire,7/8/2019,San Luis Obispo,974,07/10/19 7:11 AM,07/08/19 4:44 PM,07/09/19 6:22PM,"[35.63111111, -120.26916667]",Under Investigation,0,0
47,Lake Fire,7/8/2019,Mariposa,244,07/15/19 6:34 PM,07/08/19 3:30 PM,,"[37.524378, -120.296551]",Under Investigation,0,0
48,Far Fire,7/6/2019,Yuba,38,07/08/19 7:51 AM,07/06/19 8:21 PM,,"[39.06066, -121.35014]",Under Investigation,0,0
49,Snowstorm Fire,7/5/2019,Lassen,263,07/08/19 7:49 AM,07/05/19 4:37 PM,07/06/19 7:49AM,"[40.679318, -120.392768]",Under Investigation,0,0


In [30]:
fires_2019_5 = get_fires(2019, 21, 25)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [43]:
fires_2019_5 = pd.DataFrame(fires_2019_5)
fires_2019_5.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Spruce Fire,6/1/2019,Tulare,155,06/11/19 2:48 PM,06/01/19 3:49 PM,06/11/19 2:48PM,"[36.28001, -119.10974]",Unknown,0,0
46,Canyon Fire,5/30/2019,Santa Clara,144,06/11/19 2:40 PM,05/30/19 4:19 PM,06/11/19 2:40PM,"[37.45899, -121.22093]",Unknown,0,0
47,Diversion Fire,5/30/2019,Riverside,45,06/03/19 8:48 AM,05/30/19 12:41 PM,06/03/19 8:48AM,"[33.69078, -114.57265]",Unknown,0,0
48,66 Fire,5/29/2019,Riverside,55,06/24/19 11:46 AM,05/29/19 9:06 PM,06/24/19 11:46AM,"[33.57039, -116.1466]",Unknown,0,0
49,Hill Fire,5/29/2019,Fresno,121,06/05/19 3:07 PM,05/29/19 5:18 PM,06/05/19 3:07PM,"[36.66909, -119.3046]",Unknown,0,0


In [62]:
fires_2019_6 = get_fires(2019, 26, 26)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [63]:
fires_2019_6 = pd.DataFrame(fires_2019_6)
fires_2019_6.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
5,Sugar Fire,5/10/2019,Placer,65,05/24/19 2:28 PM,05/10/19 2:14 PM,05/24/19 2:28PM,"[39.11536, -120.76088]",Unknown,0,0
6,Refuge Fire,5/7/2019,Kern,2500,05/09/19 9:37 AM,05/07/19 3:47 PM,05/09/19 9:37AM,"[35.72057, -119.62762]",Unknown,0,0
7,Vulcan Fire,5/3/2019,Kern,172,05/08/19 8:48 AM,05/03/19 3:04 PM,05/08/19 8:48AM,"[35.03855, -119.19454]",Unknown,0,0
8,Meridian Fire,5/1/2019,Butte,50,05/08/19 8:47 AM,05/01/19 4:46 PM,05/08/19 8:47AM,"[39.85484, -121.905]",Unknown,0,0
9,Woody Fire,4/30/2019,Kern,115,05/08/19 8:47 AM,04/30/19 6:37 PM,05/08/19 8:47AM,"[35.65189, -118.92545]",Unknown,0,0


In [64]:
# last 4 fires

fires_2019_7 = get_fires(2019, 27, 27, 4)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [65]:
fires_2019_7 = pd.DataFrame(fires_2019_7)
fires_2019_7.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
0,Cana Fire,4/30/2019,Butte,10,05/08/19 8:47 AM,04/30/19 12:20 PM,05/08/19 8:47AM,"[39.83958, -121.957]",Unknown,0,0
1,York Fire,4/28/2019,Kings,161,06/24/19 11:45 AM,04/28/19 10:20 PM,06/24/19 11:45AM,"[35.81778, -120.09715]",Unknown,0,0
2,Girasol Fire,4/9/2019,Riverside,30,04/10/19 10:23 AM,04/09/19 10:18 PM,04/10/19 10:23AM,,Unknown,0,0
3,Pilot Fire,1/1/2019,Humboldt,30,01/15/19 10:38 AM,01/01/19 2:14 PM,01/15/19 10:38AM,"[40.61805556, -123.67555556]",Unknown,0,0


In [66]:
# combining 2019 data

fires_2019 = pd.concat([fires_2019_1, fires_2019_2, fires_2019_3, fires_2019_4,
           fires_2019_5, fires_2019_6, fires_2019_7])
fires_2019.head()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
0,Cave Fire (No Longer a CAL FIRE Incident),11/25/2019,Santa Barbara,3126.0,12/16/19 8:23 AM,11/25/19 7:59 PM,12/14/19 8:22AM,"[34.5025, -119.785]",,0,0
1,Foothills Fire,11/25/2019,Placer,355.0,11/25/19 8:31 PM,11/25/19 12:39 PM,,"[38.838992, -121.325842]",,0,0
2,Cashe Fire,11/25/2019,Yolo,,12/03/19 4:35 PM,11/25/19 12:02 PM,,"[38.734634, -121.729691]",,0,0
3,Eagle Fire,11/4/2019,Lake,75.0,11/14/19 6:26 PM,11/05/19 6:52 AM,11/06/19 6:30PM,"[39.100408, -122.496548]",,0,0
4,Ranch Fire,11/3/2019,Tehama,2534.0,11/18/19 8:28 AM,11/03/19 2:16 PM,11/14/19 6:02PM,"[40.036379, -122.637837]",,0,0


In [67]:
fires_2019.shape

(264, 11)

In [68]:
fires_2019.to_csv('fires_2019.csv', index=False)

### 2018 

In [31]:
fires_2018_1 = get_fires(2018, 1, 5)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [44]:
fires_2018_1 = pd.DataFrame(fires_2018_1)
fires_2018_1.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Fork Fire,9/9/2018,Los Angeles,166.0,01/04/19 9:05 AM,09/09/18 12:10 PM,01/04/19 9:05AM,"[34.2325, -117.846111]",Unknown,0,0
46,Snell Fire,9/8/2018,Napa,2490.0,01/04/19 9:06 AM,09/08/18 2:29 PM,01/04/19 9:06AM,"[38.69601, -122.44468]",Under Investigation,0,0
47,Tulloch Fire,9/8/2018,Tuolumne,573.0,01/04/19 9:06 AM,09/08/18 1:34 PM,01/04/19 9:06AM,"[37.83388, -120.61746]",Unknown,0,0
48,Delta Fire,9/5/2018,Shasta and Trinity,63311.0,10/25/19 10:17 AM,09/05/18 12:51 PM,01/04/19 9:07AM,"[40.9425, -122.43]",Unknown,42,0
49,Kerlin Fire,9/4/2018,Trinity,1751.0,01/04/19 9:08 AM,09/04/18 3:20 PM,01/04/19 9:08AM,"[40.616251, -123.52019]",Unknown,0,0


In [32]:
fires_2018_2 = get_fires(2018, 6, 10)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [48]:
fires_2018_2 = pd.DataFrame(fires_2018_2)
fires_2018_2.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Tarina Fire,8/3/2018,Kern,2950,01/04/19 9:26 AM,08/03/18 2:48 PM,01/04/19 9:26AM,"[35.37444, -118.83556]",Unknown,0,0
46,Donnell Fire,8/1/2018,Tuolumne,36450,01/04/19 9:26 AM,08/01/18 5:48 PM,01/04/19 9:26AM,"[38.349, -119.929]",Unknown,0,0
47,Western Fire,8/1/2018,Mendocino,106,01/04/19 9:26 AM,08/01/18 3:53 PM,01/04/19 9:26AM,"[38.8802, -123.0496]",Unknown,0,0
48,Omega Fire,8/1/2018,El Dorado,66,10/25/19 10:09 AM,08/01/18 2:40 PM,01/04/19 9:27AM,"[38.82128, -121.03918]",Unknown,0,0
49,Bumper Fire,8/1/2018,El Dorado,67,01/04/19 9:28 AM,08/01/18 1:57 PM,01/04/19 9:28AM,"[38.645, -120.874167]",Unknown,0,0


In [45]:
fires_2018_3 = get_fires(2018, 11, 15)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [49]:
fires_2018_3 = pd.DataFrame(fires_2018_3)
fires_2018_3.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Tuscan Fire,7/15/2018,Tehama,50,01/04/19 9:43 AM,07/15/18 5:15 PM,01/04/19 9:43AM,"[40.27754, -122.11375]",Unknown,0,0
46,3-18 Fire,7/15/2018,Lassen,32,01/04/19 9:42 AM,07/15/18 4:45 PM,01/04/19 9:42AM,"[41.08583, -121.05722]",Unknown,0,0
47,Johnson Fire,7/15/2018,Humboldt,16,01/04/19 9:43 AM,07/15/18 4:14 PM,01/04/19 9:43AM,"[41.34453, -123.85404]",Unknown,0,0
48,3-10 Fire,7/15/2018,Lassen,56,01/04/19 9:44 AM,07/15/18 11:55 AM,01/04/19 9:44AM,"[40.95234, -121.21597]",Unknown,0,0
49,Steamboat Fire,7/15/2018,Siskiyou,224,01/04/19 9:44 AM,07/15/18 9:58 AM,01/04/19 9:44AM,"[41.70877, -122.47293]",Lightning,0,0


In [46]:
fires_2018_4 = get_fires(2018, 16, 20)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [50]:
fires_2018_4 = pd.DataFrame(fires_2018_4)
fires_2018_4.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Flat Fire,6/28/2018,Trinity,300,01/04/19 9:59 AM,06/28/18 6:01 PM,01/04/19 9:59AM,"[40.60402, -122.9144]",Unknown,0,0
46,Cherry Fire,6/27/2018,Siskiyou,63,01/04/19 9:59 AM,06/27/18 7:46 PM,01/04/19 9:59AM,"[41.83359, -122.99272]",Unknown,0,0
47,Hyatt Fire,6/27/2018,Lassen,441,10/25/19 9:31 AM,06/27/18 3:09 PM,01/04/19 9:59AM,"[40.316137, -120.45053]",Unknown,4,0
48,Anchor Fire,6/27/2018,Fresno,76,01/04/19 9:59 AM,06/27/18 12:39 PM,01/04/19 9:59AM,"[36.696879, -119.304499]",Unknown,0,0
49,Shippee Fire,6/26/2018,Butte,347,01/04/19 9:59 AM,06/26/18 12:56 PM,01/04/19 9:59AM,"[39.59872, -121.78208]",Unknown,0,0


In [51]:
fires_2018_5 = get_fires(2018, 21, 25)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [52]:
fires_2018_5 = pd.DataFrame(fires_2018_5)
fires_2018_5.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Recycle Fire,6/6/2018,San Diego,265,01/04/19 10:11 AM,06/06/18 9:46 AM,01/04/19 10:11AM,"[32.63046, -116.46674]",Campfire,0,0
46,Vista Fire,6/5/2018,San Diego,12,01/04/19 10:11 AM,06/05/18 1:45 PM,01/04/19 10:11AM,"[33.40769, -117.22974]",Unknown,0,0
47,Pallet Fire,6/5/2018,Sonoma,10,01/04/19 10:12 AM,06/05/18 1:03 PM,01/04/19 10:12AM,"[38.245151, -122.44408]",Unknown,0,0
48,Panoche Fire,6/4/2018,San Benito,64,01/04/19 10:13 AM,06/04/18 11:17 PM,01/04/19 10:13AM,"[36.64832, -121.03253]",Unknown,0,0
49,ONeals Fire,6/4/2018,Madera,300,01/04/19 10:14 AM,06/04/18 5:44 PM,01/04/19 10:14AM,"[37.10181, -119.623981]",Unknown,0,0


In [73]:
fires_2018_6 = get_fires(2018, 26, 30)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [74]:
fires_2018_6 = pd.DataFrame(fires_2018_6)
fires_2018_6.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Main Fire,4/28/2018,Riverside,73.0,01/04/19 10:26 AM,04/28/18 2:26 PM,01/04/19 10:26AM,"[33.67815, -117.32281]",Unknown,0,0
46,French Fire,4/26/2018,Plumas,20.0,01/04/19 10:27 AM,04/26/18 3:00 PM,01/04/19 10:27AM,"[39.89075, -120.18499]",Unknown,0,0
47,Grape Fire,4/24/2018,Humboldt,,01/03/19 3:23 PM,04/24/18 4:59 PM,01/03/19 3:23PM,"[40.70258, -123.55012]",Unknown,0,0
48,Lago Fire,4/22/2018,Riverside,18.0,01/03/19 1:10 PM,04/22/18 1:35 PM,01/03/19 1:10PM,"[33.88744, -117.16232]",Unknown,0,0
49,Moffat Fire,4/19/2018,Inyo,1265.0,01/03/19 1:09 PM,04/19/18 10:28 AM,01/03/19 1:09PM,"[36.71537, -118.08449]",Unknown,0,0


In [75]:
# last 10 fires

fires_2018_7 = get_fires(2018, 31, 31)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [76]:
fires_2018_7 = pd.DataFrame(fires_2018_7)
fires_2018_7.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
5,Pleasant Fire,2/18/2018,Inyo,2070.0,01/03/19 9:43 AM,02/18/18 2:12 PM,01/03/19 9:43AM,"[37.40208, -118.50235]",Unknown,0,0
6,Wilcox Fire,2/18/2018,Tulare,33.0,01/03/19 9:44 AM,02/18/18 1:45 PM,01/03/19 9:44AM,,Unknown,0,0
7,Haycamp Fire,2/7/2018,Monterey,50.0,01/03/19 9:43 AM,02/07/18 3:53 PM,01/03/19 9:43AM,"[35.96689, -121.28185]",Unknown,0,0
8,Montecito Flooding / Mudflows,1/8/2018,Santa Barbara and Ventura,,01/03/19 9:43 AM,01/09/18 4:00 AM,01/03/19 9:43AM,,Unknown,0,0
9,Bridge Fire,12/31/1969,Marin,45.0,01/04/19 9:22 AM,12/31/69 4:00 PM,01/04/19 9:22AM,"[38.07135, -122.76751]",Unknown,0,0


In [78]:
# combining 2018 data

fires_2018 = pd.concat([fires_2018_1, fires_2018_2, fires_2018_3, fires_2018_4,
           fires_2018_5, fires_2018_6, fires_2018_7])
fires_2018.head()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
0,Honey Flooding,11/29/2018,Butte,,07/09/19 1:45 PM,11/29/18 11:30 PM,01/04/19 8:42AM,"[39.71217, -121.77385]",Unknown,0,0
1,Bear Fire,11/16/2018,Santa Cruz,10.0,01/04/19 8:44 AM,11/16/18 11:01 AM,01/04/19 8:44AM,"[37.12857, -122.12036]",Unknown,0,0
2,Morgan Fire,11/15/2018,Contra Costa,20.0,01/04/19 8:44 AM,11/15/18 1:31 PM,01/04/19 8:44AM,"[37.81111, -121.7944]",Unknown,0,0
3,Briggs Fire,11/14/2018,Ventura,150.0,01/04/19 8:44 AM,11/15/18 1:39 AM,01/04/19 8:44AM,"[34.30166667, -119.09916667]",Unknown,0,0
4,Niles Fire,11/14/2018,Alameda,20.0,01/04/19 8:45 AM,11/14/18 2:56 PM,01/04/19 8:45AM,"[37.61158, -121.94182]",Unknown,0,0


In [79]:
fires_2018.shape

(310, 11)

In [80]:
fires_2018.to_csv('fires_2018.csv', index=False)

### 2017 

In [56]:
fires_2017_1 = get_fires(2017, 1, 5)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [81]:
fires_2017_1 = pd.DataFrame(fires_2017_1)
fires_2017_1.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Portola Fire,10/10/2017,Riverside,23,01/09/18 1:32 PM,10/10/17 2:57 PM,01/09/18 1:32PM,"[33.50488, -117.02132]",Unknown,0,0
46,Garden Fire,10/10/2017,Nevada,19,01/09/18 1:33 PM,10/10/17 12:30 PM,01/09/18 1:33PM,"[39.06414, -121.13737]",Unknown,0,0
47,Honey Fire,10/9/2017,Butte,150,01/09/18 1:34 PM,10/09/17 3:05 PM,01/09/18 1:34PM,"[39.74679, -121.67261]",Unknown,0,0
48,Pozo Fire,10/9/2017,San Luis Obispo,45,01/09/18 1:34 PM,10/09/17 2:30 PM,01/09/18 1:34PM,"[35.28738, -120.44448]",Unknown,0,0
49,37 Fire,10/9/2017,Sonoma,1660,01/09/18 1:34 PM,10/09/17 2:00 PM,01/09/18 1:34PM,"[38.14242, -122.47301]",Unknown,0,0


In [69]:
fires_2017_2 = get_fires(2017, 6, 10)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [70]:
fires_2017_2 = pd.DataFrame(fires_2017_2)
fires_2017_2.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,French Fire,9/17/2017,El Dorado,136.0,01/09/18 1:21 PM,09/17/17 3:18 PM,01/09/18 1:21PM,"[38.5758, -120.915]",Unknown,0,0
46,Northern Region Lightning Incident,9/13/2017,,10000.0,01/09/18 1:20 PM,09/13/17 9:45 AM,01/09/18 1:20PM,,Unknown,0,0
47,Buck Fire,9/12/2017,Trinity,13417.0,01/09/18 1:21 PM,09/12/17 5:42 PM,01/09/18 1:21PM,"[40.2275, -123.03583]",Unknown,0,0
48,Pacific Fire,9/12/2017,Butte,12.0,01/09/18 1:21 PM,09/12/17 5:24 PM,01/09/18 1:21PM,"[39.44257, -121.60683]",Unknown,0,0
49,Slides Fire,9/12/2017,Lake,58.0,07/01/19 1:07 PM,09/12/17 4:08 PM,01/09/18 1:20PM,"[39.42604, -123.03344]",Unknown,0,0


In [71]:
fires_2017_3 = get_fires(2017, 11, 15)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [72]:
fires_2017_3 = pd.DataFrame(fires_2017_3)
fires_2017_3.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Bodega Fire,8/27/2017,Sonoma,18.0,01/09/18 12:45 PM,08/27/17 1:30 PM,01/09/18 12:45PM,"[38.26294, -122.71998]",Unknown,0,0
46,Vestal Fire,8/26/2017,Tehama,40.0,01/09/18 12:45 PM,08/26/17 3:03 PM,01/09/18 12:45PM,"[40.29831, -122.70449]",Unknown,0,0
47,Flat Fire,8/26/2017,Fresno,111.0,01/09/18 12:45 PM,08/26/17 2:31 PM,01/09/18 12:45PM,"[36.93332, -119.43376]",Unknown,0,0
48,Peg Fire,8/26/2017,Lassen,157.0,01/09/18 12:44 PM,08/26/17 12:35 PM,01/09/18 12:44PM,"[40.80968, -120.48641]",Unknown,0,0
49,Montezuma Fire,8/26/2017,San Diego,62.0,01/09/18 12:44 PM,08/26/17 11:30 AM,01/09/18 12:44PM,"[33.20988, -116.46989]",Unknown,0,0


In [82]:
fires_2017_4 = get_fires(2017, 16, 20)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [83]:
fires_2017_4 = pd.DataFrame(fires_2017_4)
fires_2017_4.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Mile Fire,7/31/2017,San Bernardino,100,01/09/18 12:16 PM,07/31/17 2:44 PM,01/09/18 12:16PM,"[34.19076, -117.26969]",Unknown,0,0
46,Rose Fire,7/31/2017,Riverside,200,01/09/18 12:15 PM,07/31/17 2:22 PM,01/09/18 12:15PM,"[33.689092, -117.396404]",Unknown,0,0
47,Summit Complex,7/31/2017,Stanislaus,5247,07/01/19 1:07 PM,07/31/17 1:44 PM,01/09/18 12:15PM,"[38.329, -119.782]",Unknown,0,0
48,Roadrunner Fire,7/30/2017,Tulare,2289,01/09/18 12:15 PM,07/30/17 5:43 PM,01/09/18 12:15PM,"[36.0226, -118.94252]",Unknown,0,0
49,Twist Fire,7/30/2017,Tuolumne,124,01/09/18 12:14 PM,07/30/17 4:28 PM,01/09/18 12:14PM,"[37.90014, -120.36005]",Unknown,0,0


In [84]:
fires_2017_5 = get_fires(2017, 21, 25)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [85]:
fires_2017_5 = pd.DataFrame(fires_2017_5)
fires_2017_5.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Grade Fire,7/16/2017,Mendocino,900.0,01/09/18 11:56 AM,07/16/17 2:51 PM,01/09/18 11:56AM,"[39.30125, -123.28825]",Vehicle,0,0
46,Reservation Fire,7/16/2017,Monterey,40.0,01/09/18 11:56 AM,07/16/17 11:17 AM,01/09/18 11:56AM,"[36.66684, -121.74826]",Unknown,0,0
47,North Fire,7/15/2017,Shasta,10.0,01/09/18 11:56 AM,07/15/17 6:10 PM,01/09/18 11:56AM,"[40.48972, -122.38619]",Unknown,0,0
48,Bridge Fire,7/14/2017,San Bernardino,460.0,04/12/18 2:52 PM,07/14/17 2:23 PM,04/12/18 2:52PM,"[34.09773, -117.10567]",Unknown,0,0
49,Dode Fire,7/13/2017,Modoc,410.0,01/09/18 11:54 AM,07/13/17 3:23 PM,01/09/18 11:54AM,"[41.72163, -121.30451]",Unknown,0,0


In [86]:
fires_2017_6 = get_fires(2017, 26, 30)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [87]:
fires_2017_6 = pd.DataFrame(fires_2017_6)
fires_2017_6.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Valley Fire,7/4/2017,San Benito,34.0,01/09/18 11:42 AM,07/04/17 1:21 PM,01/09/18 11:42AM,"[36.85596, -121.30983]",Unknown,0,0
46,Silver Fire,7/4/2017,San Bernardino,13.0,01/09/18 11:42 AM,07/04/17 1:15 PM,01/09/18 11:42AM,"[34.304722, -117.3125]",Unknown,0,0
47,Timber Fire,7/3/2017,Riverside,30.0,01/09/18 11:42 AM,07/03/17 7:31 PM,01/09/18 11:42AM,"[33.96896, -117.25411]",Unknown,0,0
48,Lago Fire,7/3/2017,Riverside,50.0,01/09/18 11:41 AM,07/03/17 3:14 PM,01/09/18 11:41AM,"[33.87739, -117.16215]",Unknown,0,0
49,Canyon Fire,7/3/2017,Riverside,10.0,01/09/18 11:41 AM,07/03/17 1:34 PM,01/09/18 11:41AM,"[33.91806, -116.99321]",Unknown,0,0


In [88]:
fires_2017_7 = get_fires(2017, 31, 35)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [89]:
fires_2017_7 = pd.DataFrame(fires_2017_7)
fires_2017_7.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Shore Fire,6/16/2017,Monterey,32.0,01/09/18 10:34 AM,06/16/17 3:49 PM,01/09/18 10:34AM,"[35.90092, -120.93371]",Unknown,0,0
46,Springs Fire,6/16/2017,Riverside,10.0,01/09/18 10:34 AM,06/16/17 2:51 PM,01/09/18 10:34AM,,Unknown,0,0
47,Skyway Fire,6/16/2017,Butte,134.0,01/09/18 10:34 AM,06/16/17 9:27 AM,01/09/18 10:34AM,"[39.69717, -121.75417]",Unknown,0,0
48,Nuevo Fire,6/15/2017,Riverside,28.0,01/09/18 10:33 AM,06/15/17 3:51 PM,01/09/18 10:33AM,"[33.79925, -117.11659]",Unknown,0,0
49,Canyon Fire,6/15/2017,Riverside,105.0,01/09/18 10:33 AM,06/15/17 11:36 AM,01/09/18 10:33AM,"[33.950382, -116.884446]",Unknown,0,0


In [90]:
fires_2017_8 = get_fires(2017, 36, 40)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [91]:
fires_2017_8 = pd.DataFrame(fires_2017_8)
fires_2017_8.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Ribbonwood Fire,5/24/2017,San Diego,25.0,01/09/18 10:10 AM,05/24/17 3:30 PM,01/09/18 10:10AM,"[32.679, -116.29296]",Unknown,0,0
46,Moreno Fire,5/23/2017,Riverside,10.0,06/04/21 10:48 AM,05/23/17 1:13 PM,01/09/18 10:08AM,"[33.94673, -117.17773]",Unknown,0,0
47,Bitterwater Fire,5/21/2017,San Benito,21.0,01/09/18 10:07 AM,05/22/17 4:50 AM,01/09/18 10:07AM,"[36.35578, -120.99552]",Unknown,0,0
48,Smiley Fire,5/21/2017,Riverside,35.0,01/09/18 10:07 AM,05/21/17 4:12 PM,01/09/18 10:07AM,"[33.96419, -117.20507]",Unknown,0,0
49,Gate Fire,5/20/2017,San Diego,2056.0,01/09/18 10:06 AM,05/20/17 11:19 PM,01/09/18 10:06AM,"[32.65597, -116.80527]",Unknown,0,0


In [92]:
fires_2017_9 = get_fires(2017, 40, 43)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [93]:
fires_2017_9 = pd.DataFrame(fires_2017_9)
fires_2017_9.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
35,Tower Fire,4/30/2017,San Bernardino,150.0,01/09/18 9:53 AM,04/30/17 4:03 PM,01/09/18 9:53AM,"[34.30104, -117.45646]",Unknown,0,0
36,Opera Fire,4/30/2017,Riverside,1350.0,01/09/18 9:53 AM,04/30/17 3:23 PM,01/09/18 9:53AM,"[34.00566, -117.30693]",Unknown,0,0
37,Tumey Fire,4/29/2017,Fresno,126.0,01/09/18 9:52 AM,04/29/17 5:15 PM,01/09/18 9:52AM,"[36.5355, -120.6585]",Unknown,0,0
38,El Dorado Fire,4/28/2017,Fresno,976.0,01/09/18 9:52 AM,04/28/17 3:40 PM,01/09/18 9:52AM,"[36.530836, -120.206592]",Unknown,0,0
39,Soledad Fire,4/24/2017,Monterey,10.0,01/09/18 9:51 AM,04/24/17 12:06 PM,01/09/18 9:51AM,"[36.44141, -121.34695]",Unknown,0,0


In [94]:
# last 6 fires

fires_2017_10 = get_fires(2017, 44, 44, 6)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [95]:
fires_2017_10 = pd.DataFrame(fires_2017_10)
fires_2017_10.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
1,66 Fire,4/12/2017,Riverside,15.0,01/09/18 9:50 AM,04/13/17 2:30 AM,01/09/18 9:50AM,"[33.56777, -116.08003]",Unknown,0,0
2,West Fire,4/3/2017,Modoc,380.0,01/09/18 9:40 AM,04/03/17 4:00 PM,01/09/18 9:40AM,"[41.16716, -120.58342]",Unknown,0,0
3,Grace Fire,4/3/2017,Shasta,45.0,01/09/18 9:41 AM,04/03/17 4:00 PM,01/09/18 9:41AM,"[40.48712, -121.85645]",Unknown,0,0
4,Oroville Spillway,2/7/2017,Butte,,01/09/18 9:24 AM,02/07/17 2:00 PM,01/09/18 9:24AM,"[39.51158, -121.55633]",Unknown,0,0
5,Taglio Fire,12/31/1969,Merced,12.0,01/09/18 10:05 AM,12/31/69 4:00 PM,01/09/18 10:05AM,"[37.21812, -121.07761]",Unknown,0,0


In [96]:
# combining 2017 data

fires_2017 = pd.concat([fires_2017_1, fires_2017_2, fires_2017_3, fires_2017_4,
           fires_2017_5, fires_2017_6, fires_2017_7, fires_2017_8,
             fires_2017_9, fires_2017_10])
fires_2017.head()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
0,Holiday Fire,12/28/2017,El Dorado,80.0,07/09/19 1:54 PM,12/28/17 8:51 PM,01/09/18 1:49PM,"[38.869036, -119.61865]",Unknown,0,0
1,Riverbottom Fire,12/21/2017,Riverside,45.0,07/09/19 1:52 PM,12/21/17 11:16 AM,01/09/18 1:49PM,"[33.9885, -117.38417]",Unknown,0,0
2,Drum Fire,12/16/2017,Santa Barbara,14.0,07/09/19 1:53 PM,12/16/17 12:21 PM,01/09/18 1:49PM,"[34.71796, -120.27077]",Unknown,0,0
3,Coast Fire,12/14/2017,Santa Barbara,14.0,07/09/19 1:54 PM,12/14/17 12:59 PM,01/09/18 1:49PM,"[34.605358, -120.414875]",Unknown,0,0
4,Longhorn Fire,12/13/2017,Riverside,19.0,01/09/18 1:49 PM,12/13/17 3:27 PM,01/09/18 1:49PM,"[33.88994, -116.91023]",Unknown,0,0


In [97]:
fires_2017.shape

(446, 11)

In [98]:
fires_2017.to_csv('fires_2017.csv', index=False)

#### Fixing 2017 

In [40]:
# coming back to fix 2017 because duplicated page 40

fires_2017 = pd.read_csv('fires_2017_old.csv')
fires_2017.head()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
0,Holiday Fire,12/28/2017,El Dorado,80.0,07/09/19 1:54 PM,12/28/17 8:51 PM,01/09/18 1:49PM,"[38.869036, -119.61865]",Unknown,0,0
1,Riverbottom Fire,12/21/2017,Riverside,45.0,07/09/19 1:52 PM,12/21/17 11:16 AM,01/09/18 1:49PM,"[33.9885, -117.38417]",Unknown,0,0
2,Drum Fire,12/16/2017,Santa Barbara,14.0,07/09/19 1:53 PM,12/16/17 12:21 PM,01/09/18 1:49PM,"[34.71796, -120.27077]",Unknown,0,0
3,Coast Fire,12/14/2017,Santa Barbara,14.0,07/09/19 1:54 PM,12/14/17 12:59 PM,01/09/18 1:49PM,"[34.605358, -120.414875]",Unknown,0,0
4,Longhorn Fire,12/13/2017,Riverside,19.0,01/09/18 1:49 PM,12/13/17 3:27 PM,01/09/18 1:49PM,"[33.88994, -116.91023]",Unknown,0,0


In [41]:
fires_2017.shape

(446, 11)

In [44]:
# drop 10 duplicate fires

fires_2017 = fires_2017.drop(fires_2017.index[400:410]).reset_index(drop=True)
fires_2017.shape

(436, 11)

In [47]:
fires_2017.to_csv('fires_2017.csv', index=False)

### 2016 

In [99]:
fires_2016_1 = get_fires(2016, 1, 5)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [100]:
fires_2016_1 = pd.DataFrame(fires_2016_1)
fires_2016_1.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Gap Fire,8/27/2016,Siskiyou,33867,08/28/16 6:15 PM,08/27/16 6:00 PM,08/28/16 6:15PM,"[41.851, -123.118]",Unknown,0,0
46,Santos Fire,8/26/2016,Butte,88,08/30/16 6:40 PM,08/26/16 3:30 PM,08/30/16 6:40PM,"[39.79547, -121.71662]",Unknown,0,0
47,Range Fire,8/26/2016,Kern,518,08/29/16 10:30 AM,08/26/16 10:10 AM,08/29/16 10:30AM,"[35.2013, -118.7212]",Unknown,0,0
48,Grade Fire,8/24/2016,Siskiyou,710,08/30/16 6:45 AM,08/24/16 2:55 PM,08/30/16 6:45AM,"[41.7813, -122.611]",Electrical,5,0
49,Tule Fire,8/22/2016,Tulare,395,11/08/16 10:15 AM,08/22/16 10:00 PM,11/08/16 10:15AM,"[36.1648, -118.73906]",Unknown,0,0


In [101]:
fires_2016_2 = get_fires(2016, 6, 10)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [102]:
fires_2016_2 = pd.DataFrame(fires_2016_2)
fires_2016_2.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Table Fire,7/21/2016,Siskiyou,23.0,07/22/16 12:00 PM,07/21/16 8:30 PM,07/22/16 12:00PM,"[41.71847, -122.36924]",Unknown,0,0
46,Roblar Fire,7/21/2016,San Diego,1245.0,07/30/16 2:15 PM,07/21/16 4:06 PM,07/30/16 2:15PM,"[33.393, -117.35093]",Unknown,0,0
47,Baker Fire,7/21/2016,Calaveras,57.0,07/22/16 7:00 PM,07/21/16 3:06 PM,07/22/16 7:00PM,"[37.98171, -120.64241]",Equipment,0,0
48,Serpa Fire,7/21/2016,Madera,80.0,07/25/16 7:20 PM,07/21/16 12:53 PM,07/25/16 7:20PM,,Unknown,0,0
49,Foothill Fire,7/19/2016,Lake,69.0,07/22/16 6:15 PM,07/19/16 5:16 PM,07/22/16 6:15PM,"[39.0863, -122.7891]",Unknown,0,0


In [103]:
fires_2016_3 = get_fires(2016, 11, 15)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [104]:
fires_2016_3 = pd.DataFrame(fires_2016_3)
fires_2016_3.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Arroyo Fire,6/18/2016,Monterey,70,06/20/16 6:15 PM,06/18/16 4:47 PM,06/20/16 6:15PM,"[36.2833, -121.3306]",Unknown,0,0
46,Camanche Fire,6/17/2016,Amador,210,06/21/16 6:30 PM,06/17/16 2:54 PM,06/21/16 6:30PM,"[38.26874, -120.90265]",Unknown,0,0
47,King Fire,6/17/2016,San Benito,237,06/20/16 6:00 AM,06/17/16 2:30 PM,06/20/16 6:00AM,,Unknown,2,0
48,Sherpa Fire,6/15/2016,Santa Barbara,7474,07/12/16 2:30 PM,06/15/16 3:21 PM,07/12/16 2:30PM,"[34.776, -119.643]",Unknown,0,0
49,Cheyenne Fire,6/15/2016,Calaveras,83,06/16/16 10:00 AM,06/15/16 12:46 PM,06/16/16 10:00AM,,Unknown,2,0


In [105]:
fires_2016_4 = get_fires(2016, 16, 18)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [106]:
fires_2016_4 = pd.DataFrame(fires_2016_4)
fires_2016_4.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
25,Athens Fire,5/17/2016,Placer,169.0,05/19/16 6:00 PM,05/17/16 3:06 PM,05/19/16 6:00PM,"[38.8390117, -121.3090268]",Unknown,0,0
26,Avocado Fire,5/15/2016,Fresno,132.0,05/17/16 7:10 PM,05/15/16 4:38 PM,05/17/16 7:10PM,"[36.78769, -119.38248]",Unknown,0,0
27,Bryson Fire,5/12/2016,Monterey,25.0,05/12/16 8:45 PM,05/12/16 2:13 PM,05/12/16 8:45PM,"[35.8378273, -121.0381507]",Unknown,0,0
28,Edison Fire,5/12/2016,Ventura,20.0,05/13/16 10:30 PM,05/12/16 2:13 PM,05/13/16 10:30PM,,Unknown,0,0
29,Shedd Fire,5/10/2016,San Luis Obispo,150.0,05/10/16 6:45 PM,05/10/16 4:11 PM,05/10/16 6:45PM,"[35.61961, -120.39993]",Unknown,0,0


In [107]:
# last 4 fires

fires_2016_5 = get_fires(2016, 19, 19, 4)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [108]:
fires_2016_5 = pd.DataFrame(fires_2016_5)
fires_2016_5.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
0,Casitas Fire,4/28/2016,Ventura,50.0,04/30/16 7:00 AM,04/28/16 7:00 PM,04/30/16 7:00AM,,Unknown,0,0
1,Hills Fire,4/28/2016,Fresno,151.0,04/29/16 5:15 PM,04/28/16 5:45 PM,04/29/16 5:15PM,,Unknown,0,0
2,Taglio Fire,4/24/2016,Merced,30.0,04/24/16 3:30 PM,04/24/16 11:10 AM,04/24/16 3:30PM,"[37.2171, -121.08036]",Unknown,0,0
3,Gorman Fire,4/19/2016,Los Angeles,,04/19/16 3:30 PM,04/19/16 1:09 PM,04/19/16 3:30PM,"[34.6888731, -118.7892593]",Unknown,0,0


In [109]:
# combining 2016 data

fires_2016 = pd.concat([fires_2016_1, fires_2016_2, fires_2016_3, fires_2016_4,
           fires_2016_5])
fires_2016.head()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
0,Marek Fire,11/13/2016,Los Angeles,4824.0,11/14/16 2:00 PM,11/14/16 5:45 AM,11/14/16 2:00PM,"[34.31, -118.335]",Unknown,0,0
1,Gopher Fire,11/13/2016,San Diego,28.0,11/14/16 6:00 PM,11/13/16 11:14 AM,11/14/16 6:00PM,"[33.254355, -117.158475]",Unknown,0,0
2,Meadow Fire,10/29/2016,Tulare,4347.0,12/19/16 1:30 PM,10/29/16 11:15 AM,12/19/16 1:30PM,"[35.984, -118.551]",Unknown,0,0
3,Jacobson Fire,10/20/2016,Tulare,10.0,12/19/16 1:30 PM,10/20/16 5:00 PM,12/19/16 1:30PM,"[36.217, -118.551]",Unknown,0,0
4,Emerald Fire,10/13/2016,El Dorado,176.0,10/17/16 11:45 AM,10/14/16 1:28 AM,10/17/16 11:45AM,"[38.93449, -120.10744]",Under Investigation,0,0


In [110]:
fires_2016.shape

(184, 11)

In [111]:
fires_2016.to_csv('fires_2016.csv', index=False)

### 2015 

In [112]:
fires_2015_1 = get_fires(2015, 1, 5)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [113]:
fires_2015_1 = pd.DataFrame(fires_2015_1)
fires_2015_1.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,De Luz Fire,8/9/2015,San Diego,55,08/10/15 3:05 PM,08/09/15 8:22 AM,08/10/15 3:05PM,"[33.4115972, -117.2886249]",Unknown,0,0
46,36 Fire,8/8/2015,Tehama,110,08/09/15 8:30 PM,08/08/15 2:55 PM,08/09/15 8:30PM,"[40.3506868, -121.7786799]",Under Investigation,0,0
47,Francis Fire,8/3/2015,Yuba,30,08/05/15 7:30 AM,08/03/15 6:07 PM,08/05/15 7:30AM,"[39.33303, -121.18735]",Unknown,0,0
48,Point Fire,8/3/2015,Shasta,59,08/04/15 8:15 AM,08/03/15 2:28 PM,08/04/15 8:15AM,"[40.3929, -122.40546]",Unknown,0,0
49,Dodge Fire,8/3/2015,Lassen,10570,08/04/15 10:37 AM,08/03/15 2:15 PM,08/04/15 10:37AM,"[40.938, -120.105]",Unknown,0,0


In [114]:
fires_2015_2 = get_fires(2015, 6, 10)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [115]:
fires_2015_2 = pd.DataFrame(fires_2015_2)
fires_2015_2.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Lake Fire,7/16/2015,San Diego,25.0,07/17/15 3:15 PM,07/16/15 11:30 AM,07/17/15 3:15PM,"[33.21407, -116.73472]",Unknown,0,0
46,Cooley Fire,7/13/2015,Siskiyou,181.0,07/14/15 8:30 AM,07/13/15 5:56 PM,07/14/15 8:30AM,"[41.8048, -122.5029]",Unknown,0,0
47,241 Fire,7/13/2015,Orange,214.0,07/15/15 6:45 PM,07/13/15 10:47 AM,07/15/15 6:45PM,"[33.7626, -117.7274]",Unknown,0,0
48,Mill 2 Fire,7/12/2015,San Bernardino,54.0,07/21/15 7:20 AM,07/12/15 2:46 PM,07/21/15 7:20AM,"[34.0783472, -117.0487863]",Vehicle,0,0
49,Spider Fire,7/12/2015,Riverside,21.0,07/12/15 7:48 PM,07/12/15 1:52 PM,07/12/15 7:48PM,"[33.986204, -117.43942]",Unknown,0,0


In [116]:
fires_2015_3 = get_fires(2015, 11, 15)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [117]:
fires_2015_3 = pd.DataFrame(fires_2015_3)
fires_2015_3.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Site Fire,6/5/2015,Alameda,300.0,06/06/15 7:15 AM,06/05/15 8:22 PM,06/06/15 7:15AM,,Unknown,0,0
46,Oasis Fire,6/5/2015,Lake,25.0,06/06/15 6:45 AM,06/05/15 7:57 PM,06/06/15 6:45AM,,Unknown,0,0
47,FKU Lightning Fires,6/5/2015,Fresno,45.0,06/06/15 8:00 PM,06/05/15 5:00 PM,06/06/15 8:00PM,,Unknown,0,0
48,Snow Fire,6/5/2015,Riverside,25.0,06/05/15 7:00 PM,06/05/15 1:32 PM,06/05/15 7:00PM,,Unknown,0,0
49,Harrison Fire,6/1/2015,Riverside,80.0,06/05/15 4:32 PM,06/01/15 10:16 AM,06/05/15 4:32PM,,Unknown,0,0


In [118]:
# last 7 fires

fires_2015_4 = get_fires(2015, 16, 16, 7)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [119]:
fires_2015_4 = pd.DataFrame(fires_2015_4)
fires_2015_4.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
2,Highway Fire,4/18/2015,Riverside,1049,04/24/15 7:30 AM,04/18/15 6:12 PM,04/24/15 7:30AM,"[33.884313, -117.642759]",Unknown,0,0
3,Ward Fire,4/12/2015,Plumas,137,04/18/15 11:50 PM,04/13/15 5:30 AM,04/18/15 11:50PM,"[40.050833333333, -120.70166666667]",Unknown,0,0
4,Stephens Fire,2/24/2015,Siskiyou,200,03/02/15 9:00 AM,02/24/15 12:15 PM,03/02/15 9:00AM,"[41.485, -121.851]",Unknown,0,0
5,Van Dyke Fire,2/6/2015,Mono,509,02/20/15 7:00 PM,02/06/15 4:52 PM,02/20/15 7:00PM,,Unknown,0,0
6,Round Fire,2/6/2015,Inyo and Mono,7000,02/12/15 7:35 AM,02/06/15 2:07 PM,02/12/15 7:35AM,,Under Investigation,40,0


In [120]:
# combining 2015 data

fires_2015 = pd.concat([fires_2015_1, fires_2015_2, fires_2015_3, fires_2015_4])
fires_2015.head()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
0,Solimar Fire,12/25/2015,,1388.0,12/29/15 10:00 AM,12/25/15 10:42 PM,12/29/15 10:00AM,"[34.31953, -119.34512]",Unknown,0,0
1,Banister Fire,11/11/2015,Ventura,29.0,11/12/15 6:00 PM,11/11/15 5:40 PM,11/12/15 6:00PM,,Unknown,0,0
2,Potrero Fire,11/6/2015,Ventura,50.0,11/07/15 4:45 PM,11/07/15 1:45 AM,11/07/15 4:45PM,"[34.15, -118.879444]",Unknown,0,0
3,Gibraltar Fire,10/28/2015,Santa Barbara,21.0,11/09/15 12:00 PM,10/29/15 5:30 AM,11/09/15 12:00PM,"[34.4828, -119.63216]",Unknown,0,0
4,Cienega Fire,10/12/2015,San Benito,670.0,10/16/15 6:00 PM,10/12/15 4:00 PM,10/16/15 6:00PM,"[36.70854, -121.32734]",Unknown,0,0


In [121]:
fires_2015.shape

(157, 11)

In [122]:
fires_2015.to_csv('fires_2015.csv', index=False)

### 2014 

In [123]:
fires_2014_1 = get_fires(2014, 1, 5)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [124]:
fires_2014_1 = pd.DataFrame(fires_2014_1)
fires_2014_1.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Beaver Fire,7/29/2014,Siskiyou,32496.0,09/02/14 1:00 PM,07/30/14 12:00 AM,09/02/14 1:00PM,,Unknown,0,0
46,Siskiyou County Lightning Fires,7/29/2014,Siskiyou,30.0,08/02/14 11:00 AM,07/29/14 1:00 PM,08/02/14 11:00AM,,Unknown,0,0
47,Web Fire,7/28/2014,Butte,389.0,08/01/14 5:30 PM,07/28/14 1:11 PM,08/01/14 5:30PM,,Under Investigation,0,0
48,French Fire,7/27/2014,Madera,13838.0,07/28/14 2:30 PM,07/28/14 5:45 AM,07/28/14 2:30PM,,Unknown,0,0
49,El Portal Fire,7/26/2014,Mariposa,4689.0,08/10/14 8:15 AM,07/26/14 5:00 PM,08/10/14 8:15AM,,Unknown,0,0


In [125]:
fires_2014_2 = get_fires(2014, 6, 10)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [126]:
fires_2014_2 = pd.DataFrame(fires_2014_2)
fires_2014_2.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,River Fire,5/14/2014,San Diego,105,05/19/14 9:20 AM,05/14/14 12:12 PM,05/19/14 9:20AM,,Unknown,0,0
46,Poinsettia Fire,5/14/2014,San Diego,600,05/17/14 12:00 AM,05/14/14 10:30 AM,05/17/14 12:00AM,"[33.1109, -117.279]",Unknown,0,0
47,Tomahawk Fire,5/14/2014,San Diego,5367,05/19/14 9:20 AM,05/14/14 9:45 AM,05/19/14 9:20AM,"[33.3529, -117.284889]",Unknown,0,0
48,Miguelito Fire,5/13/2014,Santa Barbara,632,05/19/14 9:20 AM,05/13/14 2:00 PM,05/19/14 9:20AM,"[34.609, -120.454]",Under Investigation,0,0
49,Bernardo Fire,5/13/2014,San Diego,1548,05/17/14 8:14 PM,05/13/14 11:00 AM,05/17/14 8:14PM,"[33.003, -117.133]",Equipment,0,0


In [128]:
fires_2014_3 = get_fires(2014, 11, 11)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [129]:
fires_2014_3 = pd.DataFrame(fires_2014_3)
fires_2014_3.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
5,Encinal Fire,3/19/2014,Monterey,400,03/21/14 6:00 PM,03/19/14 4:45 PM,03/21/14 6:00PM,"[36.6313, -121.4819]",Fire Escaped into Wildland,0,0
6,Pierce Fire,3/15/2014,Riverside,350,03/16/14 8:30 AM,03/15/14 4:37 PM,03/16/14 8:30AM,,Unknown,0,0
7,Gun Fire,3/8/2014,San Diego,125,03/11/14 11:00 AM,03/08/14 12:30 PM,03/11/14 11:00AM,"[32.81, -116.49]",Unknown,0,0
8,Lake Fire,1/24/2014,Fresno,106,01/26/14 6:00 PM,01/25/14 6:37 AM,01/26/14 6:00PM,"[36.8266, -119.3415]",Unknown,0,0
9,Brewer Fire,1/22/2014,Placer,120,01/22/14 4:35 PM,01/22/14 1:10 PM,01/22/14 4:35PM,,Unknown,0,0


In [130]:
# last 5 fires

fires_2014_4 = get_fires(2014, 12, 12, 5)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [131]:
fires_2014_4 = pd.DataFrame(fires_2014_4)
fires_2014_4.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
0,Colby Fire,1/15/2014,Los Angeles,1952,01/27/14 10:00 AM,01/16/14 5:50 AM,01/27/14 10:00AM,"[34.1703, -117.8816]",Unknown,0,0
1,Red Fire,1/4/2014,Humboldt,333,01/08/14 7:15 PM,01/04/14 12:00 PM,01/08/14 7:15PM,"[40.884415, -123.773779]",Unknown,0,0
2,Bridge Fire,1/3/2014,Humboldt,18,01/04/14 6:30 PM,01/03/14 10:30 AM,01/04/14 6:30PM,,Unknown,0,0
3,Campbell Fire,1/2/2014,Tehama,865,01/10/14 6:30 PM,01/02/14 8:00 PM,01/10/14 6:30PM,"[40.134597, -121.790183]",Unknown,0,0
4,Honcut Fire,1/1/2014,Butte,60,01/02/14 7:30 AM,01/01/14 10:46 AM,01/02/14 7:30AM,,Unknown,0,0


In [132]:
# combining 2014 data

fires_2014 = pd.concat([fires_2014_1, fires_2014_2, fires_2014_3, fires_2014_4])
fires_2014.head()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
0,Bane Fire,10/16/2014,San Bernardino,48.0,10/17/14 6:15 PM,10/16/14 2:34 PM,10/17/14 6:15PM,"[33.93869, -117.70643]",Unknown,0,0
1,Applegate Fire,10/8/2014,Placer,459.0,10/15/14 7:50 AM,10/08/14 1:33 PM,10/15/14 7:50AM,,Under Investigation,10,0
2,Dog Rock Fire,10/7/2014,Mariposa,311.0,10/12/14 6:00 PM,10/07/14 3:05 PM,10/12/14 6:00PM,,Unknown,0,0
3,Foothill Fire,9/29/2014,Ventura,25.0,09/30/14 3:00 PM,09/29/14 7:35 PM,09/30/14 3:00PM,,Unknown,0,0
4,Cascade Fire,9/25/2014,El Dorado,20.0,09/25/14 7:27 AM,09/25/14 6:00 PM,09/25/14 7:27AM,"[38.9195001, -120.1140659]",Unknown,0,0


In [133]:
fires_2014.shape

(115, 11)

In [134]:
fires_2014.to_csv('fires_2014.csv', index=False)

### 2013 

In [135]:
fires_2013_1 = get_fires(2013, 1, 5)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [136]:
fires_2013_1 = pd.DataFrame(fires_2013_1)
fires_2013_1.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Cleghorn Fire,8/17/2013,San Bernardino,110,08/18/13 8:55 PM,08/17/13 5:25 PM,08/18/13 8:55PM,"[34.287, -117.451]",Unknown,0,0
46,Rim Fire,8/17/2013,Tuolumne,257314,09/06/13 6:30 PM,08/17/13 3:25 PM,09/06/13 6:30PM,"[37.857, -120.086]",Unknown,0,0
47,Bridges Fire,8/17/2013,Calaveras,46,08/19/13 7:10 AM,08/17/13 10:11 AM,08/19/13 7:10AM,"[38.06231, -120.45233]",Under Investigation,0,0
48,Double Fire,8/16/2013,Lake,70,08/18/13 6:30 PM,08/16/13 5:57 PM,08/18/13 6:30PM,"[39.013682, -122.491894]",Unknown,0,0
49,Swedes Fire,8/16/2013,Butte,2462,08/22/13 6:00 PM,08/16/13 4:37 PM,08/22/13 6:00PM,"[39.446268, -121.38236]",Under Investigation,7,0


In [137]:
fires_2013_2 = get_fires(2013, 6, 10)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [138]:
fires_2013_2 = pd.DataFrame(fires_2013_2)
fires_2013_2.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Fox Fire (formally 3-7 Fire),7/3/2013,Modoc,502.0,07/05/13 6:30 PM,07/03/13 10:56 PM,07/05/13 6:30PM,"[41.2415, -121.035]",Unknown,0,0
46,Northern Region July Lightning,7/2/2013,,754.0,07/04/13 10:45 AM,07/02/13 12:00 PM,07/04/13 10:45AM,"[39.9845, -121.5747]",Unknown,0,0
47,Concord Fire,7/1/2013,Contra Costa,274.0,07/01/13 6:30 PM,07/01/13 3:41 PM,07/01/13 6:30PM,"[39.90465, -121.74367]",Unknown,0,0
48,Kirker Fire,7/1/2013,Contra Costa,492.0,07/01/13 6:40 PM,07/01/13 1:19 PM,07/01/13 6:40PM,"[37.96866, -121.91453]",Unknown,0,0
49,Freeman Fire,6/29/2013,Monterey,105.0,06/29/13 6:30 PM,06/29/13 2:09 PM,06/29/13 6:30PM,"[36.1531, -120.8973]",Unknown,0,0


In [139]:
fires_2013_3 = get_fires(2013, 11, 15)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [140]:
fires_2013_3 = pd.DataFrame(fires_2013_3)
fires_2013_3.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
45,Tres Pinos Fire,5/3/2013,San Benito,354.0,05/03/13 6:45 PM,05/03/13 11:42 AM,05/03/13 6:45PM,"[37.160346, -120.937494]",Unknown,0,0
46,Springs Fire,5/2/2013,Ventura,24251.0,05/11/13 6:30 AM,05/02/13 7:01 AM,05/11/13 6:30AM,,Under Investigation,10,0
47,306 Fire,5/1/2013,Glenn,217.0,05/05/13 6:00 PM,05/01/13 7:00 PM,05/05/13 6:00PM,"[39.514139, -122.560862]",Under Investigation,0,0
48,Summit Fire,5/1/2013,Riverside,3166.0,05/04/13 6:30 PM,05/01/13 12:38 PM,05/04/13 6:30PM,"[34.288877, -116.941311]",Under Investigation,0,0
49,Panther Fire,5/1/2013,Tehama,6965.0,05/09/13 9:00 AM,05/01/13 9:12 AM,05/09/13 9:00AM,"[40.190062, -121.595555]",Under Investigation,0,0


In [141]:
# last 9 fires

fires_2013_4 = get_fires(2013, 16, 16, 9)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [/Users/mayaremington/.wdm/drivers/chromedriver/mac64/94.0.4606.61/chromedriver] found in cache


In [142]:
fires_2013_4 = pd.DataFrame(fires_2013_4)
fires_2013_4.tail()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
4,Fawnskin Fire,4/20/2013,San Bernardino,30,04/22/13 9:00 AM,04/20/13 5:30 PM,04/22/13 9:00AM,"[34.288877, -116.941311]",Unknown,0,0
5,Butte Fire,4/9/2013,Fresno,80,04/09/13 3:30 PM,04/09/13 12:35 PM,04/09/13 3:30PM,,Unknown,0,0
6,Jurupa Fire,2/28/2013,Riverside,311,03/02/13 7:00 PM,02/28/13 4:43 PM,03/02/13 7:00PM,,Unknown,0,0
7,River Fire,2/24/2013,Inyo,406,02/28/13 8:00 PM,02/24/13 8:16 AM,02/28/13 8:00PM,"[36.602575, -118.01651]",Under Investigation,0,0
8,Becks Fire,1/22/2013,Lake,296,01/23/13 10:15 AM,01/22/13 11:08 AM,01/23/13 10:15AM,,Unknown,0,0


In [143]:
# combining 2013 data

fires_2013 = pd.concat([fires_2013_1, fires_2013_2, fires_2013_3, fires_2013_4])
fires_2013.head()

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
0,Grant Fire,12/30/2013,Santa Clara,40.0,12/31/13 4:00 PM,12/31/13 5:30 AM,12/31/13 4:00PM,"[37.342186, -121.717706]",Unknown,0,0
1,Refuse Fire,12/30/2013,Mendocino,15.0,12/30/13 8:30 PM,12/30/13 11:00 AM,12/30/13 8:30PM,,Unknown,0,0
2,Pfeiffer Fire,12/15/2013,Monterey,917.0,12/20/13 8:00 PM,12/16/13 12:20 AM,12/20/13 8:00PM,,Unknown,0,0
3,Happy Camp Fire,12/9/2013,Ventura,44.0,12/11/13 5:30 PM,12/10/13 2:26 AM,12/11/13 5:30PM,,Unknown,0,0
4,Dam Fire,11/23/2013,Lake,23.0,11/24/13 6:45 PM,11/23/13 2:17 PM,11/24/13 6:45PM,"[38.9289, -122.5818]",Unknown,0,0


In [144]:
fires_2013.shape

(159, 11)

In [145]:
fires_2013.to_csv('fires_2013.csv', index=False)

### Combining scraped data into a SQL database

In [10]:
# importing scraped data files from csv because kernel has been reset and webscraping function doesn't work after CAlFire changed their website layout

fire_list = []

for year in range(2013, 2021)[::-1]:  # file rows are in reverse chronological order - so years need to be also
    fire_list.append(pd.read_csv('fires_' + str(year) + '.csv'))

In [11]:
# combining data

fire_df = pd.concat(fire_list)
fire_df.head(5)

Unnamed: 0,name,start_date,counties,acres,last_updated,date_started,date_contained,lat_long,cause,destroyed,fatalities
0,Sanderson Fire,12/12/2020,Riverside,1933.0,12/14/20 5:50 PM,12/13/20 1:00 AM,12/14/20 5:50PM,"[33.93244, -117.03459]",Under Investigation,0,0
1,Cornell Fire,12/7/2020,Ventura,174.0,12/17/20 11:18 AM,12/07/20 11:44 AM,12/08/20 11:17AM,"[34.33622, -119.078]",,0,0
2,Thomas Fire,12/3/2020,Lassen,24.0,12/03/20 6:36 PM,12/03/20 1:32 PM,12/03/20 5:40PM,"[41.591948, -120.374514]",,0,0
3,Cerritos Fire,12/2/2020,Riverside,200.0,12/04/20 6:45 PM,12/03/20 2:06 AM,,"[33.773754, -117.051463]",Under Investigation,0,0
4,Bond Fire,12/2/2020,Orange,6686.0,12/17/20 1:15 PM,12/02/20 10:14 PM,12/10/20 6:59PM,"[33.743842, -117.674967]",,31,0


In [12]:
fire_df.shape

(1883, 11)

In [21]:
# using SQLAlchemy to connect to sqlite wildfires.db

engine = create_engine("sqlite:///wildfires.db")

In [25]:
# converting df to a SQL table

fire_df.to_sql('raw_data', con=engine,
              if_exists='replace', index=False)