# Objective
I want to create a tool that could be used to determine the risk of a forest fire happening, using as small of a geographic and temporal unit as possible. 

# Packages 

In [4]:
import pandas as pd
import numpy as np
import sqlite3
import plotly.express as px

# Data Sources
I'm going to need data that describes...
- Where forest fires have happened in the past
- Information per geo unit
    - Elevation
    - June mean temp
    - Annual rainfall
    - Rural-ness
    - Vegetation density

I can get my historical forest fire data from this [Kaggle dataset](https://www.kaggle.com/rtatman/188-million-us-wildfires). It has 1.88 million wildfires with geographic information, between 1992 and 2015. 

Here's a dataset of 1,800 fires in California that I might want to add later. [Link](https://www.kaggle.com/ananthu017/california-wildfire-incidents-20132020)

Here's temperature and precipitation data for all US counties. [Link](https://www.ncdc.noaa.gov/cag/county/mapping/110/tavg/202101/1/anomaly)

# Cleaning Forest Fire Dataset

In [5]:
con = sqlite3.connect("/Users/patricknorman/Downloads/FPA_FOD_20170508.sqlite")

cur = con.cursor()

# The result of a "cursor.execute" can be iterated over by row
first_fire = []

for row in cur.execute('SELECT * FROM Fires LIMIT 1;'):
    for x in row:
        first_fire.append(x)
        
df = pd.read_sql_query('SELECT * FROM Fires', con)

# Be sure to close the connection
con.close()

In [6]:
df = df.rename(columns={'FIPS_CODE':'COUNTY_CODE'})

In [7]:
state_codes = {
    'WA': '53', 'DE': '10', 'DC': '11', 'WI': '55', 'WV': '54', 'HI': '15',
    'FL': '12', 'WY': '56', 'PR': '72', 'NJ': '34', 'NM': '35', 'TX': '48',
    'LA': '22', 'NC': '37', 'ND': '38', 'NE': '31', 'TN': '47', 'NY': '36',
    'PA': '42', 'AK': '02', 'NV': '32', 'NH': '33', 'VA': '51', 'CO': '08',
    'CA': '06', 'AL': '01', 'AR': '05', 'VT': '50', 'IL': '17', 'GA': '13',
    'IN': '18', 'IA': '19', 'MA': '25', 'AZ': '04', 'ID': '16', 'CT': '09',
    'ME': '23', 'MD': '24', 'OK': '40', 'OH': '39', 'UT': '49', 'MO': '29',
    'MN': '27', 'MI': '26', 'RI': '44', 'KS': '20', 'MT': '30', 'MS': '28',
    'SC': '45', 'KY': '21', 'OR': '41', 'SD': '46'
}

# converting values: str to int
# for key in state_codes.keys():
    # state_codes[key] = int(state_codes[key])

In [8]:
df2 = df
df2['STATE_CODE'] = df2.STATE.apply(lambda x: state_codes[x])

In [9]:
df2 = df2.astype({'COUNTY_CODE':'str'})

In [10]:
df2.FIPS = df2.STATE_CODE + df2.COUNTY_CODE


  df2.FIPS = df2.STATE_CODE + df2.COUNTY_CODE


In [13]:
df2.FIPS[0]

'06063'

In [26]:
df2.drop('Shape', axis=1, inplace=True)

# Importing and Cleaning June Temps

In [59]:
june_temps = pd.read_csv('/Users/patricknorman/Downloads/june_temps.csv')

june_temps['FIPS'] = june_temps['Location ID'].apply(lambda x: str(x[3:]))
june_temps['state'] = june_temps['Location ID'].apply(lambda x: x[:2])

june_temps['state_code'] = june_temps['state'].apply(lambda x: str(state_codes[x]))
june_temps['FIPS'] = june_temps['state_code'] + june_temps['FIPS']

june_temps.drop(['Location ID', 'state', 'state_code','Location'], axis=1, inplace=True)

june_temps.rename(columns={'Value':'june_temp',
                           'Rank':'june_temp_rank',
                           'Anomaly (1901-2000 base period)':'june_temp_anomaly',
                           '1901-2000 Mean':'june_temp_historical_mean'}, inplace=True)

In [60]:
june_temps.head()

Unnamed: 0,june_temp,june_temp_rank,june_temp_anomaly,june_temp_historical_mean,FIPS
0,77.9,49,-0.4,78.3,1001
1,79.7,73,0.3,79.4,1003
2,77.5,31,-1.1,78.6,1005
3,76.3,30,-1.3,77.6,1007
4,75.9,59,0.1,75.8,1009


# Importing and Cleaning Annual Precipitation

In [None]:
precip = pd.read_csv('')