### Import libraries and Connect to Database

In [None]:
import sqlite3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

# Open the wildfire SQLite database and report how many rows this
# connection has modified so far.
database_file = "Wildfires.sqlite"
conn = sqlite3.connect(database_file)
changes_so_far = conn.total_changes
print(changes_so_far)

### Read and Filter Data

In [None]:
# Load every row and column of the Fires table into a DataFrame,
# then print its column/dtype/memory summary.
fires_query = "SELECT * FROM Fires;"
df = pd.read_sql_query(fires_query, con=conn)
df.info()

In [None]:
# Keep only the fires whose FIRE_YEAR is 2000 or later
is_2000_or_later = df['FIRE_YEAR'].ge(2000)
fires_all = df.loc[is_2000_or_later]
fires_all.info()

In [None]:
# Drop columns with a lot of missing data (per the original note); the list
# is made up of ID, report/incident, name and county/FIPS fields.
columns_to_drop = [
    'FOD_ID',
    'FPA_ID',
    'LOCAL_FIRE_REPORT_ID',
    'LOCAL_INCIDENT_ID',
    'FIRE_CODE',
    'ICS_209_INCIDENT_NUMBER',
    'ICS_209_NAME',
    'MTBS_ID',
    'MTBS_FIRE_NAME',
    'COMPLEX_NAME',
    'COUNTY',
    'FIPS_CODE',
    'FIPS_NAME',
]
fires = fires_all.drop(columns=columns_to_drop)
fires.info()

In [None]:
# Creating new date variables
# DAYS_TO_CONTAIN is the difference between the containment and discovery
# day numbers; rows where that difference is missing are set to 0.
# NOTE(review): filling with 0 makes a missing date indistinguishable from a
# same-day containment - confirm this is intended.
fires['DAYS_TO_CONTAIN'] = fires['CONT_DATE'].sub(fires['DISCOVERY_DATE']).fillna(0)

# Convert the Julian day numbers into pandas timestamps.
julian_day_options = {'unit': 'D', 'origin': 'julian'}
fires['DISCOVERY_DATE_D'] = pd.to_datetime(fires['DISCOVERY_DATE'], **julian_day_options)
fires['CONT_DATE_D'] = pd.to_datetime(fires['CONT_DATE'], **julian_day_options)

In [None]:
# NOAA climate regions (https://www.ncdc.noaa.gov/monitoring-references/maps/us-climate-regions.php)
# Each region is listed with its member states; a STATE value that appears in
# none of the lists (including a missing value) is labelled 'Other'.
region_members = {
    'Northwest': ['WA', 'OR', 'ID'],
    'West': ['CA', 'NV'],
    'Northern Rockies': ['MT', 'NE', 'ND', 'SD', 'WY'],
    'Alaska': ['AK'],
    'Southwest': ['AZ', 'NM', 'CO', 'UT'],
    'South': ['OK', 'TX', 'KS', 'AR', 'LA', 'MS'],
    'Southeast': ['AL', 'FL', 'GA', 'NC', 'SC', 'VA'],
    'Ohio Valley': ['IL', 'IN', 'KY', 'MO', 'TN', 'OH', 'WV'],
    'Upper Midwest': ['IA', 'MI', 'MN', 'WI'],
    'Northeast': ['CT', 'NY', 'ME', 'MA', 'MD', 'NH', 'NJ', 'PA', 'RI', 'VT', 'DE'],
}

# Invert the table into a flat state -> region lookup.
region_by_state = {
    state: region
    for region, states in region_members.items()
    for state in states
}
fires['REGION'] = fires['STATE'].map(region_by_state).fillna('Other')

In [None]:
# Write the 2000-and-later data set (with the derived date and region
# columns added so far) to a CSV file.
after_2000_csv = 'Fires_During_After_2000.csv'
fires.to_csv(after_2000_csv)

In [None]:
# Restricting data source to only records with discovery and containment times
# A time counts as present when it is neither missing nor an empty string.
has_discovery_time = fires['DISCOVERY_TIME'].notna() & (fires['DISCOVERY_TIME'] != '')
fires_time = fires[has_discovery_time]

# Build the containment mask from fires_time itself: a mask indexed like the
# unfiltered `fires` frame would have to be re-aligned by pandas and triggers
# a "Boolean Series key will be reindexed" warning.
has_cont_time = fires_time['CONT_TIME'].notna() & (fires_time['CONT_TIME'] != '')

# .copy() makes fires_cont an independent frame, so the columns added to it
# later do not raise SettingWithCopyWarning.
fires_cont = fires_time[has_cont_time].copy()
fires_cont.info()

In [None]:
# Separating discovery hour and min as well as containment hour and min because original format
# was not allowing for transformation to date time
# The time values are treated as text whose last two characters are the
# minutes and whose leading characters are the hour.
# Surrounding whitespace is stripped once, BEFORE slicing, for both fields:
# stripping only after slicing would leave a padded value with shifted
# hour/minute parts.
discovery_clock_text = fires_cont['DISCOVERY_TIME'].astype(str).str.strip()
cont_clock_text = fires_cont['CONT_TIME'].astype(str).str.strip()

fires_cont['DISCOVERY_HOUR'] = discovery_clock_text.str[:-2]
fires_cont['DISCOVERY_MIN'] = discovery_clock_text.str[-2:]
fires_cont['CONT_HOUR'] = cont_clock_text.str[:-2]
fires_cont['CONT_MIN'] = cont_clock_text.str[-2:]

In [None]:
# Create time variables
# Join hour and minute back together as 'H:M' text and parse it. Only a time
# is supplied to the parser, so the date part of the result is a placeholder.
discovery_hm_text = fires_cont['DISCOVERY_HOUR'].astype(str) + ':' + fires_cont['DISCOVERY_MIN'].astype(str)
fires_cont['DISCOVERY_TIME_NEW'] = pd.to_datetime(discovery_hm_text, format='%H:%M')

cont_hm_text = fires_cont['CONT_HOUR'].astype(str) + ':' + fires_cont['CONT_MIN'].astype(str)
fires_cont['CONT_TIME_NEW'] = pd.to_datetime(cont_hm_text, format='%H:%M')

fires_cont.info()

In [None]:
# Create datetime variables and hours til containment variable
# The *_TIME_NEW columns are timestamps that hold a real time of day on a
# placeholder calendar date. Subtracting each value's own midnight leaves just
# the time of day as a timedelta, which is then added to the real calendar
# date. This avoids gluing two timestamp strings together and re-parsing
# them, and rows with a missing date simply come out as NaT.
discovery_time_of_day = (
    fires_cont['DISCOVERY_TIME_NEW'] - fires_cont['DISCOVERY_TIME_NEW'].dt.normalize()
)
cont_time_of_day = (
    fires_cont['CONT_TIME_NEW'] - fires_cont['CONT_TIME_NEW'].dt.normalize()
)

# normalize() pins the date columns to midnight so the time of day is
# counted exactly once.
fires_cont['DISCOVERY_DATE_DT'] = fires_cont['DISCOVERY_DATE_D'].dt.normalize() + discovery_time_of_day
fires_cont['CONT_DATE_DT'] = fires_cont['CONT_DATE_D'].dt.normalize() + cont_time_of_day

# Elapsed time from discovery to containment, expressed in (fractional) hours.
fires_cont['CONT_HOURS'] = (
    fires_cont['CONT_DATE_DT'] - fires_cont['DISCOVERY_DATE_DT']
) / np.timedelta64(1, 'h')
fires_cont.head()

In [None]:
# Write the records that have both discovery and containment times
# (including the derived time columns) to a CSV file.
containment_csv = 'Fires_During_After_2000_With_Containment.csv'
fires_cont.to_csv(containment_csv)

In [None]:
# Restrictions for final data set
# 1) keep fires that took more than 5 hours to contain
took_over_five_hours = fires_cont['CONT_HOURS'].gt(5)
fires_new = fires_cont.loc[took_over_five_hours]

# 2) drop size class A (A is fire size class that is <= 0.25 acres)
not_size_class_a = fires_new['FIRE_SIZE_CLASS'].ne('A')
fires_final = fires_new.loc[not_size_class_a]
fires_final.info()

In [None]:
# Export final data set used in models.
# The earlier CSV exports were kept for flexibility while the data
# restrictions were still being worked out; this file is the one that was
# ultimately used.
final_csv = 'Fires_Final_Data.csv'
fires_final.to_csv(final_csv)