# Create database table of merged fire & air data (fire_and_air_data)
### This notebook reads the aqi_data and cleaned_wildfire_data tables, merges them, and writes the merged result to the fire_and_air_data table


In [1]:
import os
from datetime import datetime,date,timedelta

import numpy as np
import pandas as pd
from sqlalchemy import create_engine

In [2]:
# Connection details in the form user:password@host/database.
# Credentials should not live in the notebook: set the FIRES_DB_CONNECTION
# environment variable to supply them.  The literal below is kept only as the
# local-development fallback so existing setups keep working unchanged.
rds_connection_string = os.environ.get(
    "FIRES_DB_CONNECTION",
    "root:12345678@127.0.0.1/fires",
)

# Single SQLAlchemy engine (MySQL via the pymysql driver) shared by every
# read and write in this notebook.
engine = create_engine(f'mysql+pymysql://{rds_connection_string}')


## Read in air data and fire data

In [3]:
# Load every row and column of the cleaned wildfire table into a DataFrame.
wildfire_df = pd.read_sql(
    sql="select * from cleaned_wildfire_data",
    con=engine,
)


  result = self._query(query)


In [4]:
# Restrict the wildfire frame to these eight columns, in this order; any
# other column returned by the query is discarded.
wildfire_df = wildfire_df.loc[
    :,
    [
        'fire_id',
        'fire_name',
        'discovery_date',
        'containment_date',
        'size',
        'class',
        'fire_year',
        'distance',
    ],
]



In [5]:
# Preview the first five wildfire records.
wildfire_df.head(n=5)

Unnamed: 0,fire_id,fire_name,discovery_date,containment_date,size,class,fire_year,distance
0,0,FOUNTAIN,2005-02-02,2005-02-02,0.1,A,2005,173.637835
1,1,PIGEON,2004-05-12,2004-05-12,0.25,A,2004,135.485925
2,2,SLACK,2004-05-31,2004-05-31,0.1,A,2004,123.749694
3,3,DEER,2004-06-28,2004-07-03,0.1,A,2004,146.784946
4,4,STEVENOT,2004-06-28,2004-07-03,0.1,A,2004,145.787248


In [6]:
# Load every row and column of the AQI table, then preview the first five
# records.
airquality_df = pd.read_sql(
    sql="select * from aqi_data",
    con=engine,
)
airquality_df.head(n=5)


Unnamed: 0,date,overall_aqi,site_name,year
0,2001-01-01,153,San Francisco,2001
1,2001-01-02,140,San Francisco,2001
2,2001-01-03,140,San Francisco,2001
3,2001-01-04,107,San Francisco,2001
4,2001-01-05,124,San Francisco,2001


## Merge them on the fire's containment date (i.e., its end date), matched against the air-quality reading date

In [7]:
# Left join: keep every air-quality row and attach each fire whose
# containment_date equals that row's date.  A date with several fires yields
# one output row per fire; a date with none gets NaN in the fire columns.
merged_data_df = pd.merge(
    airquality_df,
    wildfire_df,
    how='left',
    left_on='date',
    right_on="containment_date",
)

In [8]:
# Preview the first five merged records.
merged_data_df.head(n=5)

Unnamed: 0,date,overall_aqi,site_name,year,fire_id,fire_name,discovery_date,containment_date,size,class,fire_year,distance
0,2001-01-01,153,San Francisco,2001,8659.0,LUCERNE,2001-01-01,2001-01-01,0.1,A,2001.0,380.928215
1,2001-01-01,153,San Francisco,2001,9387.0,DEBRIS 2,2001-01-01,2001-01-01,0.1,A,2001.0,136.804378
2,2001-01-01,153,San Francisco,2001,17163.0,LP HILL,2001-01-01,2001-01-01,0.1,A,2001.0,251.976491
3,2001-01-01,153,San Francisco,2001,30114.0,PGE 1,2001-01-01,2001-01-01,0.1,A,2001.0,200.71576
4,2001-01-01,153,San Francisco,2001,30201.0,PUMP HOUSE,2001-01-01,2001-01-01,0.1,A,2001.0,467.441792


### Because we did a left join from the air-quality data, the result includes rows for days with no fire. These rows need to be dropped.

In [9]:

# The left join leaves NaN in the fire columns for air-quality days with no
# matching fire; drop those rows.  As before, any row with a missing value in
# any column is removed.  Reassign rather than mutate in place so the cell is
# a plain transformation of its input.
merged_data_df = merged_data_df.dropna()

# The NaNs introduced by the left join upcast the integer fire columns to
# float (fire_id displays as 8659.0, fire_year as 2001.0).  With every NaN row
# gone, restore integer dtypes so these columns are not written to the
# database as floats.
# NOTE(review): assumes fire_id and fire_year are integer-valued in
# cleaned_wildfire_data, as the preview of that table shows -- confirm.
merged_data_df = merged_data_df.astype({"fire_id": "int64", "fire_year": "int64"})

## Write to the database

In [10]:
# Persist the merged frame as the fire_and_air_data table.  An existing table
# of that name is replaced, and the DataFrame index is not written as a
# column.
merged_data_df.to_sql(
    con=engine,
    name="fire_and_air_data",
    index=False,
    if_exists="replace",
)