# Create database table of binned data
### This notebook reads in combined data from fire_and_air_data, bins each record based on distance from SF, then writes them to the binned_by_acres table

In [1]:
import os

import numpy as np
import pandas as pd
from sqlalchemy import create_engine

%matplotlib inline


In [2]:
# Read the connection details from the FIRES_DB_CONNECTION environment variable
# so real credentials do not have to live in the notebook. The fallback is the
# original local-development value, kept so existing runs behave the same.
# NOTE(review): remove the fallback once every environment sets the variable.
rds_connection_string = os.environ.get(
    "FIRES_DB_CONNECTION", "root:12345678@127.0.0.1/fires"
)
engine = create_engine(f'mysql+pymysql://{rds_connection_string}')


In [3]:
# Load every row of the combined fire / air-quality table into a DataFrame,
# then preview the first rows.
source_query = "select * from fire_and_air_data"
data = pd.read_sql(source_query, con=engine)
data.head()

  result = self._query(query)


Unnamed: 0,date,overall_aqi,site_name,year,fire_id,fire_name,discovery_date,containment_date,size,class,fire_year,distance
0,2001-01-01,153,San Francisco,2001,8659.0,LUCERNE,2001-01-01,2001-01-01,0.1,A,2001.0,380.928215
1,2001-01-01,153,San Francisco,2001,9387.0,DEBRIS 2,2001-01-01,2001-01-01,0.1,A,2001.0,136.804378
2,2001-01-01,153,San Francisco,2001,17163.0,LP HILL,2001-01-01,2001-01-01,0.1,A,2001.0,251.976491
3,2001-01-01,153,San Francisco,2001,30114.0,PGE 1,2001-01-01,2001-01-01,0.1,A,2001.0,200.71576
4,2001-01-01,153,San Francisco,2001,30201.0,PUMP HOUSE,2001-01-01,2001-01-01,0.1,A,2001.0,467.441792


In [4]:
# Keep only the columns used for binning and for the output table.
columns_to_keep = ['date', 'overall_aqi', 'fire_id', 'size', 'distance']
data = data[columns_to_keep]
data.head()

Unnamed: 0,date,overall_aqi,fire_id,size,distance
0,2001-01-01,153,8659.0,0.1,380.928215
1,2001-01-01,153,9387.0,0.1,136.804378
2,2001-01-01,153,17163.0,0.1,251.976491
3,2001-01-01,153,30114.0,0.1,200.71576
4,2001-01-01,153,30201.0,0.1,467.441792


## Bin the data by distance

In [5]:
# Bin edges for the `distance` column. The last edge is infinite so that no
# distance, however large, can fall outside the final bin.
bins = [0, 50, 100, 150, 200, 250, 300, np.inf]

In [6]:
# One label per bin, in bin order: six "within N" labels followed by the
# catch-all label for everything past 300.
labels = [f'within {edge}' for edge in (50, 100, 150, 200, 250, 300)] + ['bigger then 300']

In [7]:
# Assign each record to a distance bin. include_lowest=True closes the first
# interval on the left, so a distance of exactly 0 lands in the first bin
# instead of becoming NaN (pd.cut excludes the lowest edge by default).
data['bins'] = pd.cut(data['distance'], bins=bins, labels=labels, include_lowest=True)

In [8]:
# Preview the first rows to check the newly added 'bins' column.
data.head()

Unnamed: 0,date,overall_aqi,fire_id,size,distance,bins
0,2001-01-01,153,8659.0,0.1,380.928215,bigger then 300
1,2001-01-01,153,9387.0,0.1,136.804378,within 150
2,2001-01-01,153,17163.0,0.1,251.976491,within 300
3,2001-01-01,153,30114.0,0.1,200.71576,within 250
4,2001-01-01,153,30201.0,0.1,467.441792,bigger then 300


In [9]:
# Write the binned frame to the database; if_exists="replace" overwrites any
# existing table with this name.
# NOTE(review): the table is called "binned_by_acres" although the bins above
# are computed from the `distance` column - confirm the intended name.
data.to_sql(
    name="binned_by_acres",
    con=engine,
    if_exists="replace",
)