In [1]:
import pandas as pd
from sqlalchemy import create_engine

In [2]:
# read csv into dataframe
file_wildfires = "data/wildfire_data/fltered_wildfire_data.csv"
wildfire_df = pd.read_csv(file_wildfires)


In [3]:
wildfire_df.head()

Unnamed: 0,SOURCE_REPORTING_UNIT_NAME,FIRE_NAME,FIRE_YEAR,DISCOVERY_DATE,CONT_DATE,FIRE_SIZE,FIRE_SIZE_CLASS,FIPS_NAME
0,Plumas National Forest,FOUNTAIN,2005,2453403.5,2453403.5,0.1,A,Plumas
1,Eldorado National Forest,PIGEON,2004,2453137.5,2453137.5,0.25,A,Placer
2,Eldorado National Forest,SLACK,2004,2453156.5,2453156.5,0.1,A,El Dorado
3,Eldorado National Forest,DEER,2004,2453184.5,2453189.5,0.1,A,Alpine
4,Eldorado National Forest,STEVENOT,2004,2453184.5,2453189.5,0.1,A,Alpine


In [4]:
wildfire_df.columns = ["reporting_unit", "fire_name", "fire_year", "discovery_date", "containment_date", "size", "class", "fips"]


In [5]:
wildfire_df = wildfire_df[["fire_name","discovery_date", "containment_date", "size", "class", "fire_year"]]

In [6]:
wildfire_df.head()

Unnamed: 0,fire_name,discovery_date,containment_date,size,class,fire_year
0,FOUNTAIN,2453403.5,2453403.5,0.1,A,2005
1,PIGEON,2453137.5,2453137.5,0.25,A,2004
2,SLACK,2453156.5,2453156.5,0.1,A,2004
3,DEER,2453184.5,2453189.5,0.1,A,2004
4,STEVENOT,2453184.5,2453189.5,0.1,A,2004


In [7]:
wildfire_df = wildfire_df.reset_index()
wildfire_df


Unnamed: 0,index,fire_name,discovery_date,containment_date,size,class,fire_year
0,0,FOUNTAIN,2453403.5,2453403.5,0.10,A,2005
1,1,PIGEON,2453137.5,2453137.5,0.25,A,2004
2,2,SLACK,2453156.5,2453156.5,0.10,A,2004
3,3,DEER,2453184.5,2453189.5,0.10,A,2004
4,4,STEVENOT,2453184.5,2453189.5,0.10,A,2004
5,5,HIDDEN,2453186.5,2453187.5,0.10,A,2004
6,6,FORK,2453187.5,2453188.5,0.10,A,2004
7,7,SLATE,2453437.5,2453437.5,0.80,B,2005
8,8,SHASTA,2453444.5,2453444.5,1.00,B,2005
9,9,TANGLEFOOT,2453187.5,2453188.5,0.10,A,2004


In [8]:

wildfire_df = wildfire_df.rename(columns={"index": "fire_id"})
wildfire_df.head()


Unnamed: 0,fire_id,fire_name,discovery_date,containment_date,size,class,fire_year
0,0,FOUNTAIN,2453403.5,2453403.5,0.1,A,2005
1,1,PIGEON,2453137.5,2453137.5,0.25,A,2004
2,2,SLACK,2453156.5,2453156.5,0.1,A,2004
3,3,DEER,2453184.5,2453189.5,0.1,A,2004
4,4,STEVENOT,2453184.5,2453189.5,0.1,A,2004


In [9]:
wildfire_df.set_index('fire_id')

Unnamed: 0_level_0,fire_name,discovery_date,containment_date,size,class,fire_year
fire_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,FOUNTAIN,2453403.5,2453403.5,0.10,A,2005
1,PIGEON,2453137.5,2453137.5,0.25,A,2004
2,SLACK,2453156.5,2453156.5,0.10,A,2004
3,DEER,2453184.5,2453189.5,0.10,A,2004
4,STEVENOT,2453184.5,2453189.5,0.10,A,2004
5,HIDDEN,2453186.5,2453187.5,0.10,A,2004
6,FORK,2453187.5,2453188.5,0.10,A,2004
7,SLATE,2453437.5,2453437.5,0.80,B,2005
8,SHASTA,2453444.5,2453444.5,1.00,B,2005
9,TANGLEFOOT,2453187.5,2453188.5,0.10,A,2004


In [10]:
wildfire_df.dtypes

fire_id               int64
fire_name            object
discovery_date      float64
containment_date    float64
size                float64
class                object
fire_year             int64
dtype: object

In [11]:
epoch = pd.to_datetime(0, unit='s').to_julian_date()

In [12]:
wildfire_df['discovery_date'] = pd.to_datetime(wildfire_df['discovery_date'] - epoch, unit='D')
wildfire_df['containment_date'] = pd.to_datetime(wildfire_df['containment_date'] - epoch, unit='D')

In [13]:
wildfire_df.dtypes

fire_id                      int64
fire_name                   object
discovery_date      datetime64[ns]
containment_date    datetime64[ns]
size                       float64
class                       object
fire_year                    int64
dtype: object

In [14]:
wildfire_df.head()

Unnamed: 0,fire_id,fire_name,discovery_date,containment_date,size,class,fire_year
0,0,FOUNTAIN,2005-02-02,2005-02-02,0.1,A,2005
1,1,PIGEON,2004-05-12,2004-05-12,0.25,A,2004
2,2,SLACK,2004-05-31,2004-05-31,0.1,A,2004
3,3,DEER,2004-06-28,2004-07-03,0.1,A,2004
4,4,STEVENOT,2004-06-28,2004-07-03,0.1,A,2004


In [15]:
rds_connection_string = "root:12345678@127.0.0.1/fires"
engine = create_engine(f'mysql+pymysql://{rds_connection_string}')
wildfire_df.to_sql(name="wildfire_data", con=engine, if_exists="replace", index=True)
    

  result = self._query(query)
