In [1]:
import os
from urllib.parse import quote

import numpy as np
import pandas as pd
from sqlalchemy import create_engine

In [2]:
import warnings
warnings.filterwarnings("ignore")

### Importing and cleaning the 2019 file

In [3]:
# Load the raw 2019 storm-event details file
event_details = pd.read_csv(
    "Resources/StormEvents_details-ftp_v1.0_d2019_c20190617.csv",
    encoding="UTF-8",
)

# Store BEGIN_YEARMONTH as text, then slice it into a 4-character year and a
# 2-character month (both new columns hold strings, not integers)
begin_yearmonth = event_details['BEGIN_YEARMONTH'].astype(str)
event_details['BEGIN_YEARMONTH'] = begin_yearmonth
event_details['BEGIN_YEAR'] = begin_yearmonth.str[0:4]
event_details['BEGIN_MONTH'] = begin_yearmonth.str[4:6]

# The two new columns were appended at the end; rotate the last two columns to
# the front so BEGIN_YEAR and BEGIN_MONTH come first and every other column
# keeps its relative order
column_order = list(event_details.columns)
event_details = event_details[column_order[-2:] + column_order[:-2]]

event_details.head()

Unnamed: 0,BEGIN_YEAR,BEGIN_MONTH,BEGIN_YEARMONTH,BEGIN_DAY,BEGIN_TIME,END_YEARMONTH,END_DAY,END_TIME,EPISODE_ID,EVENT_ID,...,END_RANGE,END_AZIMUTH,END_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EPISODE_NARRATIVE,EVENT_NARRATIVE,DATA_SOURCE
0,2019,1,201901,29,1200,201901,29,1900,133946,801726,...,,,,,,,,A strong surface cold front crossed the region...,Snowfall amounts of 1 to 2 inches were observe...,CSV
1,2019,3,201903,9,810,201903,9,810,134106,814097,...,0.0,N,CADDO GAP,34.4,-93.62,34.4,-93.62,A strong storm system brought severe weather t...,A tractor shed was destroyed in Caddo Gap.,CSV
2,2019,3,201903,9,750,201903,9,750,134106,814096,...,1.0,NNE,SUBIACO,35.3,-93.64,35.3,-93.64,A strong storm system brought severe weather t...,Tree limbs were broken and shingles were blown...,CSV
3,2019,1,201901,12,1000,201901,13,1400,134278,804158,...,,,,,,,,Low pressure tracked south of the region throu...,A report from Delaware showed that 4.5 inches ...,CSV
4,2019,1,201901,12,1000,201901,13,1400,134278,804159,...,,,,,,,,Low pressure tracked south of the region throu...,"Based on surrounding observations, it is estim...",CSV


In [4]:
# Columns removed from the 2019 data because they are not needed downstream
columns_to_drop_2019 = [
    'BEGIN_YEARMONTH',
    'CZ_TYPE', 'CZ_FIPS', 'CZ_NAME', 'CZ_TIMEZONE',
    'MAGNITUDE', 'MAGNITUDE_TYPE', 'CATEGORY',
    'TOR_F_SCALE', 'TOR_LENGTH', 'TOR_WIDTH', 'TOR_OTHER_WFO',
    'TOR_OTHER_CZ_STATE', 'TOR_OTHER_CZ_FIPS', 'TOR_OTHER_CZ_NAME',
    'BEGIN_AZIMUTH', 'BEGIN_RANGE',
    'END_RANGE', 'END_AZIMUTH', 'END_LOCATION',
    'EPISODE_NARRATIVE', 'EVENT_NARRATIVE', 'DATA_SOURCE',
    'BEGIN_DATE_TIME', 'END_DATE_TIME',
]

# Raises KeyError if any listed column is absent (e.g. when this cell is
# re-run after the columns have already been removed)
event_details = event_details.drop(columns=columns_to_drop_2019)

In [5]:
# Keep only the rows whose EVENT_TYPE is one of the four flood categories
flood_event_types = ['Flood', 'Flash Flood', 'Coastal Flood', 'Lakeshore Flood']
is_flood_event = event_details['EVENT_TYPE'].isin(flood_event_types)

# Renumber the remaining rows from 0 and discard the old index values
event_details = event_details.loc[is_flood_event].reset_index(drop=True)
event_details.head()

Unnamed: 0,BEGIN_YEAR,BEGIN_MONTH,BEGIN_DAY,BEGIN_TIME,END_YEARMONTH,END_DAY,END_TIME,EPISODE_ID,EVENT_ID,STATE,...,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,FLOOD_CAUSE,BEGIN_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON
0,2019,2,14,1610,201902,14,1905,133682,800014,HAWAII,...,0,0.00K,0.00K,Emergency Manager,Heavy Rain,HAIKU,20.9177,-156.3282,20.922,-156.3141
1,2019,2,23,1600,201902,23,1902,133684,800018,HAWAII,...,0,0.00K,0.00K,Emergency Manager,Heavy Rain,KURTISTOWN,19.5721,-155.029,19.5738,-155.0273
2,2019,1,24,1300,201901,24,1500,135012,809140,VIRGINIA,...,0,0.00K,0.00K,Law Enforcement,Heavy Rain,TREMONT,38.8459,-77.2383,38.8467,-77.2375
3,2019,1,24,1300,201901,24,1500,135012,809141,VIRGINIA,...,0,0.00K,0.00K,Law Enforcement,Heavy Rain,VALE,38.9108,-77.363,38.9119,-77.3642
4,2019,1,24,1300,201901,24,1500,135012,809142,VIRGINIA,...,0,0.00K,0.00K,Law Enforcement,Heavy Rain,CLARKS CROSSING,38.9182,-77.3015,38.9159,-77.2967


In [6]:
# Save the cleaned 2019 flood events (without the row index).
# to_csv does not create missing folders, so make sure the output directory
# exists first; exist_ok=True makes this a no-op when it is already there.
flood_2019_csv_path = "Output/Flood Events 2019.csv"
os.makedirs(os.path.dirname(flood_2019_csv_path), exist_ok=True)
event_details.to_csv(flood_2019_csv_path, index=False)

In [7]:
# Create Engine
# The password is taken from the PGPASSWORD environment variable when it is
# set, so the real credential does not have to be typed into the notebook; the
# original "PASSWORD" placeholder remains the fallback.
# User name and password are percent-encoded (safe="" also encodes "/") so
# that characters such as "@", ":" or "/" are not parsed as URL delimiters.
db_username = quote("postgres", safe="")
db_password = quote(os.environ.get("PGPASSWORD", "PASSWORD"), safe="")

rds_connection_string = (
    'pg8000://{username}:{password}@{ipaddress}:{port}/{dbname}'
    .format(username=db_username,
            password=db_password,
            ipaddress="localhost",
            port=5432,
            dbname="Storm")
)

# The string above starts with the driver name; prefixing "postgresql+" gives
# the full "postgresql+pg8000://..." SQLAlchemy URL.
engine = create_engine(f'postgresql+{rds_connection_string}')

In [8]:
# Write the cleaned 2019 flood events to the database through `engine`.
# if_exists="replace" drops and recreates the table when it already exists;
# index=False leaves the DataFrame index out of the table.
event_details.to_sql(
    "Flood_Events_2019",
    engine,
    index=False,
    if_exists="replace",
)

### Importing and cleaning the historical (1965) file

In [9]:
# Load the raw 1965 storm-event details file
flood_data = pd.read_csv(
    "Resources/StormEvents_details-ftp_v1.0_d1965_c20190301.csv",
    encoding="UTF-8",
)

# Store BEGIN_YEARMONTH as text, then slice it into a 4-character year and a
# 2-character month (both new columns hold strings, not integers)
begin_yearmonth = flood_data['BEGIN_YEARMONTH'].astype(str)
flood_data['BEGIN_YEARMONTH'] = begin_yearmonth
flood_data['BEGIN_YEAR'] = begin_yearmonth.str[0:4]
flood_data['BEGIN_MONTH'] = begin_yearmonth.str[4:6]

# The two new columns were appended at the end; rotate the last two columns to
# the front so BEGIN_YEAR and BEGIN_MONTH come first and every other column
# keeps its relative order
column_order = list(flood_data.columns)
flood_data = flood_data[column_order[-2:] + column_order[:-2]]

flood_data.head()

Unnamed: 0,BEGIN_YEAR,BEGIN_MONTH,BEGIN_YEARMONTH,BEGIN_DAY,BEGIN_TIME,END_YEARMONTH,END_DAY,END_TIME,EPISODE_ID,EVENT_ID,...,END_RANGE,END_AZIMUTH,END_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EPISODE_NARRATIVE,EVENT_NARRATIVE,DATA_SOURCE
0,1965,4,196504,19,1350,196504,19,1350,,9979216,...,0,,,30.58,-88.08,,,,,PUB
1,1965,1,196501,25,2150,196501,25,2150,,10062822,...,0,,,38.78,-92.27,,,,,PUB
2,1965,4,196504,12,427,196504,12,427,,10077976,...,0,,,40.7,-72.68,,,,,PUB
3,1965,8,196508,1,1320,196508,1,1320,,10040097,...,0,,,39.05,-76.05,,,,,PUB
4,1965,9,196509,4,1940,196509,4,1940,,10038464,...,0,,,43.02,-86.1,43.08,-86.07,,,PUB


In [10]:
# Columns removed from the historical data because they are not needed downstream
columns_to_drop_hist = [
    'BEGIN_YEARMONTH', 'END_YEARMONTH', 'END_DAY', 'END_TIME',
    'CZ_TYPE', 'CZ_FIPS', 'CZ_NAME', 'CZ_TIMEZONE',
    'MAGNITUDE', 'MAGNITUDE_TYPE', 'CATEGORY',
    'TOR_F_SCALE', 'TOR_LENGTH', 'TOR_WIDTH', 'TOR_OTHER_WFO',
    'TOR_OTHER_CZ_STATE', 'TOR_OTHER_CZ_FIPS', 'TOR_OTHER_CZ_NAME',
    'BEGIN_AZIMUTH', 'BEGIN_LOCATION', 'BEGIN_RANGE',
    'END_RANGE', 'END_AZIMUTH', 'END_LOCATION',
    'EPISODE_NARRATIVE', 'EVENT_NARRATIVE', 'DATA_SOURCE',
    'BEGIN_DATE_TIME', 'END_DATE_TIME',
]

# Raises KeyError if any listed column is absent (e.g. when this cell is
# re-run after the columns have already been removed)
flood_data = flood_data.drop(columns=columns_to_drop_hist)