In [1]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
#Create a Storm database to store storm data 

import pymongo

conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

db = client.StormDB

data = db.data.find()

### Importing and cleaning the historical file

In [4]:
#Import glob loop through 20 years of storm data
import glob

path = "Resources" 
all_files = glob.glob(path + "/*.csv")
li=[]

for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    li.append(df)

# Create a dataframe using the imported data
storm_data = pd.concat(li, axis=0, ignore_index=True)

storm_data.head()

Unnamed: 0,BEGIN_YEARMONTH,BEGIN_DAY,BEGIN_TIME,END_YEARMONTH,END_DAY,END_TIME,EPISODE_ID,EVENT_ID,STATE,STATE_FIPS,...,END_RANGE,END_AZIMUTH,END_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EPISODE_NARRATIVE,EVENT_NARRATIVE,DATA_SOURCE
0,199901,14,0,199901,15,0,1501526,5683694,MASSACHUSETTS,25.0,...,,,,,,,,A strong high pressure system centered over so...,,PDC
1,199901,2,700,199901,2,2359,1500622,5683477,MICHIGAN,26.0,...,,,,,,,,Blizzard conditions developed across lower Mic...,,PDC
2,199901,13,130,199901,13,1400,1500140,5683047,OHIO,39.0,...,,,,,,,,Freezing rain and sleet changed to snow as a s...,,PDC
3,199908,1,1,199908,31,2359,2409403,5712941,MISSOURI,29.0,...,,,,,,,,"After an abnormally wet June, the rest of the ...",,PDC
4,199901,2,2300,199901,3,1900,1502756,5681541,NEW YORK,36.0,...,,,,,,,,A deep area of low pressure moved from the Mis...,,PDC


In [5]:
#Parse year and month description
storm_data['BEGIN_YEARMONTH']=storm_data['BEGIN_YEARMONTH'].astype(str)
storm_data['BEGIN_YEAR']=storm_data['BEGIN_YEARMONTH'].str[0:4]
storm_data['BEGIN_MONTH']=storm_data['BEGIN_YEARMONTH'].str[4:6]

#Move the new columns to the first and second column
cols = list(storm_data.columns)
cols = [cols[-1]] + cols[:-1]
storm_data = storm_data[cols]

cols = list(storm_data.columns)
cols = [cols[-1]] + cols[:-1]
storm_data = storm_data[cols]


In [6]:
#Drop columns that are not needed
storm_data.drop(columns={'BEGIN_YEARMONTH','END_YEARMONTH','END_DAY','END_TIME','CZ_TYPE','CZ_FIPS','CZ_NAME','CZ_TIMEZONE','MAGNITUDE','MAGNITUDE_TYPE',
                    'CATEGORY','TOR_F_SCALE','TOR_LENGTH','TOR_WIDTH','TOR_OTHER_WFO','TOR_OTHER_CZ_STATE',
                    'TOR_OTHER_CZ_FIPS','TOR_OTHER_CZ_NAME','BEGIN_AZIMUTH','BEGIN_RANGE',
                    'END_RANGE','END_AZIMUTH','END_LOCATION','EPISODE_NARRATIVE','EVENT_NARRATIVE',
                    'DATA_SOURCE','CZ_TIMEZONE','BEGIN_DATE_TIME','END_DATE_TIME',
                    'BEGIN_DAY','BEGIN_TIME','EPISODE_ID','STATE_FIPS','YEAR','MONTH_NAME',
                    'SOURCE','FLOOD_CAUSE','STATE','WFO'}, inplace=True)

In [7]:
storm_data.head()

Unnamed: 0,BEGIN_YEAR,BEGIN_MONTH,EVENT_ID,EVENT_TYPE,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,BEGIN_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON
0,1999,1,5683694,Heavy Snow,0,0,0,0,,,,,,,
1,1999,1,5683477,Blizzard,0,0,0,0,,,,,,,
2,1999,1,5683047,Winter Storm,0,0,0,0,2K,,,,,,
3,1999,8,5712941,Drought,0,0,0,0,,,,,,,
4,1999,1,5681541,Winter Storm,0,0,0,0,3K,,,,,,


In [8]:
#Retrive the total for damage and death
storm_data[["DEATHS_DIRECT",'DEATHS_INDIRECT','INJURIES_DIRECT','INJURIES_INDIRECT',"DAMAGE_PROPERTY","DAMAGE_CROPS"]].fillna(0, inplace=True)
storm_data["DEATH_TOLL"]=storm_data['DEATHS_DIRECT']+storm_data['DEATHS_INDIRECT']
storm_data['TOTAL_INJURIES']=storm_data['INJURIES_DIRECT']+storm_data['INJURIES_INDIRECT']

#change property and crop damage to numbers and add 1000
storm_data["DAMAGE_PROPERTY"]=storm_data['DAMAGE_PROPERTY'].str[:-1]
storm_data["DAMAGE_CROPS"]=storm_data['DAMAGE_CROPS'].str[:-1]

storm_data["DAMAGE_PROPERTY"]=(storm_data['DAMAGE_PROPERTY'].apply(pd.to_numeric))
storm_data["DAMAGE_CROPS"]=(storm_data['DAMAGE_CROPS'].apply(pd.to_numeric))


In [9]:
storm_data.head()

Unnamed: 0,BEGIN_YEAR,BEGIN_MONTH,EVENT_ID,EVENT_TYPE,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,BEGIN_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,DEATH_TOLL,TOTAL_INJURIES
0,1999,1,5683694,Heavy Snow,0,0,0,0,,,,,,,,0,0
1,1999,1,5683477,Blizzard,0,0,0,0,,,,,,,,0,0
2,1999,1,5683047,Winter Storm,0,0,0,0,2.0,,,,,,,0,0
3,1999,8,5712941,Drought,0,0,0,0,,,,,,,,0,0
4,1999,1,5681541,Winter Storm,0,0,0,0,3.0,,,,,,,0,0


In [10]:
#Store storm data in mongo db
storm_json= storm_data.to_dict(orient='records')
db.storm_info.insert_many(storm_json)

<pymongo.results.InsertManyResult at 0x23528b73348>

In [11]:
storm_data.drop(columns={'INJURIES_DIRECT','INJURIES_INDIRECT','DEATHS_DIRECT','DEATHS_INDIRECT'}, inplace=True)

In [12]:
storm_data.head()

Unnamed: 0,BEGIN_YEAR,BEGIN_MONTH,EVENT_ID,EVENT_TYPE,DAMAGE_PROPERTY,DAMAGE_CROPS,BEGIN_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,DEATH_TOLL,TOTAL_INJURIES
0,1999,1,5683694,Heavy Snow,,,,,,,,0,0
1,1999,1,5683477,Blizzard,,,,,,,,0,0
2,1999,1,5683047,Winter Storm,2.0,,,,,,,0,0
3,1999,8,5712941,Drought,,,,,,,,0,0
4,1999,1,5681541,Winter Storm,3.0,,,,,,,0,0


In [13]:
sum_groupby=storm_data[["EVENT_TYPE","DAMAGE_PROPERTY","DAMAGE_CROPS","DEATH_TOLL",'TOTAL_INJURIES']]
sum_groupby["EVENT_TYPE"]=sum_groupby["EVENT_TYPE"].replace({"Flash Flood":"Flood","Excessive Heat":'Heat' })
sum_groupby=sum_groupby.groupby(["EVENT_TYPE"]).sum()

count_groupby=storm_data[["EVENT_TYPE","DAMAGE_PROPERTY"]]
count_groupby["EVENT_TYPE"]=count_groupby["EVENT_TYPE"].replace({"Flash Flood":"Flood"})
count_groupby=count_groupby.groupby(["EVENT_TYPE"]).count()
count_groupby.rename(columns={'DAMAGE_PROPERTY':'COUNT'}, inplace=True)

storm_summary=pd.merge(count_groupby,sum_groupby, on="EVENT_TYPE")

storm_summary.sort_values("COUNT", ascending=False).head()

Unnamed: 0_level_0,COUNT,DAMAGE_PROPERTY,DAMAGE_CROPS,DEATH_TOLL,TOTAL_INJURIES
EVENT_TYPE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Thunderstorm Wind,202120,2673074.73,171123.15,555,5040
Hail,122779,747711.6,493168.62,12,766
Flood,91109,2825145.44,396510.03,1742,1786
High Wind,40554,723939.01,28415.02,359,1618
Winter Weather,39158,61961.0,16.0,818,5298


In [14]:
storm_summary.sort_values("DAMAGE_PROPERTY", ascending=False).head()

Unnamed: 0_level_0,COUNT,DAMAGE_PROPERTY,DAMAGE_CROPS,DEATH_TOLL,TOTAL_INJURIES
EVENT_TYPE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Flood,91109,2825145.44,396510.03,1742,1786
Thunderstorm Wind,202120,2673074.73,171123.15,555,5040
Tornado,20106,1382265.98,101191.55,1579,20918
Hail,122779,747711.6,493168.62,12,766
High Wind,40554,723939.01,28415.02,359,1618


In [15]:
storm_summary.sort_values("TOTAL_INJURIES", ascending=False).head()

Unnamed: 0_level_0,COUNT,DAMAGE_PROPERTY,DAMAGE_CROPS,DEATH_TOLL,TOTAL_INJURIES
EVENT_TYPE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Tornado,20106,1382265.98,101191.55,1579,20918
Heat,6804,12494.0,9349.9,2886,14853
Winter Weather,39158,61961.0,16.0,818,5298
Thunderstorm Wind,202120,2673074.73,171123.15,555,5040
Lightning,10858,544589.83,1975.64,740,4387


In [16]:
#Sumarize flood data
flood_data= storm_data.loc[(storm_data['EVENT_TYPE'] == 'Flood') | (storm_data['EVENT_TYPE'] == 'Flash Flood')]

flood_data.reset_index(inplace=True)
flood_data.drop(columns={'index'},inplace=True)

sum_groupby=flood_data[["BEGIN_YEAR","DAMAGE_PROPERTY","DAMAGE_CROPS","DEATH_TOLL",'TOTAL_INJURIES']]
sum_groupby=sum_groupby.groupby(["BEGIN_YEAR"]).sum()
count_groupby=flood_data[["BEGIN_YEAR","DAMAGE_PROPERTY"]].groupby(["BEGIN_YEAR"]).count()
count_groupby.rename(columns={'DAMAGE_PROPERTY':'FLOOD_COUNT'}, inplace=True)

flood_summary=pd.merge(count_groupby,sum_groupby, on="BEGIN_YEAR")
flood_summary.to_csv("Output/FloodSummary.csv", index=True)

flood_data= flood_data.loc[(flood_data['BEGIN_YEAR'] == '2019')]

flood_data.to_csv("Output/FloodData2019.csv", index=False)

In [17]:
#Sumarize Thunderstorm data
thunderstorm_data= storm_data.loc[(storm_data['EVENT_TYPE'] == 'Thunderstorm Wind')]

thunderstorm_data.reset_index(inplace=True)
thunderstorm_data.drop(columns={'index'},inplace=True)

sum_groupby=thunderstorm_data[["BEGIN_YEAR","DAMAGE_PROPERTY","DAMAGE_CROPS","DEATH_TOLL",'TOTAL_INJURIES']]
sum_groupby=sum_groupby.groupby(["BEGIN_YEAR"]).sum()
count_groupby=thunderstorm_data[["BEGIN_YEAR","DAMAGE_PROPERTY"]].groupby(["BEGIN_YEAR"]).count()
count_groupby.rename(columns={'DAMAGE_PROPERTY':'FLOOD_COUNT'}, inplace=True)

thunderstorm_summary=pd.merge(count_groupby,sum_groupby, on="BEGIN_YEAR")
thunderstorm_summary.to_csv("Output/ThunderstormSummary.csv", index=True)

thunderstorm_data= thunderstorm_data.loc[(thunderstorm_data['BEGIN_YEAR'] == '2019')]

thunderstorm_data.to_csv("Output/ThunderstormData2019.csv", index=False)

In [18]:
#Sumarize hail data
hail_data= storm_data.loc[(storm_data['EVENT_TYPE'] == 'Hail')]

hail_data.reset_index(inplace=True)
hail_data.drop(columns={'index'},inplace=True)

sum_groupby=hail_data[["BEGIN_YEAR","DAMAGE_PROPERTY","DAMAGE_CROPS","DEATH_TOLL",'TOTAL_INJURIES']].groupby(["BEGIN_YEAR"]).sum()
count_groupby=hail_data[["BEGIN_YEAR","DAMAGE_PROPERTY"]].groupby(["BEGIN_YEAR"]).count()
count_groupby.rename(columns={'DAMAGE_PROPERTY':'HAIL_COUNT'}, inplace=True)

hail_summary=pd.merge(count_groupby,sum_groupby, on="BEGIN_YEAR")
hail_summary.to_csv("Output/HailSummary.csv", index=True)

hail_data= hail_data.loc[(hail_data['BEGIN_YEAR'] == '2019')]

hail_data.to_csv("Output/HailData2019.csv", index=False)

In [19]:
#Sumarize high wind data
highwind_data= storm_data.loc[(storm_data['EVENT_TYPE'] == 'High Wind')]

highwind_data.reset_index(inplace=True)
highwind_data.drop(columns={'index'},inplace=True)

sum_groupby=highwind_data[["BEGIN_YEAR","DAMAGE_PROPERTY","DAMAGE_CROPS","DEATH_TOLL",'TOTAL_INJURIES']].groupby(["BEGIN_YEAR"]).sum()
count_groupby=highwind_data[["BEGIN_YEAR","DAMAGE_PROPERTY"]].groupby(["BEGIN_YEAR"]).count()
count_groupby.rename(columns={'DAMAGE_PROPERTY':'HIGHWIND_COUNT'}, inplace=True)

highwind_summary=pd.merge(count_groupby,sum_groupby, on="BEGIN_YEAR")
hail_summary.to_csv("Output/HighWindSummary.csv", index=True)

In [20]:
#Sumarize winter weather data
winterweather_data= storm_data.loc[(storm_data['EVENT_TYPE'] == 'Winter Weather')]

winterweather_data.reset_index(inplace=True)
winterweather_data.drop(columns={'index'},inplace=True)

sum_groupby=winterweather_data[["BEGIN_YEAR","DAMAGE_PROPERTY","DAMAGE_CROPS","DEATH_TOLL",'TOTAL_INJURIES']].groupby(["BEGIN_YEAR"]).sum()
count_groupby=winterweather_data[["BEGIN_YEAR","DAMAGE_PROPERTY"]].groupby(["BEGIN_YEAR"]).count()
count_groupby.rename(columns={'DAMAGE_PROPERTY':'WINTERWEATHER_COUNT'}, inplace=True)

winterweather_summary=pd.merge(count_groupby,sum_groupby, on="BEGIN_YEAR")
winterweather_summary.to_csv("Output/WinterWeatherSummary.csv", index=True)


In [21]:
#Sumarize Tornado data
tornado_data= storm_data.loc[(storm_data['EVENT_TYPE'] == 'Tornado')]

tornado_data.reset_index(inplace=True)
tornado_data.drop(columns={'index'},inplace=True)

sum_groupby=tornado_data[["BEGIN_YEAR","DAMAGE_PROPERTY","DAMAGE_CROPS","DEATH_TOLL",'TOTAL_INJURIES']].groupby(["BEGIN_YEAR"]).sum()
count_groupby=tornado_data[["BEGIN_YEAR","DAMAGE_PROPERTY"]].groupby(["BEGIN_YEAR"]).count()
count_groupby.rename(columns={'DAMAGE_PROPERTY':'TORNADO_COUNT'}, inplace=True)

tornado_summary=pd.merge(count_groupby,sum_groupby, on="BEGIN_YEAR")
tornado_summary.to_csv("Output/TornadoSummary.csv", index=True)

tornado_data= tornado_data.loc[(tornado_data['BEGIN_YEAR'] == '2019')]
tornado_data.to_csv("Output/TornadoData2019.csv", index=False)

In [22]:
#Sumarize Lightning data
lightning_data= storm_data.loc[(storm_data['EVENT_TYPE'] == 'Lightning')]

lightning_data.reset_index(inplace=True)
lightning_data.drop(columns={'index'},inplace=True)



sum_groupby=lightning_data[["BEGIN_YEAR","DAMAGE_PROPERTY","DAMAGE_CROPS","DEATH_TOLL",'TOTAL_INJURIES']].groupby(["BEGIN_YEAR"]).sum()
count_groupby=lightning_data[["BEGIN_YEAR","DAMAGE_PROPERTY"]].groupby(["BEGIN_YEAR"]).count()
count_groupby.rename(columns={'DAMAGE_PROPERTY':'Lightning_COUNT'}, inplace=True)

lightning_summary=pd.merge(count_groupby,sum_groupby, on="BEGIN_YEAR")
lightning_summary.to_csv("Output/LightningSummary.csv", index=True)


lightning_data= lightning_data.loc[(lightning_data['BEGIN_YEAR'] == '2019')]
lightning_data.to_csv("Output/LightningData2019.csv", index=False)

In [23]:
#Sumarize Heat data
heat_data= storm_data.loc[(storm_data['EVENT_TYPE'] == 'Heat')]

heat_data.reset_index(inplace=True)
heat_data.drop(columns={'index'},inplace=True)

sum_groupby=heat_data[["BEGIN_YEAR","DAMAGE_PROPERTY","DAMAGE_CROPS","DEATH_TOLL",'TOTAL_INJURIES']].groupby(["BEGIN_YEAR"]).sum()
count_groupby=heat_data[["BEGIN_YEAR","DAMAGE_PROPERTY"]].groupby(["BEGIN_YEAR"]).count()
count_groupby.rename(columns={'DAMAGE_PROPERTY':'HEAT_COUNT'}, inplace=True)

heat_summary=pd.merge(count_groupby,sum_groupby, on="BEGIN_YEAR")
heat_summary.to_csv("Output/HeatSummary.csv", index=True)