In [1]:
import getpass
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
from sqlalchemy import create_engine, inspect

In [2]:
# Read the economic-damage CSV into a DataFrame and preview the first rows
# to confirm the column layout before cleaning.
csv_file = "./Resources/economic-damage-from-natural-disasters.csv"
econ_damage_df = pd.read_csv(filepath_or_buffer=csv_file)
econ_damage_df.head(n=5)

Unnamed: 0,Entity,Code,Year,Total economic damage from natural disasters (US$)
0,All natural disasters,,1900,30000000
1,All natural disasters,,1901,0
2,All natural disasters,,1902,0
3,All natural disasters,,1903,480000000
4,All natural disasters,,1904,0


In [3]:
# Tidy the economic-damage frame: give the columns short snake_case names,
# then keep only the three columns used downstream (the "Code" column is dropped).
econ_damage_df = (
    econ_damage_df
    .rename(
        columns={
            "Entity": "disaster_type",
            "Year": "year",
            "Total economic damage from natural disasters (US$)": "econ_damage",
        }
    )
    [["year", "disaster_type", "econ_damage"]]
)
econ_damage_df.head()

Unnamed: 0,year,disaster_type,econ_damage
0,1900,All natural disasters,30000000
1,1901,All natural disasters,0
2,1902,All natural disasters,0
3,1903,All natural disasters,480000000
4,1904,All natural disasters,0


In [4]:
# Read the reported-disaster-count CSV into a DataFrame and preview the first
# rows to confirm the column layout before cleaning.
csv_file = "./Resources/number-of-natural-disaster-events.csv"
event_count_df = pd.read_csv(filepath_or_buffer=csv_file)
event_count_df.head(n=5)

Unnamed: 0,Entity,Code,Year,Number of reported natural disasters (reported disasters)
0,All natural disasters,,1900,5
1,All natural disasters,,1901,2
2,All natural disasters,,1902,9
3,All natural disasters,,1903,8
4,All natural disasters,,1904,2


In [5]:
# Tidy the event-count frame: give the columns short snake_case names,
# then keep only the three columns used downstream (the "Code" column is dropped).
event_count_df = (
    event_count_df
    .rename(
        columns={
            "Entity": "disaster_type",
            "Year": "year",
            "Number of reported natural disasters (reported disasters)": "disaster_count",
        }
    )
    [["year", "disaster_type", "disaster_count"]]
)
event_count_df.head()

Unnamed: 0,year,disaster_type,disaster_count
0,1900,All natural disasters,5
1,1901,All natural disasters,2
2,1902,All natural disasters,9
3,1903,All natural disasters,8
4,1904,All natural disasters,2


In [6]:
# Join the per-type event counts with the per-type economic damage on
# (year, disaster_type). The left join keeps every event-count row; a row with
# no matching damage record gets NaN in econ_damage.
disaster_count_econ = event_count_df.merge(
    econ_damage_df,
    how="left",
    on=["year", "disaster_type"],
)

# A left join silently adds rows when the right-hand keys are not unique, so
# enforce the row-count check here instead of relying on a manual spot check.
assert len(disaster_count_econ) == len(event_count_df), (
    "merge changed the row count: (year, disaster_type) is not unique in econ_damage_df"
)
disaster_count_econ.head()

Unnamed: 0,year,disaster_type,disaster_count,econ_damage
0,1900,All natural disasters,5,30000000.0
1,1901,All natural disasters,2,0.0
2,1902,All natural disasters,9,0.0
3,1903,All natural disasters,8,480000000.0
4,1904,All natural disasters,2,0.0


In [15]:
# Reshape to one row per year: every disaster type becomes its own pair of
# columns (event count and economic cost), giving a flat, table-like layout.
disaster_df = disaster_count_econ.pivot_table(
    values=['disaster_count', 'econ_damage'],
    index=['year'],
    columns='disaster_type',
)

# Flatten the two-level (value, disaster_type) columns into plain names: all
# "*_count" columns first, then all "*_cost" columns. The assignment is purely
# positional, so the prefix order must stay aligned with the pivoted column order.
disaster_df.columns = [
    f"{prefix}_{suffix}"
    for suffix in ("count", "cost")
    for prefix in (
        'all_disaster', 'drought', 'earthquake', 'extreme_temp',
        'extreme_weather', 'flood', 'impact', 'landslide',
        'dry_mass_movement', 'volcanic', 'wildfire',
    )
]

# Turn "year" back into a regular column and treat missing combinations as 0.
disaster_df = disaster_df.reset_index().fillna(0)
disaster_df.head(10)

Unnamed: 0,year,all_disaster_count,drought_count,earthquake_count,extreme_temp_count,extreme_weather_count,flood_count,impact_count,landslide_count,dry_mass_movement_count,...,drought_cost,earthquake_cost,extreme_temp_cost,extreme_weather_cost,flood_cost,impact_cost,landslide_cost,dry_mass_movement_cost,volcanic_cost,wildfire_cost
0,1900,5.0,2.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,30000000.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1901,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1902,9.0,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1903,8.0,1.0,1.0,0.0,2.0,2.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,480000000.0,0.0,0.0,0.0,0.0,0.0
4,1904,2.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,1905,4.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,1906,17.0,1.0,10.0,0.0,3.0,2.0,0.0,0.0,0.0,...,0.0,630750000.0,0.0,20000000.0,0.0,0.0,0.0,0.0,0.0,0.0
7,1907,5.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,30000000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,1908,4.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,116000000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,1909,11.0,0.0,3.0,0.0,5.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
#Connecting to db
# The password is read from the PGPASSWORD environment variable (prompting
# interactively if it is unset) so it is not stored in the notebook.
db_password = os.environ.get("PGPASSWORD") or getpass.getpass("Password for postgres@localhost: ")

# quote_plus keeps characters such as "@" or "/" in the password from breaking the URL.
rds_connection_string = f"postgres:{quote_plus(db_password)}@localhost:5432/disaster_db"
engine = create_engine(f'postgresql://{rds_connection_string}')

# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector returns the same list of table names.
inspect(engine).get_table_names()

['disasters']

In [21]:
# Write the reshaped frame into the existing "disasters" table. Rows are
# appended to whatever is already there, and the DataFrame index is not written.
# NOTE(review): re-running this cell appends the same rows again unless the
# table itself rejects duplicates — confirm against the table definition.
disaster_df.to_sql(
    name='disasters',
    con=engine,
    if_exists='append',
    index=False,
)

In [22]:
# Read the table back, ordered by total cost descending, and show the top ten
# rows to verify the load.
pd.read_sql_query(
    sql='select * from disasters order by all_disaster_cost desc',
    con=engine,
).head(n=10)

Unnamed: 0,year,all_disaster_count,drought_count,earthquake_count,extreme_temp_count,extreme_weather_count,flood_count,impact_count,landslide_count,dry_mass_movement_count,...,drought_cost,earthquake_cost,extreme_temp_cost,extreme_weather_cost,flood_cost,impact_cost,landslide_cost,dry_mass_movement_cost,volcanic_cost,wildfire_cost
0,2011,334,17,30,16,84,156,0,17,0,...,8142000000,230299850000,781123000,50872148000,70757047000,0,0,0,104000000,3137000000
1,2005,432,20,25,30,130,193,0,13,0,...,462120000,6705100000,400000000,184793461000,17939670000,0,55000000,0,0,3850000000
2,2008,352,16,23,10,111,165,0,12,3,...,234000000,85796000000,21940000000,60728103000,19619144000,0,0,0,0,2532000000
3,2012,346,21,27,51,90,136,0,13,1,...,25480000000,18536314000,152801000,85732579000,25790538000,0,0,0,0,1000000000
4,1995,248,6,26,13,81,94,0,16,0,...,112800000,101285800000,834300000,25006834000,27555794000,0,36289000,0,722000,134500000
5,2016,325,15,30,12,84,161,0,13,0,...,3554000000,32994500000,1727000000,45111315000,57382350000,0,725000000,0,0,6287000000
6,2017,276,7,19,11,85,114,0,25,0,...,2422000000,2764338000,0,122118300000,15778682000,0,6300000,0,0,1019000000
7,2004,350,11,42,16,124,128,0,15,1,...,2991275000,38770000000,0,84227635000,10383038000,0,3500000,0,0,3000000
8,2010,393,17,24,29,94,184,0,32,0,...,3884700000,47300660000,400000000,28124083000,49137575000,0,1277078000,0,0,2070000000
9,2013,332,9,29,14,105,149,1,11,1,...,1087000000,9112859000,1000000000,52388364000,54782566000,33000000,0,8000000,0,1072400000
