# Data Cleaning 1 
### Converting FemaWebDisasterSummaries.csv into DisasterSummaries.csv for use in our database

In [2]:
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import inspect

In [3]:
# Load the raw FEMA web disaster summaries CSV into a DataFrame.
raw_summaries_csv = "FemaWebDisasterSummaries.csv"
dirty_summary_table = pd.read_csv(raw_summaries_csv)

In [4]:
# Preview the first five raw rows.
dirty_summary_table.head(n=5)

Unnamed: 0,disasterNumber,totalNumberIaApproved,totalAmountIhpApproved,totalAmountHaApproved,totalAmountOnaApproved,totalObligatedAmountPa,totalObligatedAmountCatAb,totalObligatedAmountCatC2g,paLoadDate,iaLoadDate,hash,lastRefresh
0,1258,,,,,,,,,,4d19a1c0116ebbd291f446dd4f353f5d,2019-09-04T00:47:21.380Z
1,1268,,,,,,,,,,5a9fb5ef3295e1e55f0901c821bb6e1f,2019-09-04T00:47:21.382Z
2,1241,,,,,,,,,,777ead5c411cb2efb04436853d0b7443,2018-03-09T18:42:11.608Z
3,1265,,,,,,,,,,b65172351ce90f9f3f1912f0cf401390,2019-09-04T00:47:21.380Z
4,1247,,,,,,,,,,da36ec31dba54b471b394548e699d17a,2019-09-04T00:47:21.378Z


### Our PostgreSQL database does not need data that references database maintenance. Create a new table that includes only the columns that we need.

In [5]:
# Columns carried over from the raw table; everything else is dropped.
# NOTE(review): 'totalObligatedAmountPa' exists in the raw CSV but is not listed
# here — confirm whether leaving it out is intentional.
summary_columns = [
    'disasterNumber',
    'totalNumberIaApproved',
    'totalAmountIhpApproved',
    'totalAmountHaApproved',
    'totalAmountOnaApproved',
    'totalObligatedAmountCatAb',
    'totalObligatedAmountCatC2g',
    'paLoadDate',
    'iaLoadDate',
]
# .copy() gives an independent frame, so later column assignments do not
# touch dirty_summary_table.
disasterSummaries = dirty_summary_table[summary_columns].copy()

In [6]:
# Preview the first five rows of the trimmed table.
disasterSummaries.head(n=5)

Unnamed: 0,disasterNumber,totalNumberIaApproved,totalAmountIhpApproved,totalAmountHaApproved,totalAmountOnaApproved,totalObligatedAmountCatAb,totalObligatedAmountCatC2g,paLoadDate,iaLoadDate
0,1258,,,,,,,,
1,1268,,,,,,,,
2,1241,,,,,,,,
3,1265,,,,,,,,
4,1247,,,,,,,,


In [7]:
# Inspect the dtype pandas inferred for each retained column.
column_dtypes = disasterSummaries.dtypes
print(column_dtypes)

disasterNumber                  int64
totalNumberIaApproved         float64
totalAmountIhpApproved        float64
totalAmountHaApproved         float64
totalAmountOnaApproved        float64
totalObligatedAmountCatAb     float64
totalObligatedAmountCatC2g    float64
paLoadDate                    float64
iaLoadDate                     object
dtype: object


### Remove hour:minute:second:timezone

In [9]:
# Strip the literal 'T00:00:00.000Z' suffix from iaLoadDate, leaving the date part.
# regex=False is explicit because the default of `regex` differs between pandas
# versions; as a regular expression the '.' would match any character rather
# than only a literal dot.
disasterSummaries['iaLoadDate'] = disasterSummaries['iaLoadDate'].str.replace(
    'T00:00:00.000Z', '', regex=False
)

In [30]:
# Lower-case every column name; each target name is simply the original name
# in lower case, so str.lower expresses the whole mapping.
disasterSummaries = disasterSummaries.rename(columns=str.lower)
print(disasterSummaries)

      disasternumber  totalnumberiaapproved  totalamountihpapproved  \
0               1258                    NaN                     NaN   
1               1268                    NaN                     NaN   
2               1241                    NaN                     NaN   
3               1265                    NaN                     NaN   
4               1247                    NaN                     NaN   
5               1272                    NaN                     NaN   
6               1243                    NaN                     NaN   
7               1239                    NaN                     NaN   
8               1278                    NaN                     NaN   
9               1250                    NaN                     NaN   
10              1275                    NaN                     NaN   
11              1257                    NaN                     NaN   
12              1285                    NaN                     NaN   
13    

# Export to PostgreSQL

In [24]:
from sqlalchemy import create_engine

In [25]:
# Build the PostgreSQL connection from environment variables so that no
# credential is stored in the notebook. PGPASSWORD is required (KeyError if it
# is unset); the other settings fall back to the values used previously.
pg_user = os.environ.get("PGUSER", "postgres")
# quote_plus escapes characters such as '!' or '@' that would otherwise be
# misread as URL syntax.
pg_password = quote_plus(os.environ["PGPASSWORD"])
pg_host = os.environ.get("PGHOST", "localhost")
pg_port = os.environ.get("PGPORT", "5432")
pg_database = os.environ.get("PGDATABASE", "Project_2")

conn = f"{pg_user}:{pg_password}@{pg_host}:{pg_port}/{pg_database}"
engine = create_engine(f'postgresql://{conn}')

In [26]:
# List the tables visible through this engine. Engine.table_names() is
# deprecated in SQLAlchemy 1.4 and removed in 2.0; the inspector is the
# supported way to get the same list.
inspect(engine).get_table_names()

['disasterdeclarations', 'disastersummaries2', 'disastersummaries']

In [31]:
# Append the cleaned rows to the 'disastersummaries' table; the DataFrame index
# is not written.
# NOTE(review): with if_exists='append', re-running this cell inserts the same
# rows again — confirm the table holds no duplicates.
disasterSummaries.to_sql(
    name='disastersummaries',
    con=engine,
    index=False,
    if_exists='append',
)

In [33]:
# Read the table back and show the first 15 rows as a check on the write.
summaries_query = 'select * from disastersummaries'
pd.read_sql_query(summaries_query, con=engine).head(15)

Unnamed: 0,disasternumber,totalnumberiaapproved,totalamountihpapproved,totalamounthaapproved,totalamountonaapproved,totalobligatedamountpa,totalobligatedamountcatab,totalobligatedamountcatc2g,paloaddate,ialoaddate
0,1258,,,,,,,,,
1,1268,,,,,,,,,
2,1241,,,,,,,,,
3,1265,,,,,,,,,
4,1247,,,,,,,,,
5,1272,,,,,,,,,
6,1243,,,,,,,,,
7,1239,,,,,,,,,
8,1278,,,,,,,,,
9,1250,,,,,,,,,


In [45]:
# Sort by totalnumberiaapproved (ascending, the sort_values default) and keep
# the first 15 rows.
ranked_by_ia_approved = disasterSummaries.sort_values(by='totalnumberiaapproved')
DS = ranked_by_ia_approved.head(15)

DS

Unnamed: 0,disasternumber,totalnumberiaapproved,totalamountihpapproved,totalamounthaapproved,totalamountonaapproved,totalobligatedamountcatab,totalobligatedamountcatc2g,paloaddate,ialoaddate
270,1495,1.0,1756.2,1756.2,,,,,2019-09-03
2472,4341,3.0,9674.92,9674.92,,,,,2019-09-03
380,1637,53.0,215839.32,170580.92,45258.4,,,,2019-09-03
468,1726,58.0,432005.4,306633.18,125372.22,,,,2019-09-03
346,1599,77.0,474304.46,284815.63,189488.83,,,,2019-09-03
2334,4089,105.0,421340.81,378748.8,42592.01,,,,2019-09-03
640,1820,105.0,1251943.17,1000815.99,251127.18,,,,2019-09-03
358,1617,109.0,459715.15,229744.12,229971.03,,,,2019-09-03
522,1846,115.0,2055904.84,1798705.32,257199.52,,,,2019-09-03
222,1470,124.0,740552.45,390852.02,349700.43,,,,2019-09-03


In [46]:
# Write DS out as an HTML table file.
DS.to_html(buf='disasterSummaries.html')