In [1]:
from urllib.parse import quote

import pandas as pd
import psycopg2
from sqlalchemy import create_engine

from config import password

In [2]:
# Read the raw historical tornado track records from the project's CSV file.
tornado_file = "static/data/Historical_Tornado_Tracks.csv"
tornado_df = pd.read_csv(filepath_or_buffer=tornado_file)

# Preview the first five rows.
tornado_df.head(5)

Unnamed: 0,FID,OM,YR,MO,DY,DATE,TIME,TZ,ST,STF,...,FAT,LOSS,CLOSS,SLAT,SLON,ELAT,ELON,LEN,WID,Shape_Leng
0,2001,141,2002,6,5,2002-06-05,16:45:00,3,FL,12,...,0,0.0,0.0,28.08,-82.78,28.08,-82.78,0.1,20,0.0
1,2002,142,2002,6,8,2002-06-08,13:40:00,3,FL,12,...,0,0.5,0.0,27.73,-82.73,27.72,-82.75,1.0,40,0.022361
2,2003,145,2002,6,17,2002-06-17,16:40:00,3,FL,12,...,0,0.0,0.0,27.48,-82.72,27.48,-82.72,0.1,20,0.0
3,2004,187,2002,7,11,2002-07-11,12:30:00,3,FL,12,...,0,0.02,0.0,27.77,-82.77,27.77,-82.77,0.5,40,0.0
4,2005,188,2002,7,12,2002-07-12,06:30:00,3,FL,12,...,0,0.001,0.0,27.92,-82.8,27.92,-82.8,0.5,40,0.0


In [3]:
# Keep only the columns needed downstream and give them short lowercase names.
# .copy() detaches the result from tornado_df so the raw frame stays untouched.
tornado_cols = ["YR", "DATE", "MAG", "SLAT", "SLON"]
tornado_transformed = (
    tornado_df.loc[:, tornado_cols]
    .copy()
    .rename(
        columns={
            "YR": "year",
            "DATE": "date",
            "MAG": "mag",
            "SLAT": "lat",
            "SLON": "lon",
        }
    )
)

tornado_transformed.head()

Unnamed: 0,year,date,mag,lat,lon
0,2002,2002-06-05,0,28.08,-82.78
1,2002,2002-06-08,0,27.73,-82.73
2,2002,2002-06-17,0,27.48,-82.72
3,2002,2002-07-11,0,27.77,-82.77
4,2002,2002-07-12,0,27.92,-82.8


In [4]:
# Restrict the tornado records to the years 2011 through 2013.
# Series.between is inclusive on both ends by default, so 2011 and 2013 are kept.
in_year_range = tornado_transformed["year"].between(2011, 2013)
tornado_filtered = tornado_transformed.loc[in_year_range]

tornado_filtered.head()

Unnamed: 0,year,date,mag,lat,lon
723,2011,2011-01-09,1,27.7107,-97.9663
733,2011,2011-03-31,1,27.8716,-82.8509
736,2011,2011-03-31,1,27.8983,-82.6935
738,2011,2011-05-13,0,29.2061,-90.0433
741,2011,2011-05-13,0,29.35,-89.53


In [5]:
# Read the raw 2011-2013 earthquake records from the project's CSV file.
earthquake_file = "static/data/earthquakes_2011-2013.csv"
earthquake_df = pd.read_csv(filepath_or_buffer=earthquake_file)

# Preview the first five rows.
earthquake_df.head(5)

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,...,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource
0,2012-04-12T07:15:48.500Z,28.696,-113.104,13.0,7.0,mww,474.0,95.9,,1.18,...,2020-09-21T18:04:24.284Z,"69 km ENE of San Luis, Mexico",earthquake,,,,,reviewed,us,us
1,2013-10-19T17:54:54.700Z,26.0913,-110.3209,9.45,6.6,mww,,44.0,1.98,1.1,...,2020-07-10T18:20:12.984Z,"98 km SW of Etchoropo, Mexico",earthquake,,3.0,,,reviewed,us,us
2,2012-12-14T10:36:01.590Z,31.095,-119.66,13.0,6.3,mww,760.0,20.9,,1.29,...,2020-05-06T09:28:04.095Z,"279 km SSW of Avalon, California",earthquake,,,,,reviewed,us,us
3,2012-09-25T23:45:24.940Z,24.666,-110.173,10.0,6.3,mww,659.0,25.7,,1.0,...,2020-09-21T18:24:36.158Z,"59 km NNE of La Paz, Mexico",earthquake,,,,,reviewed,us,us
4,2011-07-26T17:44:20.380Z,25.101,-109.525,12.0,6.0,mww,379.0,81.3,,1.19,...,2015-06-30T16:59:03.875Z,"73 km SW of Topolobampo, Mexico",earthquake,,,,,reviewed,us,us


In [6]:
# Keep only the timestamp, coordinates and magnitude from the raw earthquake data.
# .copy() detaches the result so later column edits do not touch earthquake_df.
earthquake_cols = ["time", "latitude", "longitude", "mag"]
earthquake_transformed = earthquake_df.loc[:, earthquake_cols].copy()

earthquake_transformed.head()

Unnamed: 0,time,latitude,longitude,mag
0,2012-04-12T07:15:48.500Z,28.696,-113.104,7.0
1,2013-10-19T17:54:54.700Z,26.0913,-110.3209,6.6
2,2012-12-14T10:36:01.590Z,31.095,-119.66,6.3
3,2012-09-25T23:45:24.940Z,24.666,-110.173,6.3
4,2011-07-26T17:44:20.380Z,25.101,-109.525,6.0


In [7]:
# Derive a date-only column: the part of each timestamp before the "T" separator.
time_split = earthquake_transformed["time"].str.split(pat="T", expand=True)

# Column 0 of the split holds the text before the first "T".
earthquake_transformed = earthquake_transformed.assign(date=time_split[0])
earthquake_transformed.head()

Unnamed: 0,time,latitude,longitude,mag,date
0,2012-04-12T07:15:48.500Z,28.696,-113.104,7.0,2012-04-12
1,2013-10-19T17:54:54.700Z,26.0913,-110.3209,6.6,2013-10-19
2,2012-12-14T10:36:01.590Z,31.095,-119.66,6.3,2012-12-14
3,2012-09-25T23:45:24.940Z,24.666,-110.173,6.3,2012-09-25
4,2011-07-26T17:44:20.380Z,25.101,-109.525,6.0,2011-07-26


In [8]:
# Extract the year from the timestamp and drop the raw "time" column.
#
# The year is read from the untouched raw frame (earthquake_df) instead of
# earthquake_transformed, and the drop tolerates an already-missing "time"
# column, so this cell can be re-run without raising a KeyError. assign()
# aligns the new values to earthquake_transformed by index.
#
# The year is cast to int so it is numeric like the tornado "year" column
# (which is compared against integers when filtering) rather than text.
# The cast raises if any timestamp is missing or does not start with a year.

# Column 0 of the split holds the text before the first "-", i.e. the year.
time_split2 = earthquake_df['time'].str.split('-', expand=True)

earthquake_transformed = (
    earthquake_transformed
    .assign(year=time_split2[0].astype(int))
    .drop(columns=['time'], errors='ignore')
)
earthquake_transformed.head()

Unnamed: 0,latitude,longitude,mag,date,year
0,28.696,-113.104,7.0,2012-04-12,2012
1,26.0913,-110.3209,6.6,2013-10-19,2013
2,31.095,-119.66,6.3,2012-12-14,2012
3,24.666,-110.173,6.3,2012-09-25,2012
4,25.101,-109.525,6.0,2011-07-26,2011


In [9]:
# Shorten the coordinate column names to "lat" / "lon", the same names the
# tornado data uses for its coordinates.
coordinate_names = {"latitude": "lat", "longitude": "lon"}
earthquake_transformed = earthquake_transformed.rename(columns=coordinate_names)

earthquake_transformed.head()

Unnamed: 0,lat,lon,mag,date,year
0,28.696,-113.104,7.0,2012-04-12,2012
1,26.0913,-110.3209,6.6,2013-10-19,2013
2,31.095,-119.66,6.3,2012-12-14,2012
3,24.666,-110.173,6.3,2012-09-25,2012
4,25.101,-109.525,6.0,2011-07-26,2011


In [12]:


# Build the SQLAlchemy engine for the local PostgreSQL database "disaster_db".
#
# The password is percent-encoded before it is placed in the URL: characters
# such as "@", ":", "/" or "%" would otherwise be read as URL delimiters and
# the connection would fail or target the wrong host. safe='' makes quote()
# encode "/" as well. Passwords without special characters are unchanged.
# NOTE(review): this assumes config.password holds the raw password; if it was
# stored already percent-encoded as a workaround, store the raw value instead.
connection_string = f"postgres:{quote(password, safe='')}@localhost:5432/disaster_db"
engine = create_engine(f'postgresql://{connection_string}')

In [17]:
# Load the filtered tornado rows into the "tornado" table.
# if_exists='append' adds to an existing table, so running this cell again
# inserts the same rows a second time. index=False skips the DataFrame index.
tornado_filtered.to_sql(
    name='tornado',
    con=engine,
    if_exists='append',
    index=False,
)

In [19]:
# Load the transformed earthquake rows into the "earthquake" table.
# if_exists='append' adds to an existing table, so running this cell again
# inserts the same rows a second time. index=False skips the DataFrame index.
earthquake_transformed.to_sql(
    name='earthquake',
    con=engine,
    if_exists='append',
    index=False,
)