In [29]:
import pandas as pd
from sqlalchemy import create_engine
from config import sqlpassword
import time
import psycopg2

In [2]:
file = "Resources/1950-2019_torn.csv"
tornado_data = pd.read_csv(file)

In [3]:
pd.set_option('display.max_rows', 5000)

In [4]:
list(tornado_data.columns)

['om',
 'yr',
 'mo',
 'dy',
 'date',
 'time',
 'tz',
 'st',
 'stf',
 'stn',
 'mag',
 'inj',
 'fat',
 'loss',
 'closs',
 'slat',
 'slon',
 'elat',
 'elon',
 'len',
 'wid',
 'ns',
 'sn',
 'sg',
 'f1',
 'f2',
 'f3',
 'f4',
 'fc']

In [5]:
tornado_df = tornado_data[[
    "om",
    "yr",
    "mo",
    "st",
    "mag",
    "inj",
    "fat",
    "loss",
    "slat",
    "slon",
    "len",
    "wid",
    "ns",
    "sn",
    "sg"
]]

In [6]:
tornado_df = tornado_df.rename(columns={
    "om":"tornado_num",
    "yr":"year",
    "mo":"month",
    "st":"state",
    "mag":"magnitude",
    "inj":"injury",
    "fat":"fatalities",
    "loss":"loss",
    "slat":"latitude",
    "slon":"longitude",
    "len":"miles_traveled",
    "wid":"width_yards",
    "ns":"num_states_affected",
    "sn":"state_num",
    "sg":"segment_num"
})

In [7]:
tornado_df = tornado_df.loc[tornado_df["state_num"] != 0]
tornado_df.state_num.min()

1

In [8]:
# Check that all loss values are integers by filtered years
less1996 = tornado_df.loc[(tornado_df["year"] < 1996)]
less1996_year_loss = less1996[["year", "loss"]]
less1996_year_loss.loss.apply(float.is_integer).all()

True

In [9]:
tornado_df[["year","loss"]].describe()

Unnamed: 0,year,loss
count,65724.0,65724.0
mean,1990.299175,69792.26
std,18.851562,6786282.0
min,1950.0,0.0
25%,1975.0,0.0
50%,1993.0,0.1
75%,2006.0,4.0
max,2019.0,1550000000.0


In [10]:
less1996[["year","loss"]].describe()

Unnamed: 0,year,loss
count,35677.0,35677.0
mean,1975.875186,2.773524
std,12.633464,2.078864
min,1950.0,0.0
25%,1966.0,0.0
50%,1977.0,3.0
75%,1987.0,4.0
max,1995.0,8.0


In [11]:
_1996_2015 = tornado_df.loc[(tornado_df["year"] >= 1996)&(tornado_df["year"] <= 2015)]
_1996_2015[["year","loss"]].describe()

Unnamed: 0,year,loss
count,24939.0,24939.0
mean,2005.335258,1.274281
std,5.589723,31.344328
min,1996.0,0.0
25%,2001.0,0.0
50%,2005.0,0.0
75%,2010.0,0.05
max,2015.0,2800.1


In [12]:
greater2016 = tornado_df.loc[(tornado_df["year"] >= 2016)]
greater2016[["year","loss"]].describe()

Unnamed: 0,year,loss
count,5108.0,5108.0
mean,2017.632733,897982.7
std,1.104755,24329600.0
min,2016.0,0.0
25%,2017.0,0.0
50%,2018.0,200.0
75%,2019.0,50000.0
max,2019.0,1550000000.0


In [13]:
less1996.year.count()+_1996_2015.year.count()+greater2016.year.count()

65724

In [14]:
_1996_2015["loss"] = _1996_2015["loss"] * 1000000
_1996_2015.max()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  _1996_2015["loss"] = _1996_2015["loss"] * 1000000


tornado_num                613494
year                         2015
month                          12
state                          WY
magnitude                       5
injury                       1500
fatalities                    158
loss                   2.8001e+09
latitude                    61.02
longitude                   -64.9
miles_traveled             136.62
width_yards                  4576
num_states_affected             3
state_num                       1
segment_num                     2
dtype: object

In [15]:
_1996_2015.sort_values("loss", ascending=False)

Unnamed: 0,tornado_num,year,month,state,magnitude,injury,fatalities,loss,latitude,longitude,miles_traveled,width_yards,num_states_affected,state_num,segment_num
56503,296616,2011,5,MO,5,1150,158,2.800100e+09,37.0524,-94.5932,21.62,1600,1,1,1
56249,314625,2011,4,AL,4,1500,64,2.450000e+09,33.0297,-87.9350,80.68,2600,1,1,1
58474,451537,2013,5,OK,5,212,24,2.000000e+09,35.2840,-97.6280,13.85,1900,1,1,1
56223,309488,2011,4,AL,5,145,72,1.290000e+09,34.1043,-88.1479,118.63,2200,2,1,2
40287,1147,1999,5,OK,5,583,36,1.000000e+09,35.1300,-97.8500,37.00,1430,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40677,1312,1999,6,WI,0,0,0,0.000000e+00,45.1700,-92.7500,0.50,50,1,1,1
49751,785,2006,8,FL,1,0,0,0.000000e+00,29.9200,-82.4200,5.50,100,1,1,1
49752,786,2006,8,ND,0,0,0,0.000000e+00,45.9500,-99.3700,0.10,40,1,1,1
49753,787,2006,8,MN,1,0,0,0.000000e+00,48.9200,-95.4800,5.00,50,1,1,1


In [16]:
_1996_2019 = pd.concat([_1996_2015,greater2016], ignore_index=True)
_1996_2019[["year","loss"]].describe()

Unnamed: 0,year,loss
count,30047.0,30047.0
mean,2007.425833,1210310.0
std,6.890545,30266710.0
min,1996.0,0.0
25%,2002.0,0.0
50%,2007.0,0.0
75%,2013.0,50000.0
max,2019.0,2800100000.0


In [17]:
bins_1996_2019 = _1996_2019

In [18]:
# bin values >1996 & assign to 1996 damage value scale
bins = [0,1,50,500,5000,50000,500000,5000000,50000000,500000000,5000000000]
labels = [0,1,2,3,4,5,6,7,8,9]
bins_1996_2019["loss"] = pd.cut(bins_1996_2019["loss"],include_lowest=True,right=False, bins=bins, labels=labels)

In [19]:
tornado_clean = pd.concat([bins_1996_2019,less1996], ignore_index=True)
tornado_clean.isnull().any()

tornado_num            False
year                   False
month                  False
state                  False
magnitude              False
injury                 False
fatalities             False
loss                   False
latitude               False
longitude              False
miles_traveled         False
width_yards            False
num_states_affected    False
state_num              False
segment_num            False
dtype: bool

In [20]:
tornado_clean

Unnamed: 0,tornado_num,year,month,state,magnitude,injury,fatalities,loss,latitude,longitude,miles_traveled,width_yards,num_states_affected,state_num,segment_num
0,1,1996,1,FL,0,0,0,4.0,28.08,-80.60,0.5,35,1,1,1
1,859,1996,1,SC,0,0,0,4.0,33.50,-80.87,0.5,50,1,1,1
2,860,1996,1,SC,0,0,0,0.0,33.50,-80.85,0.3,50,1,1,1
3,4,1996,1,FL,1,9,0,6.0,25.68,-80.42,7.0,40,1,1,1
4,5,1996,1,FL,0,0,0,5.0,26.00,-80.23,0.4,10,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65719,1231,1995,12,LA,0,0,0,0.0,31.47,-93.67,0.1,30,2,1,2
65720,1231,1995,12,TX,0,0,0,0.0,31.43,-93.75,0.1,30,2,1,2
65721,1232,1995,12,LA,1,0,0,3.0,30.60,-90.23,0.5,10,1,1,1
65722,1233,1995,12,FL,2,1,1,4.0,30.68,-84.68,0.3,50,1,1,1


In [24]:
tornado_clean.to_csv("Resources/tornado_clean.csv", encoding="utf-8", index = False)

In [25]:
#CREATE ENGINE
engine = create_engine(f"postgres://ouvitqtn:{sqlpassword}@queenie.db.elephantsql.com:5432/ouvitqtn")

In [26]:
#LOAD DATA INTO POSTGRESQL
tornado_clean.to_sql(name='tornado_db', con=engine, if_exists='append', index=False)

In [27]:
#TEST/CALL ENGINE TABLE NAMES
engine.table_names()

['ouvitqtn', 'tornado_db']

In [39]:
db_conn = psycopg2.connect(database="ouvitqtn", user="ouvitqtn", password=f"{sqlpassword}", host="queenie.db.elephantsql.com", port="5432")
db_cursor = db_conn.cursor()
db_cursor.execute("SELECT * FROM tornado_db")
tornado_table = db_cursor.fetchall()

{'tornado_num': 1, 'year': 1996, 'month': 1, 'state': 'FL', 'magnitude': 0, 'injury': 0, 'fatalities': 0, 'loss': 4.0, 'latitude': 28.08, 'longitude': -80.6, 'miles_traveled': 0.5, 'width_yards': 35, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 859, 'year': 1996, 'month': 1, 'state': 'SC', 'magnitude': 0, 'injury': 0, 'fatalities': 0, 'loss': 4.0, 'latitude': 33.5, 'longitude': -80.87, 'miles_traveled': 0.5, 'width_yards': 50, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 860, 'year': 1996, 'month': 1, 'state': 'SC', 'magnitude': 0, 'injury': 0, 'fatalities': 0, 'loss': 0.0, 'latitude': 33.5, 'longitude': -80.85, 'miles_traveled': 0.3, 'width_yards': 50, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 4, 'year': 1996, 'month': 1, 'state': 'FL', 'magnitude': 1, 'injury': 9, 'fatalities': 0, 'loss': 6.0, 'latitude': 25.68, 'longitude': -80.42, 'miles_traveled': 7.0, 'width_yards': 40, 'num_states_aff

{'tornado_num': 985, 'year': 1998, 'month': 5, 'state': 'NC', 'magnitude': 1, 'injury': 0, 'fatalities': 0, 'loss': 6.0, 'latitude': 35.98, 'longitude': -80.77, 'miles_traveled': 3.0, 'width_yards': 440, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 986, 'year': 1998, 'month': 5, 'state': 'NC', 'magnitude': 2, 'injury': 0, 'fatalities': 0, 'loss': 5.0, 'latitude': 35.62, 'longitude': -81.98, 'miles_traveled': 3.7, 'width_yards': 880, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 987, 'year': 1998, 'month': 5, 'state': 'NC', 'magnitude': 1, 'injury': 0, 'fatalities': 0, 'loss': 5.0, 'latitude': 35.85, 'longitude': -81.35, 'miles_traveled': 10.0, 'width_yards': 200, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 988, 'year': 1998, 'month': 5, 'state': 'NC', 'magnitude': 0, 'injury': 0, 'fatalities': 0, 'loss': 0.0, 'latitude': 36.02, 'longitude': -80.52, 'miles_traveled': 5.0, 'width_yards': 100, 'nu

{'tornado_num': 139, 'year': 2000, 'month': 4, 'state': 'LA', 'magnitude': 1, 'injury': 0, 'fatalities': 0, 'loss': 6.0, 'latitude': 32.88, 'longitude': -93.98, 'miles_traveled': 8.0, 'width_yards': 500, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 136, 'year': 2000, 'month': 4, 'state': 'AR', 'magnitude': 0, 'injury': 0, 'fatalities': 0, 'loss': 4.0, 'latitude': 33.77, 'longitude': -93.57, 'miles_traveled': 2.4, 'width_yards': 200, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 117, 'year': 2000, 'month': 4, 'state': 'TX', 'magnitude': 1, 'injury': 0, 'fatalities': 0, 'loss': 0.0, 'latitude': 32.5, 'longitude': -94.2, 'miles_traveled': 5.0, 'width_yards': 100, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 134, 'year': 2000, 'month': 4, 'state': 'LA', 'magnitude': 3, 'injury': 3, 'fatalities': 0, 'loss': 7.0, 'latitude': 32.43, 'longitude': -94.05, 'miles_traveled': 32.5, 'width_yards': 500, 'num_

{'tornado_num': 317, 'year': 2002, 'month': 11, 'state': 'GA', 'magnitude': 2, 'injury': 0, 'fatalities': 0, 'loss': 6.0, 'latitude': 34.3, 'longitude': -84.98, 'miles_traveled': 12.0, 'width_yards': 100, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 458, 'year': 2002, 'month': 11, 'state': 'GA', 'magnitude': 2, 'injury': 13, 'fatalities': 0, 'loss': 7.0, 'latitude': 34.35, 'longitude': -84.58, 'miles_traveled': 23.0, 'width_yards': 100, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 901, 'year': 2002, 'month': 11, 'state': 'MS', 'magnitude': 1, 'injury': 0, 'fatalities': 0, 'loss': 5.0, 'latitude': 31.08, 'longitude': -89.82, 'miles_traveled': 4.0, 'width_yards': 25, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 396, 'year': 2002, 'month': 11, 'state': 'LA', 'magnitude': 1, 'injury': 0, 'fatalities': 0, 'loss': 4.0, 'latitude': 30.53, 'longitude': -90.23, 'miles_traveled': 0.3, 'width_yards': 150,

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




{'tornado_num': 14, 'year': 1978, 'month': 1, 'state': 'FL', 'magnitude': 0, 'injury': 0, 'fatalities': 0, 'loss': 4.0, 'latitude': 28.08, 'longitude': -80.6, 'miles_traveled': 2.0, 'width_yards': 20, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 15, 'year': 1978, 'month': 1, 'state': 'FL', 'magnitude': 0, 'injury': 0, 'fatalities': 0, 'loss': 4.0, 'latitude': 30.07, 'longitude': -81.87, 'miles_traveled': 0.5, 'width_yards': 20, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 16, 'year': 1978, 'month': 1, 'state': 'FL', 'magnitude': 0, 'injury': 0, 'fatalities': 0, 'loss': 3.0, 'latitude': 28.05, 'longitude': -82.53, 'miles_traveled': 1.0, 'width_yards': 50, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 17, 'year': 1978, 'month': 1, 'state': 'FL', 'magnitude': 1, 'injury': 0, 'fatalities': 0, 'loss': 5.0, 'latitude': 28.58, 'longitude': -81.33, 'miles_traveled': 0.5, 'width_yards': 20, 'num_states_

{'tornado_num': 66, 'year': 1981, 'month': 4, 'state': 'NE', 'magnitude': 2, 'injury': 0, 'fatalities': 0, 'loss': 5.0, 'latitude': 40.87, 'longitude': -96.87, 'miles_traveled': 8.3, 'width_yards': 50, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 67, 'year': 1981, 'month': 4, 'state': 'KS', 'magnitude': 1, 'injury': 0, 'fatalities': 0, 'loss': 3.0, 'latitude': 39.05, 'longitude': -96.23, 'miles_traveled': 0.1, 'width_yards': 10, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 68, 'year': 1981, 'month': 4, 'state': 'KS', 'magnitude': 2, 'injury': 0, 'fatalities': 0, 'loss': 5.0, 'latitude': 39.78, 'longitude': -95.0, 'miles_traveled': 1.8, 'width_yards': 33, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 69, 'year': 1981, 'month': 4, 'state': 'IA', 'magnitude': 0, 'injury': 0, 'fatalities': 0, 'loss': 0.0, 'latitude': 41.95, 'longitude': -94.17, 'miles_traveled': 5.2, 'width_yards': 30, 'num_states_a

{'tornado_num': 203, 'year': 1984, 'month': 4, 'state': 'MN', 'magnitude': 3, 'injury': 52, 'fatalities': 1, 'loss': 7.0, 'latitude': 45.02, 'longitude': -93.23, 'miles_traveled': 5.5, 'width_yards': 60, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 204, 'year': 1984, 'month': 4, 'state': 'KS', 'magnitude': 0, 'injury': 0, 'fatalities': 0, 'loss': 2.0, 'latitude': 37.02, 'longitude': -95.9, 'miles_traveled': 0.1, 'width_yards': 10, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 205, 'year': 1984, 'month': 4, 'state': 'OK', 'magnitude': 2, 'injury': 8, 'fatalities': 0, 'loss': 5.0, 'latitude': 36.02, 'longitude': -97.07, 'miles_traveled': 6.0, 'width_yards': 70, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 206, 'year': 1984, 'month': 4, 'state': 'WI', 'magnitude': 1, 'injury': 0, 'fatalities': 0, 'loss': 5.0, 'latitude': 45.57, 'longitude': -92.37, 'miles_traveled': 0.5, 'width_yards': 27, 'num_sta

{'tornado_num': 330, 'year': 1987, 'month': 6, 'state': 'NM', 'magnitude': 0, 'injury': 0, 'fatalities': 0, 'loss': 3.0, 'latitude': 35.08, 'longitude': -106.55, 'miles_traveled': 0.2, 'width_yards': 20, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 334, 'year': 1987, 'month': 6, 'state': 'FL', 'magnitude': 0, 'injury': 1, 'fatalities': 0, 'loss': 3.0, 'latitude': 27.88, 'longitude': -81.83, 'miles_traveled': 2.0, 'width_yards': 30, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 335, 'year': 1987, 'month': 6, 'state': 'OH', 'magnitude': 0, 'injury': 0, 'fatalities': 0, 'loss': 4.0, 'latitude': 39.97, 'longitude': -80.85, 'miles_traveled': 6.0, 'width_yards': 73, 'num_states_affected': 1, 'state_num': 1, 'segment_num': 1}
{'tornado_num': 332, 'year': 1987, 'month': 6, 'state': 'MI', 'magnitude': 1, 'injury': 0, 'fatalities': 0, 'loss': 3.0, 'latitude': 41.92, 'longitude': -84.37, 'miles_traveled': 0.8, 'width_yards': 147, 'num_s

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)

