### Goal
  - To predict the amount of property damage that can be expected from tornadoes based on their Fujita scale ratings

### Import Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

%matplotlib inline

### Import/Read Data

In [None]:
# Load the tornado catalog CSV (data published by the NOAA Storm Prediction Center).
# Fill in the location of the CSV file below before running this cell.
tornado_csv_path = ''

raw_tornado_catalog = pd.read_csv(tornado_csv_path)

In [None]:
# Keep only the columns used downstream: magnitude ('mag'), property loss ('loss')
# and state abbreviation ('st').
selected_columns = ['mag', 'loss', 'st']
tornado_catalog = raw_tornado_catalog[selected_columns]

In [None]:
# One-hot encode the state abbreviation; 'mag' and 'loss' pass through unchanged.
# The column list and the dummy dtype are spelled out so the result does not depend
# on pandas' dtype inference or on its version-specific default dummy dtype
# (uint8 in older releases, bool from pandas 2.0 on).
tornado_catalog_encoded = pd.get_dummies(tornado_catalog, columns=['st'], dtype='uint8')

In [None]:
# Display the encoded frame: 'mag', 'loss' and one indicator column per state.
tornado_catalog_encoded

Unnamed: 0,mag,loss,st_AK,st_AL,st_AR,st_AZ,st_CA,st_CO,st_CT,st_DC,st_DE,st_FL,st_GA,st_HI,st_IA,st_ID,st_IL,st_IN,st_KS,st_KY,st_LA,st_MA,st_MD,st_ME,st_MI,st_MN,st_MO,st_MS,st_MT,st_NC,st_ND,st_NE,st_NH,st_NJ,st_NM,st_NV,st_NY,st_OH,st_OK,st_OR,st_PA,st_PR,st_RI,st_SC,st_SD,st_TN,st_TX,st_UT,st_VA,st_VT,st_WA,st_WI,st_WV,st_WY
0,3,6.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,3,5.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,4.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,3,3.00,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,2,5.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60109,1,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
60110,2,0.05,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
60111,1,0.10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
60112,1,0.01,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Inspect the column dtypes to confirm every column is numeric after encoding.
tornado_catalog_encoded.dtypes

mag        int64
loss     float64
st_AK      uint8
st_AL      uint8
st_AR      uint8
st_AZ      uint8
st_CA      uint8
st_CO      uint8
st_CT      uint8
st_DC      uint8
st_DE      uint8
st_FL      uint8
st_GA      uint8
st_HI      uint8
st_IA      uint8
st_ID      uint8
st_IL      uint8
st_IN      uint8
st_KS      uint8
st_KY      uint8
st_LA      uint8
st_MA      uint8
st_MD      uint8
st_ME      uint8
st_MI      uint8
st_MN      uint8
st_MO      uint8
st_MS      uint8
st_MT      uint8
st_NC      uint8
st_ND      uint8
st_NE      uint8
st_NH      uint8
st_NJ      uint8
st_NM      uint8
st_NV      uint8
st_NY      uint8
st_OH      uint8
st_OK      uint8
st_OR      uint8
st_PA      uint8
st_PR      uint8
st_RI      uint8
st_SC      uint8
st_SD      uint8
st_TN      uint8
st_TX      uint8
st_UT      uint8
st_VA      uint8
st_VT      uint8
st_WA      uint8
st_WI      uint8
st_WV      uint8
st_WY      uint8
dtype: object

In [None]:
# Aggregate the catalog by state: number of tornadoes and total property loss.
# The un-encoded frame is used because the one-hot encoded frame no longer has an
# 'st' column to group on. The results go into new variables so that
# `tornado_catalog_encoded` stays intact for the modelling cells below.
losses_by_state = tornado_catalog.groupby('st')['loss']
tornado_state_summary = pd.DataFrame({
    'count': losses_by_state.count(),
    'total_loss': losses_by_state.sum(),
})

# Tornado counts per magnitude. This is indexed by 'mag' rather than by state,
# so it is kept as its own Series instead of being forced into the per-state frame.
tornado_fscale_counts = tornado_catalog.groupby('mag')['loss'].count().rename('f-scale')

display(tornado_state_summary, tornado_fscale_counts)


{'count': st
 AK       4
 AL    1979
 AR    1715
 AZ     241
 CA     423
 CO    2071
 CT      94
 DC       1
 DE      60
 FL    3233
 GA    1483
 HI      41
 IA    2404
 ID     206
 IL    2349
 IN    1391
 KS    4027
 KY     900
 LA    1858
 MA     158
 MD     346
 ME     124
 MI    1004
 MN    1708
 MO    2154
 MS    2034
 MT     406
 NC    1239
 ND    1483
 NE    2758
 NH      88
 NJ     142
 NM     561
 NV      86
 NY     422
 OH    1014
 OK    3658
 OR     105
 PA     752
 PR      24
 RI      10
 SC     942
 SD    1745
 TN    1145
 TX    8484
 UT     123
 VA     675
 VT      44
 WA     112
 WI    1309
 WV     128
 WY     651
 Name: loss, dtype: int64, 'f-scale': mag
 0    27933
 1    20221
 2     8924
 3     2412
 4      565
 5       59
 Name: loss, dtype: int64, 'total_loss': st
 AK        2.00
 AL     8889.27
 AR     4297.70
 AZ      364.45
 CA      620.26
 CO     1367.13
 CT      203.32
 DC        4.00
 DE      171.45
 FL     7351.86
 GA     4749.54
 HI       64.06
 IA     6076.

### Create Training and Test Data

In [None]:
# Features: every column except 'mag' (property loss plus the state indicators).
# They are cast to float32 so the model receives one homogeneous numeric array;
# a frame mixing float and bool/uint8 columns may otherwise convert to an
# object array that Keras cannot turn into a tensor.
# NOTE(review): the stated goal of this notebook is to predict property damage from
# the Fujita scale, but here 'mag' is the target and 'loss' is a feature; confirm
# which direction is intended.
X = tornado_catalog_encoded.drop(columns="mag").astype("float32")
y = tornado_catalog_encoded["mag"]

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# reproducible. `train_test_split` is already imported in the notebook's import cell,
# so it is not re-imported here.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=33
)

# Row counts of the full set, the training split and the test split.
len(X), len(X_train), len(X_test)

### Model

In [None]:
# Seed TensorFlow's global random generator before any layer is created.
tf.random.set_seed(33)

# Architecture: four 400-unit ReLU layers, one 40-unit ReLU layer, and a single
# output unit with no activation specified.
wide_hidden_layers = [
    tf.keras.layers.Dense(400, activation="relu") for _ in range(4)
]
tornado_model_A = tf.keras.Sequential(
    wide_hidden_layers
    + [
        tf.keras.layers.Dense(40, activation="relu"),
        tf.keras.layers.Dense(1),
    ]
)

# Compile: mean absolute error is both the training loss and the tracked metric.
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
tornado_model_A.compile(
    loss=tf.keras.losses.mae,
    optimizer=adam_optimizer,
    metrics=["mae"],
)

# Train for 500 epochs and keep the History object for the plot in the evaluation section.
history_A = tornado_model_A.fit(X_train, y_train, epochs=500)

In [None]:
# Print the layer-by-layer summary of the fitted model.
tornado_model_A.summary()

### Evaluation

In [None]:
# Evaluate the model on the held-out test split (the model was compiled with
# MAE as both the loss and the tracked metric).
tornado_model_A.evaluate(X_test, y_test)

In [None]:
# Median and mean of the training target, displayed as a tuple.
# NOTE(review): presumably a reference point for judging the model's MAE; confirm.
y_train.median(), y_train.mean()

In [None]:
# Plot every series recorded during training against the epoch index.
history_frame = pd.DataFrame(history_A.history)
ax = history_frame.plot()
ax.set_ylabel("loss")
ax.set_xlabel("epochs")