# This script uses a Neural Network to predict EF Rating given Length (miles), Width (yds), Loss ($)

In [1]:
# Import dependencies
from pathlib import Path 
import pandas as pd
import numpy as np

## Import & Inspect the Data

In [2]:
# Read the cleaned 1950-2020 tornado records (path is relative to this notebook)
file_path = Path("../Cleaned_Data/1950-2020_tornadoes_cleaned.csv")
tornado_df = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# Display the column labels of the loaded frame to see which fields are available
tornado_df.columns

Index(['Timestamp', 'Year', 'Month', 'Day', 'State', 'Start_Lat', 'Start_Lon',
       'End_Lat', 'End_Lon', 'EF', 'Injuries', 'Fatalities', 'Loss',
       'Crop_Loss', 'Length', 'Width'],
      dtype='object')

In [4]:
# Discard every record (row) that has a missing value in any column
tornado_df = tornado_df.dropna(axis=0, how="any")

## Machine Learning: Neural Network using Keras

In [5]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy

ModuleNotFoundError: No module named 'tensorflow'

In [6]:
!pip3 install ann_visualizer
!pip install graphviz

Collecting ann_visualizer
  Downloading ann_visualizer-2.5.tar.gz (4.7 kB)
Building wheels for collected packages: ann-visualizer
  Building wheel for ann-visualizer (setup.py): started
  Building wheel for ann-visualizer (setup.py): finished with status 'done'
  Created wheel for ann-visualizer: filename=ann_visualizer-2.5-py3-none-any.whl size=4181 sha256=62c4feecfd36c4d3d7e51269733f4382824c69d9b39d915b2d0170cad1442c89
  Stored in directory: c:\users\nat\appdata\local\pip\cache\wheels\2d\d8\86\67f4a249969eaaa31c6df569f4ebfa84634fae3af2c627107b
Successfully built ann-visualizer
Installing collected packages: ann-visualizer
Successfully installed ann-visualizer-2.5
Collecting graphviz
  Downloading graphviz-0.20-py3-none-any.whl (46 kB)
Installing collected packages: graphviz
Successfully installed graphviz-0.20


### 1. Define input data (X) and output data (y)

In [7]:
# Build the feature matrix: remove every non-predictor column, which leaves
# Loss, Length and Width. drop() returns a new frame, so tornado_df is untouched.
X = tornado_df.drop(
    columns=[
        'Timestamp', 'Year', 'Month', 'Day', 'State',
        'Start_Lat', 'Start_Lon', 'End_Lat', 'End_Lon',
        'EF', 'Injuries', 'Fatalities', 'Crop_Loss',
    ]
)
X

Unnamed: 0,Loss,Length,Width
41903,1110000.0,17.00,120
49035,150000.0,0.46,50
50046,200000.0,13.93,200
50056,900000.0,17.26,360
50070,1000000.0,4.80,440
...,...,...,...
66897,20000.0,1.26,50
66918,35000.0,0.26,50
66919,45000.0,2.17,100
66971,150000.0,2.58,100


In [8]:
# Target vector: the EF rating of each tornado as a 1-D NumPy array.
# Series.to_numpy() replaces Series.ravel(), which is deprecated in recent pandas
# (a Series is already one-dimensional, so no flattening is needed).
y = tornado_df["EF"].to_numpy()
# Peek at the first few labels only
y[:10]

array([2, 2, 2, 2, 3, 1, 1, 2, 0, 2], dtype=int64)

### 2. Splitting into Train and Test sets

In [None]:
# Hold out 25% of the rows (scikit-learn's default share) for testing; the fixed
# random_state keeps the partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=78,
)

### 3. Scale the input data (X)

In [None]:
# Create the StandardScaler instance
scaler = StandardScaler()
# Fit the scaler on the training features only, so no statistics from the
# test split are used when standardising
X_scaler = scaler.fit(X_train)
# Apply the fitted scaling to both splits
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

### 4. Define Deep Learning Model

In [None]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.

# Rule of Thumb:
#   1. Number of Layers: Start with two hidden layers (this does not include the last layer).
#   2. Number of nodes (size) of intermediate layers: a number from the geometric progression of 2, e.g., 4, 8, 16, 32, … . 
#      The first layer should be around half of the number of input data features. The next layer size as half of the previous.

number_input_features = len(X.columns)
hidden_nodes_layer1 = 4
hidden_nodes_layer2 = 2

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add( tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu") )

# Second hidden layer
nn.add( tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu") )

# Output layer
nn.add( tf.keras.layers.Dense(units=1, activation="sigmoid") )

# Check the structure of the model
nn.summary()

### 5. Compile & Train the Model

In [None]:
# Import checkpoint dependencies
import os
from tensorflow.keras.callbacks import ModelCheckpoint

# Define the checkpoint path and filenames
os.makedirs("checkpoints/",exist_ok=True)
checkpoint_path = "checkpoints/weights.{epoch:02d}.hdf5"

In [None]:
# Compile the model
nn.compile(loss="mean_squared_error", optimizer="adam", metrics=["accuracy"])

# Create a callback that saves the model's weights every epoch
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    period = 5)

In [None]:
# Fit the network on the scaled training data. 33% of those rows are set aside
# for per-epoch validation, and the checkpoint callback runs during training.
fit_model = nn.fit(
    x=X_train_scaled,
    y=y_train,
    validation_split=0.33,
    epochs=100,
    callbacks=[cp_callback],
)

### 6. Save the Model

In [None]:
# Persist the trained model to an HDF5 file in the working directory
nn.save(filepath="NN_predictEF.h5")

### 7. Plot Model Performance

In [None]:
# List the metric names recorded during training (used as keys by the plots below)
print(fit_model.history.keys())

In [None]:
# Summarize the accuracy history per epoch.
# The second curve comes from the validation_split carved out of the training data
# during fit(), not from the held-out test set, so it is labelled "validation".
plt.plot(fit_model.history['accuracy'], label='train')
plt.plot(fit_model.history['val_accuracy'], label='validation')
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(loc='upper left')
plt.show()


In [None]:
# Summarize the loss history per epoch.
# The second curve comes from the validation_split carved out of the training data
# during fit(), not from the held-out test set, so it is labelled "validation".
plt.plot(fit_model.history['loss'], label='train')
plt.plot(fit_model.history['val_loss'], label='validation')
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Draw a diagram of the network's layers with ann_visualizer and open it
from ann_visualizer.visualize import ann_viz

ann_viz(
    nn,
    view=True,
    filename="NN_predictEF",
    title="EF Neural Network",
)

### 8. Model Accuracy

In [None]:
# Score the trained network on the scaled, held-out test split and report both metrics
model_loss, model_accuracy = nn.evaluate(x=X_test_scaled, y=y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")