In [None]:
import datetime as dt
import os
import sqlite3

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2
import sqlalchemy
from sklearn.linear_model import LinearRegression
from sqlalchemy import create_engine, func
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session

## Create Tornado table

In [None]:
tornadoes = "1950-2018_all_tornadoes.csv"

In [None]:
#read data

tornadoes_report = pd.read_csv(tornadoes, delimiter=",")

In [None]:
#Rename columns
tornadoes_report = tornadoes_report.rename(columns={"om":"Tornado ID","date":"Date","time":"Time","tz":"Time Zone",
                                                  "st": "State", "mag":"Magnitude fscale", "inj":"Injuries","fat":"Fatalities",
                                                 "loss": "Est. Property Loss", "slat":"Starting Latitude","slon":"Starting Longitude",
                                                 "elat":"Ending Latitude", "elon":"Ending Longitude", "len": "Length in miles", "wid":"Width in yards"
                                                  })

In [None]:
#Create DF
tornado_df = pd.DataFrame(tornadoes_report[["Tornado ID","yr","mo","dy","Date","State","Magnitude fscale","Injuries",
                                          "Fatalities", "Est. Property Loss", "Starting Latitude", "Starting Longitude",
                                          "Ending Latitude", "Ending Longitude", "Length in miles", "Width in yards"]])

tornado_df.head()


In [None]:
# Create a copy of the tornado dataframe for the 
new_t_df = tornado_df.copy()
new_t_df.head()

In [None]:
#  Create new columns and reduce to what is needed
columnsTitles = ["Tornado ID", "yr", "mo", "Magnitude fscale"]

new_t_df = new_t_df.reindex(columns=columnsTitles)
new_t_df.head()

In [None]:
new_t_df = new_t_df.drop_duplicates(subset=None, keep='first', inplace=False)
new_t_df

In [None]:
cnt = pd.get_dummies(new_t_df["Magnitude fscale"])
cnt

In [None]:
newer_t_df = new_t_df.merge(cnt, left_index=True, right_index=True)
newer_t_df

In [None]:
nado_df = newer_t_df.rename(columns={-9:"Mag Unknown",0:"F0",1:"F1",2:"F2",3:"F3",4:"F4",5:"F5"})
nado_df

In [None]:
# Sum every column per (year, month); for the indicator columns this yields per-category counts
new_nado_df = nado_df.groupby(["yr", "mo"]).sum()
# Monthly total = sum over all magnitude categories, including the unknown bucket
category_columns = ["F0", "F1", "F2", "F3", "F4", "F5", "Mag Unknown"]
new_nado_df["Total Tornadoes"] = new_nado_df[category_columns].sum(axis=1)
new_nado_df

## DRAFT ONLY - Starting to Build Tornado and ENSO Data Table... pete

In [None]:
enso = "oni_enso.csv"
#read data

enso_data = pd.read_csv(enso, delimiter=",")

In [None]:
#Rename columns
enso_data = enso_data.rename(columns={"SEAS":"Season","YR":"yr","TOTAL":"Total Anom","ANOM":"Anomaly"
                                     })
                                      
enso_data.head()

In [None]:
new_enso = enso_data.copy()
new_enso.head(20)

In [None]:
# df.loc[df.my_channel > 20000, 'my_channel'] = 0

In [None]:
# Tag only the "DJF" rows with month 1; all other rows get NaN in the new "mo" column
# until they are mapped explicitly.
# The previous chained form (new_enso["mo"] = new_enso.loc[...] = 1) first assigned 1 to
# the entire column, so any season that was never remapped would silently count as month 1.
new_enso.loc[new_enso.Season == "DJF", "mo"] = 1
new_enso.head()

In [None]:
# Three-letter season code -> month number written into "mo".
# NOTE(review): presumably each code is mapped to the middle month of its
# three-month window (DJF -> 1, JFM -> 2, ...) — confirm against the data source.
season_to_month = {
    "DJF": 1, "JFM": 2, "FMA": 3, "MAM": 4,
    "AMJ": 5, "MJJ": 6, "JJA": 7, "JAS": 8,
    "ASO": 9, "SON": 10, "OND": 11, "NDJ": 12,
}
# Rows whose Season is not one of these codes are left exactly as they were
for season_code, month_number in season_to_month.items():
    new_enso.loc[new_enso['Season'].eq(season_code), "mo"] = month_number
new_enso.head()

I want to remove the "Season" column, and then add the tornado counts by category (F0, F1,...) and totals

In [None]:
new_enso['mo'] = new_enso['mo'].astype(int)
new_enso.head()

In [None]:
columnsTitles = ["yr", "mo", "Total Anom", "Anomaly"]

new_enso = new_enso.reindex(columns=columnsTitles)
new_enso.head()

In [None]:
comb_df = new_nado_df.merge(new_enso, how='left', left_index=True, right_on=['yr','mo'])
# comb_df

In [None]:
#  Create new columns and reduce to what is needed
columnsNew = ["yr", "mo", "Anomaly", "Total Anom", "Total Tornadoes", "F0", "F1", "F2", "F3", "F4", "F5"]

new_comb_df = comb_df.reindex(columns=columnsNew)
# new_comb_df

## January Regression Curve

In [None]:
# January - ENSO Anomaly
X = new_comb_df.loc[new_comb_df["mo"] == 1, ["Anomaly", "Total Anom"]]

In [None]:
# January - Total Tornadoes
y = new_comb_df.loc[new_comb_df["mo"] == 1, ["Total Tornadoes"]]


In [None]:
print(X.shape, y.shape)

In [None]:
X = X.to_numpy()
print(X)

In [None]:
plt.scatter(X[:, 0], X[:, 1], c="blue")

In [None]:
# Use train_test_split to create training and testing data
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
from sklearn.preprocessing import StandardScaler

# Create a StandardScater model and fit it to the training data
X_scaler = StandardScaler().fit(X_train)

In [None]:
# Transform the training and testing data using the X_scaler

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
from tensorflow.keras.utils import to_categorical

In [None]:
# One-hot encoding
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)

In [None]:
# first, create a normal neural network with 2 inputs, 6 hidden nodes, and 2 outputs
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(units=6, activation='relu', input_dim=2))
model.add(Dense(units=2, activation='softmax'))

In [None]:
model.summary()

In [None]:
# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Fit the model to the training data
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2
)

In [None]:
# y = y.values.tolist()
print(y)

In [None]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X, y)


In [None]:
# Score the model

score = model.score(X, y)
print(f"R2 Score: {score}")

In [None]:
predictions = model.predict(X)
print(f"January Residuals")
# Plot Residuals
plt.scatter(predictions, predictions - y)
plt.hlines(y=0, xmin=predictions.min(), xmax=predictions.max())
plt.show()

In [None]:
print('Weight coefficients: ', model.coef_)
print('y-axis intercept: ', model.intercept_) 

In [None]:
print(X["Anomaly"])

In [None]:
print(model.coef_)

In [None]:
m_coef1 = model.coef_[0,0]

m_coef2 = model.coef_[0,1]
print(m_coef1)
print(m_coef2)

In [None]:
x_min1 = X["Anomaly"].min()
x_max1 = X["Anomaly"].max()
print(x_min1)
print(x_max1)

In [None]:
x_min2 = X["Total Anom"].min()
x_max2 = X["Total Anom"].max()
print(x_min2)
print(x_max2)

In [None]:
y_min_actual = y.min()
y_max_actual = y.max()
print(y_min_actual)
print(y_max_actual)

In [None]:
# Evaluate the fitted plane by hand at the two corner points (both minima, both maxima)
intercept = model.intercept_
y_min = intercept + m_coef1 * x_min1 + m_coef2 * x_min2
y_max = intercept + m_coef1 * x_max1 + m_coef2 * x_max2
print(f"Actual Min Value: {y_min_actual}")
print(f"Calculated Min Value: {y_min}")
print(f"Actual Max Value: {y_max_actual}")
print(f"Calculated Max Value: {y_max}")

In [None]:
# Same two corner points, this time obtained through model.predict
y_min_predicted = model.predict([[x_min1, x_min2]])
y_max_predicted = model.predict([[x_max1, x_max2]])
# The "Actual" lines report the observed extremes (y_min_actual / y_max_actual);
# y_min / y_max hold the hand-calculated values and would be mislabeled here.
print(f"Actual Min Value: {y_min_actual}")
print(f"Predicted Min Value: {y_min_predicted}")
print(f"Actual Max Value: {y_max_actual}")
print(f"Predicted Max Value: {y_max_predicted}")

In [None]:
print(f"January Regression Curve")
# Observed tornado counts against the "Anomaly" predictor, so the points share axes with
# the fitted-line endpoints (passing y as a third positional argument made it marker size).
plt.scatter(X["Anomaly"], y, c='blue')
# January defines x_min1 / x_max1 (not x_min / x_max), so use those for the line's x-range.
# y_min / y_max are one-element arrays; ravel them to plain scalars for plotting.
# NOTE(review): y_min / y_max also include the "Total Anom" term, so this line is only a
# 2-D projection of the two-feature fit.
plt.plot([x_min1, x_max1], [np.ravel(y_min)[0], np.ravel(y_max)[0]], c='red')

## February Regression Curve

In [None]:
# February - ENSO Anomaly
X = new_comb_df.loc[new_comb_df["mo"] == 2, ["Anomaly"]]
# Febuary - Total Tornadoes
y = new_comb_df.loc[new_comb_df["mo"] == 2, ["Total Tornadoes"]]
print(X.shape, y.shape)

In [None]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X, y)

# Score the model

score = model.score(X, y)
print(f"R2 Score: {score}")

In [None]:
predictions = model.predict(X)
print(f"February Residuals")
# Plot Residuals
plt.scatter(predictions, predictions - y)
plt.hlines(y=0, xmin=predictions.min(), xmax=predictions.max())
plt.show()

In [None]:
print('Weight coefficients: ', model.coef_)
print('y-axis intercept: ', model.intercept_) 

In [None]:
x_min = X.min()
x_max = X.max()
# print(x_min)
# print(x_max)

In [None]:
y_min_actual = y.min()
y_max_actual = y.max()
print(y_min_actual)
print(y_max_actual)

In [None]:
y_min = 27.22809609 + -4.13959493 * x_min
y_max = 27.22809609 + -4.13959493 * x_max
print(f"Actual Min Value: {y_min_actual}")
print(f"Calculated Min Value: {y_min}")
print(f"Actual Max Value: {y_max_actual}")
print(f"Calculated Max Value: {y_max}")

In [None]:
y_min_predicted = model.predict([x_min])
y_max_predicted = model.predict([x_max])
print(f"Actual Min Value: {y_min}")
print(f"Predicted Min Value: {y_min_predicted}")
print(f"Actual Max Value: {y_max}")
print(f"Predicted Max Value: {y_max_predicted}")

In [None]:
print(f"February Regression Curve")
plt.scatter(X, y, c='blue')
plt.plot([x_min, x_max], [y_min, y_max], c='red')

## March Regression Curve

In [None]:
# March - X: ENSO Anomaly and y: Total Tornadoes
X = new_comb_df.loc[new_comb_df["mo"] == 3, ["Anomaly"]]

y = new_comb_df.loc[new_comb_df["mo"] == 3, ["Total Tornadoes"]]
print(X.shape, y.shape)

In [None]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X, y)

# Score the model

score = model.score(X, y)
print(f"R2 Score: {score}")

In [None]:
predictions = model.predict(X)
print(f"March Residuals")
# Plot Residuals
plt.scatter(predictions, predictions - y)
plt.hlines(y=0, xmin=predictions.min(), xmax=predictions.max())
plt.show()

In [None]:
print('Weight coefficients: ', model.coef_)
print('y-axis intercept: ', model.intercept_) 

In [None]:
x_min = X.min()
x_max = X.max()
# print(x_min)
# print(x_max)

In [None]:
y_min_actual = y.min()
y_max_actual = y.max()
print(y_min_actual)
print(y_max_actual)

In [None]:
y_min = 60.98378899 + -9.48619428 * x_min
y_max = 60.98378899 + -9.48619428 * x_max
print(f"Actual Min Value: {y_min_actual}")
print(f"Calculated Min Value: {y_min}")
print(f"Actual Max Value: {y_max_actual}")
print(f"Calculated Max Value: {y_max}")

In [None]:
y_min_predicted = model.predict([x_min])
y_max_predicted = model.predict([x_max])
print(f"Actual Min Value: {y_min}")
print(f"Predicted Min Value: {y_min_predicted}")
print(f"Actual Max Value: {y_max}")
print(f"Predicted Max Value: {y_max_predicted}")

In [None]:
print(f"March Regression Curve")
plt.scatter(X, y, c='blue')
plt.plot([x_min, x_max], [y_min, y_max], c='red')

## April Regression Curve

In [None]:
# April - X: ENSO Anomaly and y: Total Tornadoes
X = new_comb_df.loc[new_comb_df["mo"] == 4, ["Anomaly"]]

y = new_comb_df.loc[new_comb_df["mo"] == 4, ["Total Tornadoes"]]
print(X.shape, y.shape)

In [None]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X, y)

# Score the model

score = model.score(X, y)
print(f"R2 Score: {score}")

In [None]:
predictions = model.predict(X)
print(f"April Residuals")
# Plot Residuals
plt.scatter(predictions, predictions - y)
plt.hlines(y=0, xmin=predictions.min(), xmax=predictions.max())
plt.show()

In [None]:
print('Weight coefficients: ', model.coef_)
print('y-axis intercept: ', model.intercept_) 

In [None]:
x_min = X.min()
x_max = X.max()
print(x_min)
print(x_max)

In [None]:
y_min_actual = y.min()
y_max_actual = y.max()
print(y_min_actual)
print(y_max_actual)

In [None]:
y_min = 130.69512523 + -35.42522666 * x_min
y_max = 130.69512523 + -35.42522666 * x_max
print(f"Actual Min Value: {y_min_actual}")
print(f"Calculated Min Value: {y_min}")
print(f"Actual Max Value: {y_max_actual}")
print(f"Calculated Max Value: {y_max}")

In [None]:
y_min_predicted = model.predict([x_min])
y_max_predicted = model.predict([x_max])
print(f"Actual Min Value: {y_min}")
print(f"Predicted Min Value: {y_min_predicted}")
print(f"Actual Max Value: {y_max}")
print(f"Predicted Max Value: {y_max_predicted}")

In [None]:
print(f"April Regression Curve")
plt.scatter(X, y, c='blue')
plt.plot([x_min, x_max], [y_min, y_max], c='red')

## May Regression Curve

In [None]:
# May - X: ENSO Anomaly and y: Total Tornadoes
X = new_comb_df.loc[new_comb_df["mo"] == 5, ["Anomaly"]]

y = new_comb_df.loc[new_comb_df["mo"] == 5, ["Total Tornadoes"]]
print(X.shape, y.shape)

In [None]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X, y)

# Score the model

score = model.score(X, y)
print(f"R2 Score: {score}")

In [None]:
predictions = model.predict(X)
print(f"May Residuals")
# Plot Residuals
plt.scatter(predictions, predictions - y)
plt.hlines(y=0, xmin=predictions.min(), xmax=predictions.max())
plt.show()

In [None]:
print('Weight coefficients: ', model.coef_)
print('y-axis intercept: ', model.intercept_) 

In [None]:
x_min = X.min()
x_max = X.max()
print(x_min)
print(x_max)

In [None]:
y_min_actual = y.min()
y_max_actual = y.max()
print(y_min_actual)
print(y_max_actual)

In [None]:
y_min = 202.07784876 + -1.03125139 * x_min
y_max = 202.07784876 + -1.03125139 * x_max
print(f"Actual Min Value: {y_min_actual}")
print(f"Calculated Min Value: {y_min}")
print(f"Actual Max Value: {y_max_actual}")
print(f"Calculated Max Value: {y_max}")

In [None]:
y_min_predicted = model.predict([x_min])
y_max_predicted = model.predict([x_max])
print(f"Actual Min Value: {y_min}")
print(f"Predicted Min Value: {y_min_predicted}")
print(f"Actual Max Value: {y_max}")
print(f"Predicted Max Value: {y_max_predicted}")

In [None]:
print(f"May Regression Curve")
plt.scatter(X, y, c='blue')
plt.plot([x_min, x_max], [y_min, y_max], c='red')

## June Regression Curve

In [None]:
# June - X: ENSO Anomaly and y: Total Tornadoes
X = new_comb_df.loc[new_comb_df["mo"] == 6, ["Anomaly"]]

y = new_comb_df.loc[new_comb_df["mo"] == 6, ["Total Tornadoes"]]
print(X.shape, y.shape)

In [None]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X, y)

# Score the model

score = model.score(X, y)
print(f"R2 Score: {score}")

In [None]:
predictions = model.predict(X)
print(f"June Residuals")
# Plot Residuals
plt.scatter(predictions, predictions - y)
plt.hlines(y=0, xmin=predictions.min(), xmax=predictions.max())
plt.show()

In [None]:
print('Weight coefficients: ', model.coef_)
print('y-axis intercept: ', model.intercept_) 

In [None]:
x_min = X.min()
x_max = X.max()
print(x_min)
print(x_max)

In [None]:
y_min_actual = y.min()
y_max_actual = y.max()
print(y_min_actual)
print(y_max_actual)

In [None]:
# Pull the value from the model coefficient array of arrays - assign to variable m_coef
m_coef = model.coef_[0]
print(m_coef)
# print(model.coef_)

In [None]:
y_min = model.intercept_ + m_coef * x_min
y_max = model.intercept_ + m_coef * x_max
print(f"Actual Min Value: {y_min_actual}")
print(f"Calculated Min Value: {y_min}")
print(f"Actual Max Value: {y_max_actual}")
print(f"Calculated Max Value: {y_max}")

In [None]:
y_min_predicted = model.predict([x_min])
y_max_predicted = model.predict([x_max])
print(f"Actual Min Value: {y_min}")
print(f"Predicted Min Value: {y_min_predicted}")
print(f"Actual Max Value: {y_max}")
print(f"Predicted Max Value: {y_max_predicted}")

In [None]:
print(f"June Regression Curve")
plt.scatter(X, y, c='blue')
plt.plot([x_min, x_max], [y_min, y_max], c='red')

## July Regression Curve

In [None]:
# July - X: ENSO Anomaly and y: Total Tornadoes
X = new_comb_df.loc[new_comb_df["mo"] == 7, ["Anomaly"]]

y = new_comb_df.loc[new_comb_df["mo"] == 7, ["Total Tornadoes"]]
print(X.shape, y.shape)

In [None]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X, y)

# Score the model

score = model.score(X, y)
print(f"R2 Score: {score}")

In [None]:
predictions = model.predict(X)
print(f"July Residuals")
# Plot Residuals
plt.scatter(predictions, predictions - y)
plt.hlines(y=0, xmin=predictions.min(), xmax=predictions.max())
plt.show()

In [None]:
print('Weight coefficients: ', model.coef_)
print('y-axis intercept: ', model.intercept_) 

In [None]:
# Pull the value from the model coefficient array of arrays - assign to variable m_coef
m_coef = model.coef_[0]
print(m_coef)
# print(model.coef_)

In [None]:
x_min = X.min()
x_max = X.max()
print(x_min)
print(x_max)

In [None]:
y_min_actual = y.min()
y_max_actual = y.max()
print(y_min_actual)
print(y_max_actual)

In [None]:
y_min = model.intercept_ + m_coef * x_min
y_max = model.intercept_ + m_coef * x_max
print(f"Actual Min Value: {y_min_actual}")
print(f"Calculated Min Value: {y_min}")
print(f"Actual Max Value: {y_max_actual}")
print(f"Calculated Max Value: {y_max}")

In [None]:
y_min_predicted = model.predict([x_min])
y_max_predicted = model.predict([x_max])
print(f"Actual Min Value: {y_min}")
print(f"Predicted Min Value: {y_min_predicted}")
print(f"Actual Max Value: {y_max}")
print(f"Predicted Max Value: {y_max_predicted}")

In [None]:
print(f"July Regression Curve")
plt.scatter(X, y, c='blue')
plt.plot([x_min, x_max], [y_min, y_max], c='red')

## August Regression Curve

In [None]:
# August - X: ENSO Anomaly and y: Total Tornadoes
X = new_comb_df.loc[new_comb_df["mo"] == 8, ["Anomaly"]]

y = new_comb_df.loc[new_comb_df["mo"] == 8, ["Total Tornadoes"]]
print(X.shape, y.shape)

In [None]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X, y)

# Score the model

score = model.score(X, y)
print(f"R2 Score: {score}")

In [None]:
predictions = model.predict(X)
print(f"August Residuals")
# Plot Residuals
plt.scatter(predictions, predictions - y)
plt.hlines(y=0, xmin=predictions.min(), xmax=predictions.max())
plt.show()

In [None]:
print('Weight coefficients: ', model.coef_)
print('y-axis intercept: ', model.intercept_) 

In [None]:
# Pull the value from the model coefficient array of arrays - assign to variable m_coef
m_coef = model.coef_[0]
# print(m_coef)

In [None]:
x_min = X.min()
x_max = X.max()
# print(x_min)
# print(x_max)

y_min_actual = y.min()
y_max_actual = y.max()
# print(y_min_actual)
# print(y_max_actual)

In [None]:
y_min = model.intercept_ + m_coef * x_min
y_max = model.intercept_ + m_coef * x_max
print(f"Actual Min Value: {y_min_actual}")
print(f"Calculated Min Value: {y_min}")
print(f"Actual Max Value: {y_max_actual}")
print(f"Calculated Max Value: {y_max}")

In [None]:
y_min_predicted = model.predict([x_min])
y_max_predicted = model.predict([x_max])
print(f"Actual Min Value: {y_min}")
print(f"Predicted Min Value: {y_min_predicted}")
print(f"Actual Max Value: {y_max}")
print(f"Predicted Max Value: {y_max_predicted}")

In [None]:
print(f"August Regression Curve")
plt.scatter(X, y, c='blue')
plt.plot([x_min, x_max], [y_min, y_max], c='red')

## September Regression Curve

In [None]:
# September - X: ENSO Anomaly and y: Total Tornadoes
X = new_comb_df.loc[new_comb_df["mo"] == 9, ["Anomaly"]]

y = new_comb_df.loc[new_comb_df["mo"] == 9, ["Total Tornadoes"]]
print(X.shape, y.shape)

In [None]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X, y)

# Score the model

score = model.score(X, y)
print(f"R2 Score: {score}")

In [None]:
predictions = model.predict(X)
print(f"September Residuals")
# Plot Residuals
plt.scatter(predictions, predictions - y)
plt.hlines(y=0, xmin=predictions.min(), xmax=predictions.max())
plt.show()

In [None]:
print('Weight coefficients: ', model.coef_)
print('y-axis intercept: ', model.intercept_) 

In [None]:
# Pull the value from the model coefficient array of arrays - assign to variable m_coef
m_coef = model.coef_[0]
# print(m_coef)

In [None]:
x_min = X.min()
x_max = X.max()
# print(x_min)
# print(x_max)

y_min_actual = y.min()
y_max_actual = y.max()
# print(y_min_actual)
# print(y_max_actual)

In [None]:
y_min = model.intercept_ + m_coef * x_min
y_max = model.intercept_ + m_coef * x_max
# print(f"Actual Min Value: {y_min_actual}")
# print(f"Calculated Min Value: {y_min}")
# print(f"Actual Max Value: {y_max_actual}")
# print(f"Calculated Max Value: {y_max}")

In [None]:
y_min_predicted = model.predict([x_min])
y_max_predicted = model.predict([x_max])
# print(f"Actual Min Value: {y_min}")
# print(f"Predicted Min Value: {y_min_predicted}")
# print(f"Actual Max Value: {y_max}")
# print(f"Predicted Max Value: {y_max_predicted}")

In [None]:
print(f"September Regression Curve")
plt.scatter(X, y, c='blue')
plt.plot([x_min, x_max], [y_min, y_max], c='red')

## October Regression Curve

In [None]:
# October - X: ENSO Anomaly and y: Total Tornadoes
X = new_comb_df.loc[new_comb_df["mo"] == 10, ["Anomaly"]]

y = new_comb_df.loc[new_comb_df["mo"] == 10, ["Total Tornadoes"]]
print(X.shape, y.shape)

In [None]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X, y)

# Score the model

score = model.score(X, y)
print(f"R2 Score: {score}")

In [None]:
predictions = model.predict(X)
print(f"October Residuals")
# Plot Residuals
plt.scatter(predictions, predictions - y)
plt.hlines(y=0, xmin=predictions.min(), xmax=predictions.max())
plt.show()

In [None]:
print('Weight coefficients: ', model.coef_)
print('y-axis intercept: ', model.intercept_) 

In [None]:
# Pull the value from the model coefficient array of arrays - assign to variable m_coef
m_coef = model.coef_[0]
# print(m_coef)

In [None]:
x_min = X.min()
x_max = X.max()
# print(x_min)
# print(x_max)

y_min_actual = y.min()
y_max_actual = y.max()
# print(y_min_actual)
# print(y_max_actual)

In [None]:
y_min = model.intercept_ + m_coef * x_min
y_max = model.intercept_ + m_coef * x_max
# print(f"Actual Min Value: {y_min_actual}")
# print(f"Calculated Min Value: {y_min}")
# print(f"Actual Max Value: {y_max_actual}")
# print(f"Calculated Max Value: {y_max}")

In [None]:
y_min_predicted = model.predict([x_min])
y_max_predicted = model.predict([x_max])
# print(f"Actual Min Value: {y_min}")
# print(f"Predicted Min Value: {y_min_predicted}")
# print(f"Actual Max Value: {y_max}")
# print(f"Predicted Max Value: {y_max_predicted}")

In [None]:
print(f"October Regression Curve")
plt.scatter(X, y, c='blue')
plt.plot([x_min, x_max], [y_min, y_max], c='red')

## November Regression Curve

In [None]:
# November - X: ENSO Anomaly and y: Total Tornadoes
X = new_comb_df.loc[new_comb_df["mo"] == 11, ["Anomaly"]]

y = new_comb_df.loc[new_comb_df["mo"] == 11, ["Total Tornadoes"]]
print(X.shape, y.shape)

In [None]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X, y)

# Score the model

score = model.score(X, y)
print(f"R2 Score: {score}")

In [None]:
predictions = model.predict(X)
print(f"November Residuals")
# Plot Residuals
plt.scatter(predictions, predictions - y)
plt.hlines(y=0, xmin=predictions.min(), xmax=predictions.max())
plt.show()

In [None]:
print('Weight coefficients: ', model.coef_)
print('y-axis intercept: ', model.intercept_) 

In [None]:
# Pull the value from the model coefficient array of arrays - assign to variable m_coef
m_coef = model.coef_[0]
# print(m_coef)

In [None]:
x_min = X.min()
x_max = X.max()
# print(x_min)
# print(x_max)

y_min_actual = y.min()
y_max_actual = y.max()
# print(y_min_actual)
# print(y_max_actual)

In [None]:
y_min = model.intercept_ + m_coef * x_min
y_max = model.intercept_ + m_coef * x_max
# print(f"Actual Min Value: {y_min_actual}")
# print(f"Calculated Min Value: {y_min}")
# print(f"Actual Max Value: {y_max_actual}")
# print(f"Calculated Max Value: {y_max}")

In [None]:
y_min_predicted = model.predict([x_min])
y_max_predicted = model.predict([x_max])
# print(f"Actual Min Value: {y_min}")
# print(f"Predicted Min Value: {y_min_predicted}")
# print(f"Actual Max Value: {y_max}")
# print(f"Predicted Max Value: {y_max_predicted}")

In [None]:
print(f"November Regression Curve")
plt.scatter(X, y, c='blue')
plt.plot([x_min, x_max], [y_min, y_max], c='red')

## December Regression Curve

In [None]:
# December - X: ENSO Anomaly and y: Total Tornadoes
X = new_comb_df.loc[new_comb_df["mo"] == 12, ["Anomaly"]]

y = new_comb_df.loc[new_comb_df["mo"] == 12, ["Total Tornadoes"]]
print(X.shape, y.shape)

In [None]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X, y)

# Score the model

score = model.score(X, y)
print(f"R2 Score: {score}")

In [None]:
predictions = model.predict(X)
print(f"December Residuals")
# Plot Residuals
plt.scatter(predictions, predictions - y)
plt.hlines(y=0, xmin=predictions.min(), xmax=predictions.max())
plt.show()

In [None]:
print('Weight coefficients: ', model.coef_)
print('y-axis intercept: ', model.intercept_) 

In [None]:
# Pull the value from the model coefficient array of arrays - assign to variable m_coef
m_coef = model.coef_[0]
# print(m_coef)

In [None]:
x_min = X.min()
x_max = X.max()
# print(x_min)
# print(x_max)

y_min_actual = y.min()
y_max_actual = y.max()
# print(y_min_actual)
# print(y_max_actual)

In [None]:
y_min = model.intercept_ + m_coef * x_min
y_max = model.intercept_ + m_coef * x_max
# print(f"Actual Min Value: {y_min_actual}")
# print(f"Calculated Min Value: {y_min}")
# print(f"Actual Max Value: {y_max_actual}")
# print(f"Calculated Max Value: {y_max}")

In [None]:
y_min_predicted = model.predict([x_min])
y_max_predicted = model.predict([x_max])
# print(f"Actual Min Value: {y_min}")
# print(f"Predicted Min Value: {y_min_predicted}")
# print(f"Actual Max Value: {y_max}")
# print(f"Predicted Max Value: {y_max_predicted}")

In [None]:
print(f"December Regression Curve")
plt.scatter(X, y, c='blue')
plt.plot([x_min, x_max], [y_min, y_max], c='red')

## DRAFT ONLY - Write Tornado table to PostgreSQL... pete

In [None]:
# Read the connection URL from the TORNADO_DB_URL environment variable when it is set, so
# a real password never has to be typed into (and saved with) the notebook. Without the
# variable, the original placeholder URL is used unchanged.
db_url = os.environ.get(
    "TORNADO_DB_URL",
    "postgresql://postgres:YOURPASSWORD@localhost/Tornadoes_USA",
)
engine = sqlalchemy.create_engine(db_url)
# Shared connection used by the to_sql cells below
conn = engine.connect()

In [None]:
# Write the tornado frame to the "tornadoes" table, replacing any existing table of that name
table_name = 'tornadoes'
tornado_df.to_sql(name=table_name, con=conn, if_exists='replace', index=False)

In [None]:
# Convert the "Date" column to the SQL DATE type.
# Engine.execute() was deprecated in SQLAlchemy 1.4 and removed in 2.0; engine.begin()
# opens a connection inside a transaction that commits on exit and works on both lines.
with engine.begin() as ddl_conn:
    ddl_conn.execute(
        sqlalchemy.text('ALTER TABLE tornadoes ALTER COLUMN "Date" TYPE Date USING "Date"::date')
    )

## Create Mobile Home table 

In [None]:
mobile_homes = "Mobile_Home_Parks.csv"

In [None]:
#read data

mobile_data = pd.read_csv(mobile_homes, delimiter=",")

In [None]:
#Create DF
mobile_df = pd.DataFrame(mobile_data[["OBJECTID","NAME","ADDRESS","CITY","STATE","ZIP",
                                      "TYPE","STATUS","COUNTY","LATITUDE","LONGITUDE",
                                      "NAICS_DESC","VAL_DATE","UNITS","SIZE"]])


mobile_df.head()

In [None]:
# Write the mobile home frame to the "mobile_homes" table, replacing any existing table of that name
table_name = 'mobile_homes'
mobile_df.to_sql(name=table_name, con=conn, if_exists='replace', index=False)

In [None]:
# Convert the "VAL_DATE" column to the SQL DATE type.
# Engine.execute() was deprecated in SQLAlchemy 1.4 and removed in 2.0; engine.begin()
# opens a connection inside a transaction that commits on exit and works on both lines.
with engine.begin() as ddl_conn:
    ddl_conn.execute(
        sqlalchemy.text('ALTER TABLE mobile_homes ALTER COLUMN "VAL_DATE" TYPE Date USING "VAL_DATE"::date')
    )