In [1]:
# Importing Dependencies
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import pandas as pd
pd.options.mode.chained_assignment = None
import numpy as np

In [2]:
# Reading CSV and making DataFrame
f = pd.read_csv("../data/new_model.csv")
model_df = pd.DataFrame(f)

In [3]:
# Grabbing the Columns Needed
duration_df = model_df[["WEATHER_DELAY", "DurationinMinutes"]]
duration_df.head()

Unnamed: 0,WEATHER_DELAY,DurationinMinutes
0,0.0,32
1,0.0,32
2,0.0,32
3,0.0,32
4,0.0,32


In [4]:
# Dropping NaN
duration_df.dropna(inplace=True)

In [5]:
# Renaming Column
duration_df.rename(columns={"DurationinMinutes": "LENGTH_OF_THUNDERSTORM"},inplace=True)

In [6]:
# Creating Binary Data
duration_df["NEW_WEATHER"] = duration_df["WEATHER_DELAY"].apply(lambda x: 1 if x > 0 else 0)
duration_df["NEW_THUNDERSTORM"] = duration_df["LENGTH_OF_THUNDERSTORM"].apply(lambda x: 0 if x == 0 else 1)

In [8]:
# Assign X (data) and y (target)
X = duration_df.drop("LENGTH_OF_THUNDERSTORM", axis=1)
y = duration_df['NEW_WEATHER']

print(X.shape, y.shape)

(615, 3) (615,)


In [9]:
# Splitting into Train/Test Data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [10]:
# Using Logistical Regression
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression()

In [11]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

LogisticRegression(multi_class='ovr', n_jobs=1, solver='liblinear')

In [12]:
classifier.fit(X_train, y_train)

LogisticRegression()

In [13]:
# Printing Scores
print(f"Training Data Score: {classifier.score(X_train, y_train)}")
print(f"Testing Data Score: {classifier.score(X_test, y_test)}")

Training Data Score: 0.9978308026030369
Testing Data Score: 1.0


In [14]:
# Making Predictions
predictions = classifier.predict(X_test)
print(f"First 10 Predictions:   {predictions[:10]}")
print(f"First 10 Actual labels: {y_test[:10].tolist()}")

First 10 Predictions:   [1 1 0 1 1 1 1 0 1 1]
First 10 Actual labels: [1, 1, 0, 1, 1, 1, 1, 0, 1, 1]


In [15]:
# Showing Predictions in DataFrame
pd.DataFrame({"Prediction": predictions, "Actual": y_test}).reset_index(drop=True) 

Unnamed: 0,Prediction,Actual
0,1,1
1,1,1
2,0,0
3,1,1
4,1,1
...,...,...
149,0,0
150,0,0
151,0,0
152,1,1
