<a href="https://colab.research.google.com/github/NicoleLund/flight_delay_prediction/blob/model_prep_210819/data_manipulation_modeling/investigate_models/d_svc_delay.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# d_svc_delay
----

Written in Google Colab

By Nicole Lund 

This notebook attempts to build an SVC (support vector classifier) model for 2017 flight performance. However, training could not complete before Google Colab usage limits were exceeded.

In [1]:
# Import Dependencies

# Plotting
%matplotlib inline
import matplotlib.pyplot as plt

# Data manipulation
import numpy as np
import pandas as pd
from statistics import mean
from operator import itemgetter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

# Parameter Selection
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Model Development
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC 
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

# Model Metrics
from sklearn.metrics import classification_report

# Save/load files
from tensorflow.keras.models import load_model
import joblib

# # Ignore deprecation warnings
# import warnings
# warnings.simplefilter('ignore', FutureWarning)

In [2]:
# Seed NumPy's global random generator so repeated runs of the notebook
# draw the same random numbers. The `np` alias from the imports cell is
# reused here instead of adding a second, mid-notebook import.
SEED = 1
np.random.seed(SEED)

# Read in the csv model files

In [3]:
# Pull the four pre-split 2017 TUS tables (features/labels, train/test)
# from the project's S3 bucket, one DataFrame per file, in this order:
# X_train, X_test, y_train, y_test.
X_train, X_test, y_train, y_test = (
    pd.read_csv(csv_url)
    for csv_url in (
        "https://flight-delay-prediction.s3.us-west-1.amazonaws.com/2017_TUS_X_train.csv",
        "https://flight-delay-prediction.s3.us-west-1.amazonaws.com/2017_TUS_X_test.csv",
        "https://flight-delay-prediction.s3.us-west-1.amazonaws.com/2017_TUS_y_train.csv",
        "https://flight-delay-prediction.s3.us-west-1.amazonaws.com/2017_TUS_y_test.csv",
    )
)

In [4]:
# Peek at the first three training feature rows
X_train.head(n=3)

Unnamed: 0,OP_CARRIER_FL_NUM,CRS_DEP_TIME,CRS_ARR_TIME,DISTANCE,DL,OO,UA,WN,AA,EV,AS,Sunday,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,ATL,DEN,DFW,HOU,IAH,JFK,LAS,LAX,MDW,MSP,OAK,ORD,PDX,PHX,SAN,SEA,SFO,SJC,SLC
0,5538,1120,1647,1437,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,5538,1219,1650,1437,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2068,1625,2040,1440,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# Peek at the first three training label rows
y_train.head(n=3)

Unnamed: 0,CANCELLED,DIVERTED,DELAY
0,0,0,0
1,0,0,0
2,0,0,0


## SVC Classifier Method

In [None]:
# Build the baseline support vector classifier (linear kernel) and train it
# on the DELAY column of the training labels. Constructing and fitting in a
# single chained expression keeps the cell free of output, since fit()
# returns the estimator itself.
model = SVC(kernel='linear', C=1, gamma=0.1).fit(X_train, y_train["DELAY"])

In [None]:
# Persist the fitted baseline model, then offer it as a browser download.
# The dump runs first so the file exists on disk even when the Colab-only
# download helper cannot be imported.
MODEL_FILE = 'd_SVC_delay_model.sav'
joblib.dump(model, MODEL_FILE)

try:
    # google.colab is only importable inside a Colab runtime
    from google.colab import files
except ImportError:
    print(f'google.colab is unavailable; model saved locally as {MODEL_FILE}')
else:
    files.download(MODEL_FILE)

# Hyperparameter Tuning

In [None]:
# Create the GridSearchCV model.
# gamma is a kernel coefficient that the linear kernel does not use, so it is
# searched only for the rbf kernel. Listing gamma values under the linear
# kernel would refit identical models once per gamma value.
param_grid = [
    {'kernel': ['linear'], 'C': [1, 5]},
    {'kernel': ['rbf'], 'C': [1, 5], 'gamma': [0.01, 0.1]},
]
grid = GridSearchCV(model, param_grid, verbose=3)

In [None]:
# Run the grid search: every parameter combination is cross-validated
# against the DELAY training labels.
grid.fit(X_train, y_train["DELAY"])

In [None]:
# Write the fitted grid-search object to disk, then hand the same file to
# the Colab `files` helper (imported in the earlier save cell) for download.
grid_file = 'd_SVC_delay_grid.sav'
joblib.dump(grid, grid_file)
files.download(grid_file)

In [None]:
# Report the winning parameter combination and its cross-validated score,
# one per line.
print(grid.best_params_, grid.best_score_, sep='\n')

# Score Model

In [None]:
# Score the baseline `model` (not the grid search) on the held-out test split
print('SVC Model Score:')
test_score = model.score(X_test, y_test["DELAY"])
test_score

# Make Predictions

In [None]:
# Make predictions with the hypertuned model
grid_predictions = grid.predict(X_test)

# The report's row names are derived from the DELAY classes present in the
# test labels. (`y` is never defined in this notebook, so the class list is
# taken from y_test instead.) Passing `labels` explicitly keeps the display
# names aligned with the classes even if some class is never predicted.
delay_classes = sorted(y_test.DELAY.unique())
print(classification_report(y_test.DELAY, grid_predictions,
                            labels=delay_classes,
                            target_names=[str(c) for c in delay_classes]))

In [None]:
# Put the baseline model's test-set predictions next to the true DELAY
# labels so they can be compared row by row.
model_predictions = model.predict(X_test)
prediction_vs_actual = {"Prediction": model_predictions, "Actual": y_test["DELAY"]}
pd.DataFrame(prediction_vs_actual)

# Conclusion

This model is not suitable. Visual inspection of the predictions shows that it does not perform well despite having reasonable accuracy.