# d_svc_delay_dec_hrs
----

Written in Google Colab

By Nicole Lund 

This workbook attempted to build a svc model for 2017 flight performance using decimal hours for input.  However, it could not complete before Google Colab usage limits were exceeded.

In [4]:
# Import Dependencies

# Plotting
%matplotlib inline
import matplotlib.pyplot as plt

# Data manipulation
import numpy as np
import pandas as pd
from statistics import mean
from operator import itemgetter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

# Parameter Selection
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Model Development
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC 
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

# Model Metrics
from sklearn.metrics import classification_report

# Save/load files
from tensorflow.keras.models import load_model
import joblib

# # Ignore deprecation warnings
# import warnings
# warnings.simplefilter('ignore', FutureWarning)

In [5]:
%tensorflow_version 2.x
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [6]:
# Set the seed value for the notebook, so the results are reproducible
from numpy.random import seed
seed(1)

# Read in the csv model files

In [7]:
# Read the CSV files from AWS to Pandas Dataframe
X_train = pd.read_csv("https://raw.githubusercontent.com/NicoleLund/flight_delay_prediction/model_refine_210821/data_manipulation_modeling/feature_assessment/2017_TUS_X_train_dec_hrs.csv")
X_test = pd.read_csv("https://raw.githubusercontent.com/NicoleLund/flight_delay_prediction/model_refine_210821/data_manipulation_modeling/feature_assessment/2017_TUS_X_test_dec_hrs.csv")
y_train = pd.read_csv("https://raw.githubusercontent.com/NicoleLund/flight_delay_prediction/model_refine_210821/data_manipulation_modeling/feature_assessment/2017_TUS_y_train_dec_hrs.csv")
y_test = pd.read_csv("https://raw.githubusercontent.com/NicoleLund/flight_delay_prediction/model_refine_210821/data_manipulation_modeling/feature_assessment/2017_TUS_y_test_dec_hrs.csv")

In [8]:
X_train.head(3)

Unnamed: 0,OP_CARRIER_FL_NUM,CRS_DEP_hours,CRS_ARR_hours,DISTANCE,DL,OO,UA,WN,AA,EV,AS,Sunday,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,ATL,DEN,DFW,HOU,IAH,JFK,LAS,LAX,MDW,MSP,OAK,ORD,PDX,PHX,SAN,SEA,SFO,SJC,SLC
0,5538,11.333333,16.783333,1437,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,5538,12.316667,16.833333,1437,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2068,16.416667,20.666667,1440,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
y_train.head(3)

Unnamed: 0,CANCELLED,DIVERTED,DELAY
0,0,0,0
1,0,0,0
2,0,0,0


## SVC Classifier Method

In [None]:
# Create model
with tf.device('/device:GPU:0'):
  model = SVC(C=1, gamma=0.1, kernel='linear')
  model = model.fit(X_train, y_train.DELAY)

In [None]:
from google.colab import files
joblib.dump(model, 'd_SVC_delay_dec_hrs_model.sav')
files.download('d_SVC_delay_dec_hrs_model.sav')

# Hyperparameter Tuning

In [None]:
# Create the GridSearchCV model
param_grid = [{'C': [1, 5], 'gamma': [0.01, 0.1], 'kernel': ['linear']},
              {'C': [1, 5], 'gamma': [0.01, 0.1], 'kernel': ['rbf']}]
grid = GridSearchCV(model, param_grid, verbose=3)

In [None]:
# Fit the model using the grid search estimator. 
grid.fit(X_train, y_train.DELAY)

In [None]:
joblib.dump(grid, 'd_SVC_delay_dec_hrs_grid.sav')
files.download('d_SVC_delay_dec_hrs_grid.sav')

In [None]:
print(grid.best_params_)
print(grid.best_score_)

# Score Model

In [None]:
print('SVC Model Score:')
model.score(X_test, y_test.DELAY)

# Make **Predictions**

In [None]:
# Make predictions with the hypertuned model
grid_predictions = grid.predict(X_test)
print(classification_report(y_test.DELAY, grid_predictions,
                            target_names=y.unique()))

In [None]:
model_predictions = model.predict(X_test)
pd.DataFrame({"Prediction": model_predictions, "Actual": y_test.DELAY})