This is a slightly altered version that also exports the fitted scaler, encoders, and model as pickle files.

In [57]:

import pickle

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import classification_report
from sklearn.preprocessing import MinMaxScaler

# --- Load the raw flight records and build the binary target -----------------
df = pd.read_csv('./nyc-flights.csv')
df = df.drop(columns=['tailnum', 'year'])

# Target: 0 when arr_delay <= 0, otherwise 1.
# A missing arr_delay compares False against 0 and therefore lands in class 1,
# exactly as the element-wise `0 if i<=0 else 1` comparison it replaces did.
# NOTE(review): confirm that class 1 is the intended label for missing delays.
arrived_on_time = df['arr_delay'] <= 0
df['arr_delay'] = (~arrived_on_time).astype('int64')

# --- Fit, export and apply one ordinal encoder per categorical column --------
encoder_carrier = preprocessing.OrdinalEncoder().fit(df[['carrier']])
encoder_origin = preprocessing.OrdinalEncoder().fit(df[['origin']])
encoder_dest = preprocessing.OrdinalEncoder().fit(df[['dest']])

fitted_encoders = {
    'carrier': encoder_carrier,
    'origin': encoder_origin,
    'dest': encoder_dest,
}
for column, encoder in fitted_encoders.items():
    # Each encoder is written to its own file: ./encoder_<column>.pkl
    # Plain 'wb' is sufficient because the file is only written, never read.
    with open(f'./encoder_{column}.pkl', 'wb') as f:
        pickle.dump(encoder, f)
    # Replace the string category by its ordinal code.
    df[[column]] = encoder.transform(df[[column]])

# Spot-check the codes assigned to a few known categories.
print(encoder_carrier.transform([['VX']]))
print(encoder_origin.transform([['JFK']]))
print(encoder_dest.transform([['ABQ']]))

# --- Split into target / features and scale ----------------------------------
y = df['arr_delay']
# `columns=` replaces the positional axis argument (`df.drop([...], 1)`),
# which newer pandas releases no longer accept.
X = df.drop(columns=['arr_delay'])

# NOTE(review): the scaler is fitted on every row, before the train/test split
# performed in a later cell, so held-out rows contribute to the min/max.
sc_X = MinMaxScaler().fit(X)
X_scaled = sc_X.transform(X)

# The target only holds 0 and 1 (set above); scaling it yields a float column
# vector with the same values when both classes are present.
y_column = np.asarray(y).reshape(-1, 1)
sc_y = MinMaxScaler().fit(y_column)
y_scaled_array = sc_y.transform(y_column)

print(y_scaled_array)

[[13.]]
[[1.]]
[[0.]]
[[0.]
 [1.]
 [1.]
 ...
 [0.]
 [1.]
 [0.]]


In [61]:
# Hand-made sample row of 13 raw (unscaled) feature values.
# NOTE(review): 13.0 / 1.0 / 0.0 look like the encoded carrier, origin and
# dest codes printed by the previous cell - confirm the column order.
d_test = [
    1, 2, 3, 4, 5,
    13.0,
    6,
    1.0, 0.0,
    7, 8, 9, 10,
]

# Wrap the single row in a list so the scaler receives a batch of one sample.
sample_batch = [d_test]
sc_X.transform(sample_batch)

array([[ 0.00000000e+00,  3.33333333e-02,  8.33680700e-04,
         1.89107413e-02,  1.66736140e-03,  8.66666667e-01,
         8.09061489e-04,  5.00000000e-01,  0.00000000e+00,
        -2.25903614e-02, -1.75905093e-02,  3.75000000e-01,
         1.69491525e-01]])

In [38]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
import pickle

# Flatten the column-vector target into a 1-D array.
y_scaled = y_scaled_array.ravel()

# Hold out 30% of the rows for evaluation; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y_scaled,
    test_size=0.3,
    random_state=1,
)

# Show one scaled held-out row.
print(X_test[0])

# MLP with two hidden layers of 9 units each, logistic activation and SGD
# with a constant learning rate of 0.3 (max_iter=600, seeded).
clf = MLPClassifier(
    hidden_layer_sizes=(9, 9),
    activation="logistic",
    solver='sgd',
    learning_rate='constant',
    learning_rate_init=0.3,
    max_iter=600,
    random_state=1,
)
clf.fit(X_train, y_train)

# Persist the trained model first, then the fitted feature scaler.
for pickle_path, artefact in (('./model.pkl', clf), ('./scaler.pkl', sc_X)):
    with open(pickle_path, 'w+b') as f:
        pickle.dump(artefact, f)

# Predictions on the held-out rows, evaluated in the following cells.
y_pred = clf.predict(X_test)


[0.27272727 0.23333333 0.43851605 0.01361573 0.55147978 0.73333333
 0.18624595 0.         0.47524752 0.44126506 0.43628554 0.41666667
 0.89830508]


In [39]:

from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error

# Score the held-out predictions. With 0/1 labels the MSE is the share of
# misclassified rows, so the two printed numbers add up to 1.
test_accuracy = accuracy_score(y_test, y_pred)
test_mse = mean_squared_error(y_test, y_pred)

print("Accuracy:", test_accuracy, sep="\n")
print("MSE:", test_mse, sep="\n")


Accuracy:
0.8554118725180735
MSE:
0.14458812748192648


In [40]:

from sklearn.metrics import classification_report

# Per-class precision / recall / f1 for the held-out predictions.
report_text = classification_report(y_test, y_pred)
print(report_text)


              precision    recall  f1-score   support

         0.0       0.87      0.89      0.88      5807
         1.0       0.83      0.81      0.82      4014

    accuracy                           0.86      9821
   macro avg       0.85      0.85      0.85      9821
weighted avg       0.85      0.86      0.85      9821

