In [0]:
import numpy
import pandas as pd
from keras.models import Sequential
from keras.optimizers import Adam
from keras.layers import Dense,Dropout
from keras.layers import LSTM
from matplotlib import pyplot
from google.colab import files
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import MinMaxScaler
from keras.optimizers import Adam
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.utils import class_weight
from keras.layers import BatchNormalization
from sklearn.model_selection import train_test_split


# Seed NumPy's global random generator so NumPy-based randomness is repeatable.
# NOTE(review): this seeds NumPy only; the Keras backend keeps its own RNG state.
seed = 7
numpy.random.seed(seed)

# Open the Colab upload dialog; whatever it returns is kept in `uploaded`.
uploaded = files.upload()

In [0]:
from sklearn.utils import resample

raw_data = pd.read_csv('MTA_DELAY_DATA (1).csv')

# Split the rows by class, keeping the first three columns (two features
# followed by the RESULT label) and discarding incomplete rows.
# NOTE(review): `iloc[1:-2]` also drops the first and the last two rows of each
# class; presumably a leftover from trimming junk rows in the raw file - confirm.
df_majority = raw_data[raw_data['RESULT']==0].iloc[1:-2,0:3].dropna()
df_minority = raw_data[raw_data['RESULT']==1].iloc[1:-2,0:3].dropna()
print(raw_data['RESULT'].value_counts())


# Downsample the majority class to exactly the size of the minority class.
# The size is derived from the data rather than hard-coded, so the classes stay
# balanced if the CSV changes.
df_majority_downsampled = resample(df_majority,
                                   replace=False,                # sample without replacement
                                   n_samples=len(df_minority),   # match the minority class
                                   random_state=123)             # reproducible results

# Combine minority class with downsampled majority class
df_downsampled = pd.concat([df_majority_downsampled, df_minority])

# Display new class counts
print(df_downsampled['RESULT'].value_counts())
print(numpy.unique(df_downsampled['RESULT']))

# Features and label are sliced from the same frame so their rows stay aligned.
# No further row trimming or dropna is needed: both class frames were already
# cleaned above, and trimming here would only discard arbitrary valid samples.
features = df_downsampled.iloc[:,0:2]
labels = df_downsampled.iloc[:,2:3]

# Stratify on the label so train and test keep the same 50/50 class ratio.
X, XTest, Y, YTest = train_test_split(features, labels,
                                      test_size = 0.3,
                                      random_state = 0,
                                      stratify = df_downsampled['RESULT'])

# Sanity checks: nothing lost in the split, features and labels still paired.
assert len(X) + len(XTest) == len(df_downsampled)
assert len(X) == len(Y) and len(XTest) == len(YTest)

print(YTest['RESULT'].value_counts()) # double check that the test data is balanced

In [0]:
def create_model(activation):
    """Build and compile the fully connected binary classifier.

    The network takes two input features and stacks hidden layers of width
    128, 64, 32 and 16. Each hidden layer is followed by batch normalisation;
    every hidden layer except the first is additionally followed by 20%
    dropout. A single sigmoid unit produces the output.

    Args:
        activation: Activation passed to every hidden Dense layer.

    Returns:
        The Sequential model, compiled with binary cross-entropy loss, an Adam
        optimizer (lr=0.001) and the accuracy metric.
    """
    net = Sequential()

    # Input block: widest layer, no dropout.
    net.add(Dense(128, activation=activation, input_dim=2))
    net.add(BatchNormalization())

    # Remaining hidden blocks share one shape: Dense -> BatchNorm -> Dropout.
    for width in (64, 32, 16):
        net.add(Dense(width, activation=activation))
        net.add(BatchNormalization())
        net.add(Dropout(0.2))

    # Single sigmoid unit for the binary target.
    net.add(Dense(1, activation='sigmoid'))

    net.compile(loss='binary_crossentropy', optimizer=Adam(lr=0.001), metrics=['accuracy'])
    return net
# Train one softsign network on the training split; the held-out split is
# passed as validation data so it is scored alongside the training loss.
model = create_model('softsign')
history = model.fit(
    X,
    Y,
    epochs=1500,
    batch_size=15,
    shuffle=True,
    validation_data=(XTest, YTest),
    verbose=1,
)

In [0]:
def create_model(activation):
    """Build and compile the fully connected binary classifier.

    This repeats the `create_model` definition from the earlier training cell
    so that the grid-search cell can be run on its own.

    The network takes two input features and stacks hidden layers of width
    128, 64, 32 and 16. Each hidden layer is followed by batch normalisation;
    every hidden layer except the first is additionally followed by 20%
    dropout. A single sigmoid unit produces the output.

    Args:
        activation: Activation passed to every hidden Dense layer.

    Returns:
        The Sequential model, compiled with binary cross-entropy loss, an Adam
        optimizer (lr=0.001) and the accuracy metric.
    """
    net = Sequential()

    # Input block: widest layer, no dropout.
    net.add(Dense(128, activation=activation, input_dim=2))
    net.add(BatchNormalization())

    # Remaining hidden blocks share one shape: Dense -> BatchNorm -> Dropout.
    for width in (64, 32, 16):
        net.add(Dense(width, activation=activation))
        net.add(BatchNormalization())
        net.add(Dropout(0.2))

    # Single sigmoid unit for the binary target.
    net.add(Dense(1, activation='sigmoid'))

    net.compile(loss='binary_crossentropy', optimizer=Adam(lr=0.001), metrics=['accuracy'])
    return net


# Wrap the model builder so scikit-learn can drive it as an estimator.
model = KerasClassifier(build_fn=create_model, epochs=500, batch_size=15)

# Candidate hidden-layer activations; each one is forwarded to
# create_model(activation=...) by the search.
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
param_grid = {'activation': activation}

# The extra keyword arguments given to grid.fit are fit parameters for the
# wrapped Keras model.
# NOTE(review): the held-out split is passed as validation data to every fit
# here - confirm it is only used for monitoring, not for picking a winner.
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(
    X,
    Y,
    shuffle=True,
    validation_data=(XTest, YTest),
    verbose=1,
)


In [0]:
# Report the winning configuration first, then one line per candidate with the
# mean and standard deviation of its cross-validated test score.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for idx in range(len(params)):
    print("%f (%f) with: %r" % (means[idx], stds[idx], params[idx]))

In [0]:
# Final network: three tanh hidden layers (8 -> 16 -> 32 units), each followed
# by batch normalisation, with 40% dropout after the second and third, and a
# single sigmoid output unit for the binary target.
model = Sequential()

model.add(Dense(8, kernel_initializer='uniform', activation='tanh',input_dim =2))
model.add(BatchNormalization())

model.add(Dense(16, kernel_initializer='uniform', activation='tanh'))
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(Dense(32, kernel_initializer='uniform', activation='tanh'))
model.add(BatchNormalization())
model.add(Dropout(0.4))

model.add(Dense(1,  kernel_initializer='uniform', activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer=Adam(lr=0.001),metrics=['accuracy'])

# Pass the held-out split as validation data, as the earlier softsign run does.
# Without validation data `history.history` has no 'val_loss' entry, and the
# loss plot at the end of the notebook reads that key. The validation data is
# only evaluated, so the samples used for training are unchanged.
history = model.fit(X, Y,
                    epochs=500,
                    batch_size=5,
                    shuffle=True,
                    validation_data=(XTest, YTest),
                    verbose=1)

In [0]:
from sklearn.metrics import roc_auc_score

# One forward pass gives the sigmoid probabilities. The class labels are those
# probabilities thresholded at 0.5, so labels and scores come from the same
# predictions.
predicted_proba = model.predict(X)
predict = (predicted_proba > 0.5).astype('int32')
print(numpy.unique(predict))

# NOTE(review): this prints one line per row of X and scores the data the model
# was trained on; XTest/YTest would give the held-out figure.
for index,val in enumerate(predict):
  print("Predicted: %s, actual: %s, for val %s" % (val[0],Y.iloc[index].values,X.iloc[index].values))

predict = [val[0] for val in predict]

# ROC AUC needs continuous scores: with hard 0/1 labels the curve collapses to
# a single operating point, so the probabilities are passed instead.
print("ras score: ",roc_auc_score(Y,predicted_proba.ravel()))


#predict = model.predict(numpy.array([0.0,0.12]).reshape(-1,2))
#print("Probability of delay: ",predict[0][0])

In [0]:
# Plot the per-epoch loss recorded in `history`.
pyplot.plot(history.history['loss'], label='train')

# Keras only records 'val_loss' when fit() was given validation data, so guard
# the lookup instead of failing with a KeyError.
if 'val_loss' in history.history:
    pyplot.plot(history.history['val_loss'], label='validation')

pyplot.title('model train vs validation loss')
pyplot.ylabel('loss')
pyplot.xlabel('epoch')
# Legend entries come from the per-line labels, so they match what was drawn.
pyplot.legend(loc='upper right')
pyplot.show()