In [4]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras import models
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from keras import backend as K

In [5]:
df_ready = pd.read_csv('df_ready.csv')

In [6]:
X = df_ready.drop(['is_canceled'], axis=1).to_numpy()
y = df_ready['is_canceled'].to_numpy()
scaler = preprocessing.StandardScaler()
X_standardized = scaler.fit_transform(X)

print("Mean Before Standardized:", round(X[:,0].mean()))
print("Standard deviation Before Standardized:", X[:,0].std())
print("\nMean After Standardized:", round(X_standardized[:,0].mean()))
print("Standard deviation After Standardized:", X_standardized[:,0].std())

Mean Before Standardized: 104.0
Standard deviation Before Standardized: 106.86264950916215

Mean After Standardized: 0.0
Standard deviation After Standardized: 1.0


In [10]:
#Build Model
def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

def create_model():
    network = models.Sequential()
    network.add(Dense(units=128, activation="elu", input_shape=(X.shape[1],)))
    network.add(BatchNormalization())
    network.add(Dropout(rate = 0.1))
    network.add(Dense(units=128, activation="elu", kernel_regularizer = l2(0.2), bias_regularizer = l2(0.2)))
    network.add(BatchNormalization())
    network.add(Dense(units=128, activation="elu", kernel_regularizer = l2(0.2), bias_regularizer = l2(0.2)))
    network.add(BatchNormalization())
    network.add(Dense(units=1, activation="sigmoid"))
    
    opt = Adam(lr=0.001, decay = 1e-5, beta_1=0.9, beta_2=0.999, amsgrad=False)
    network.compile(loss="binary_crossentropy",
                    optimizer=opt, 
                    metrics=["accuracy", f1_m])
    return network

network = create_model()


In [11]:
#Model Evaluation

'''
cv = StratifiedKFold(n_splits=5, random_state=None, shuffle=True)
network_score = []
for train_idx, test_idx in cv.split(X_standardized,y):
    network.fit(X_standardized[train_idx], y[train_idx], 
                epochs=10,
                verbose=1,
                batch_size=128)
    y_pred = network.predict(X_standardized[test_idx])
    network_score.append(accuracy_score(y[test_idx], y_pred.round()))
    print(network_score[-1])

network_score = np.array(network_score)
print("\nAccuracy mean:", network_score.mean())
print("Accuracy std:",network_score.std())
'''


   

'\ncv = StratifiedKFold(n_splits=5, random_state=None, shuffle=True)\nnetwork_score = []\nfor train_idx, test_idx in cv.split(X_standardized,y):\n    network.fit(X_standardized[train_idx], y[train_idx], \n                epochs=10,\n                verbose=1,\n                batch_size=128)\n    y_pred = network.predict(X_standardized[test_idx])\n    network_score.append(accuracy_score(y[test_idx], y_pred.round()))\n    print(network_score[-1])\n\nnetwork_score = np.array(network_score)\nprint("\nAccuracy mean:", network_score.mean())\nprint("Accuracy std:",network_score.std())\n'

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle = True, test_size = 0.1)
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
network.fit(x = X_train, y = y_train, epochs = 20, batch_size = 32, validation_split = 0.1, shuffle = True)

Train on 96705 samples, validate on 10746 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7ff454d02310>

In [17]:
y_pred = network.predict(X_test).ravel().round()
print("Model Performance :")
print(classification_report(y_test, y_pred))

Model Performance :
              precision    recall  f1-score   support

           0       0.88      0.91      0.89      7464
           1       0.84      0.79      0.81      4475

    accuracy                           0.86     11939
   macro avg       0.86      0.85      0.85     11939
weighted avg       0.86      0.86      0.86     11939

