## Modeling

In [13]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler

from sklearn import metrics
from scipy.stats import uniform, loguniform

import warnings
warnings.filterwarnings("ignore")

In [15]:
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.layers import Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [98]:
df = pd.read_csv('../model_ready_2015.csv')

In [17]:
X = pd.get_dummies(df.drop(columns='Fin'), drop_first=True)
y = to_categorical(df['Fin'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=7)

In [18]:
X.shape

(478034, 716)

In [20]:
ss = StandardScaler()
Xs_train = ss.fit_transform(X_train)
Xs_test = ss.transform(X_test)

In [None]:
np.random.seed(7)
best_params_model = Sequential()
best_params_model.add(Dense(716, input_shape=(716,), activation='relu'))
best_params_model.add(Dense(64, activation='relu'))
best_params_model.add(Dense(200, activation='relu'))
best_params_model.add(Dense(4, activation='softmax'))

best_params_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
history = best_params_model.fit(Xs_train, y_train, validation_data=(Xs_test, y_test), epochs=10, batch_size=512, verbose=1)

preds = best_params_model.predict(Xs_test)

### Binary Classification

In [99]:
X = pd.get_dummies(df.drop(columns='Fin'), drop_first=True)
y = df['Fin'].apply(lambda x: 0 if x > 1 else x)

In [100]:
#deal with imbalanced classes
from imblearn.under_sampling import RandomUnderSampler
rus = RandomUnderSampler(random_state=7, replacement=True)# fit predictor and target variable
X, y = rus.fit_resample(X, y)
print('original dataset shape:', len(y))
print('Resample dataset shape', len(y_rus))

original dataset shape: 120826
Resample dataset shape 120826


In [101]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=7)

In [102]:
pd.DataFrame(y).value_counts(normalize=True)

Fin
0      0.5
1      0.5
dtype: float64

In [103]:
X.shape

(120826, 716)

In [104]:
ss = StandardScaler()
Xs_train = ss.fit_transform(X_train)
Xs_test = ss.transform(X_test)

In [105]:
np.random.seed(7)
nn_model = Sequential()
nn_model.add(Dense(716, input_shape=(716,), activation='relu'))
nn_model.add(Dense(64, activation='relu'))
nn_model.add(Dense(200, activation='relu'))
nn_model.add(Dense(1, activation='sigmoid'))

nn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history = nn_model.fit(Xs_train, y_train, validation_data=(Xs_test, y_test), epochs=10, batch_size=512, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [87]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import plot_confusion_matrix

In [106]:
preds = np.argmax(nn_model.predict(Xs_test), axis=1)

In [107]:
accuracy = accuracy_score(y_test, preds)
accuracy

0.5

In [108]:
f1 = f1_score(y_test, preds)
f1

0.0

In [109]:
matrix = confusion_matrix(y_test, preds)
matrix

array([[18124,     0],
       [18124,     0]], dtype=int64)

In [110]:
tn, fp, fn, tp = confusion_matrix(y_test, preds).ravel()
(tn, fp, fn, tp)

(18124, 0, 18124, 0)