# Machine Learning Models and Ensemble Method
---


In [6]:
import os
import random
os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # disable GPU
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation,Dropout

SEED = 111 # constant seed for reproducibility
# NOTE: PYTHONHASHSEED is read by the interpreter at start-up, so assigning it
# here does not change hashing in the already-running kernel; the value is only
# inherited by child processes started afterwards.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)         # Python's built-in RNG
np.random.seed(SEED)      # NumPy's legacy global RNG
tf.random.set_seed(SEED)  # TensorFlow's global seed


## Train/Test Split:

In [7]:
# Load the training table from disk.
df = pd.read_csv("UFC_TRAIN.csv")

# Feature matrix: every column except the label ("Winner") and the
# "B_fighter" / "R_fighter" columns.
X = df.drop(columns=["Winner", "B_fighter", "R_fighter"]).values
# Label vector.
y = df["Winner"].values

# Hold out 20% of the rows; the split is seeded with SEED so it is repeatable.
X_TRAIN, X_TEST, y_TRAIN, y_TEST = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=SEED,
)

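# Tackling the class-imbalance issue (disabled experiment): undersample the
# majority class ("Red") down to the row count of the minority class ("Blue").
# NOTE: `UFC_FINAL` is not defined in the cells shown here; load or define it
# before re-enabling the lines below.
#theMin = UFC_FINAL["Winner"].value_counts().min()
#minority = UFC_FINAL[UFC_FINAL["Winner"]=="Blue"].iloc[0:theMin]
#undersampleMaj = UFC_FINAL[UFC_FINAL["Winner"]=="Red"].iloc[0:theMin]
#UFC_FINAL = pd.concat([minority, undersampleMaj], axis=0)
#UFC_FINAL = UFC_FINAL.sample(frac=1).reset_index(drop=True)
#UFC_FINAL["Winner"].value_counts()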

## ML Models
### 1. Deep Neural Network (DNN):

In [None]:
# Scaling: fit the min-max scaler on the training split only, then apply the
# same transform to the test split so no test statistics reach the scaler.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_TRAIN)
X_test = scaler.transform(X_TEST)
# NOTE(review): labels are passed through unchanged; this assumes "Winner" in
# UFC_TRAIN.csv is already numeric 0/1 - TODO confirm.
y_train = y_TRAIN
y_test = y_TEST
print(f"X_train shape: {X_train.shape} | X_test shape: {X_test.shape} | y_train shape: {y_train.shape} | y_test shape: {y_test.shape}")

# Model: two hidden blocks followed by a single sigmoid unit for the binary
# label. No input shape is declared here, so the model is only built once it
# first sees data.
# Two further hidden blocks (42 and 20 units, each followed by Dropout(0.5))
# were tried and are intentionally left out of this configuration.
model = Sequential([
    # first hidden block
    Dense(units=42, activation='relu'),
    Dropout(0.5),  # randomly zeroes 50% of this layer's outputs during training
    # second hidden block
    Dense(units=42, activation='relu'),
    Dropout(0.5),
    # output layer
    Dense(units=1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Stop once val_loss has not improved for 32 consecutive epochs, and roll the
# model back to the weights of its best epoch; without that, the model would
# keep the weights from 32 epochs past its best validation loss.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=32,
    restore_best_weights=True,
)

##### Performance without PCA:

In [None]:
# Train for at most 100 epochs; the `early_stop` callback may end the run sooner.
# NOTE(review): the held-out test split doubles as the validation set that
# drives early stopping, so the curves below are not an unbiased test estimate.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    validation_data=(X_test, y_test),
    verbose=1,
    callbacks=[early_stop],
)

# Per-epoch metrics, taken from the History object returned by fit()
# (one column per logged metric).
model_loss = pd.DataFrame(history.history)
ax = model_loss.plot(title="DNN without PCA: training history")
ax.set_xlabel("epoch");

### DNN on the PCA-Transformed Dataset:

In [None]:
UFC_PCA = pd.read_csv("UFC_PCA.csv")

# Encode the label: "Blue" -> 1, "Red" -> 0. Values that are already numeric
# pass through unchanged. The explicit cast guarantees an int64 column instead
# of relying on pandas silently downcasting an object column, and it raises
# immediately if an unexpected label or a missing value is present.
winner_codes = {"Blue": 1, "Red": 0}
UFC_PCA["Winner"] = (
    UFC_PCA["Winner"]
    .map(lambda winner: winner_codes.get(winner, winner))
    .astype("int64")
)

# train/test split
# The label is dropped by name (not by column position) so it can never end up
# inside the feature matrix, whatever the column order of the CSV is.
X_pca = UFC_PCA.drop(columns=["Winner"]).values
y_pca = UFC_PCA["Winner"].values
# NOTE(review): this split (test_size=0.3, random_state=101) differs from the
# non-PCA run (test_size=0.2, random_state=SEED), so the two experiments are
# not evaluated on comparable hold-outs - confirm this is intended.
X_train_pca, X_test_pca, y_train_pca, y_test_pca = train_test_split(
    X_pca, y_pca, test_size=0.3, random_state=101
)

# Scaling: fit on the training split only, then apply to both splits.
scaler_pca = MinMaxScaler()
X_train_pca = scaler_pca.fit_transform(X_train_pca)
X_test_pca = scaler_pca.transform(X_test_pca)

print(f"X_train shape: {X_train_pca.shape} | X_test shape: {X_test_pca.shape} | y_train shape: {y_train_pca.shape} | y_test shape: {y_test_pca.shape}")

# Model: four hidden blocks (42, 42, 42, 20 units), each followed by 50%
# dropout, then a single sigmoid unit for the binary label. No input shape is
# declared here, so the model is only built once it first sees data.
# NOTE: this rebinds `model`, replacing any model previously held in that name.
model = Sequential([
    # h1
    Dense(units=42, activation='relu'),
    Dropout(0.5),  # randomly zeroes 50% of this layer's outputs during training
    # h2
    Dense(units=42, activation='relu'),
    Dropout(0.5),
    # h3
    Dense(units=42, activation='relu'),
    Dropout(0.5),
    # h4
    Dense(units=20, activation='relu'),
    Dropout(0.5),
    # output layer
    Dense(units=1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Stop once val_loss has not improved for 16 consecutive epochs, and roll the
# model back to the weights of its best epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=16,
    restore_best_weights=True,
)

##### Performance with PCA:

In [None]:
# Train for at most 100 epochs (the same budget as the non-PCA run). The
# previous budget of 10 epochs was smaller than the early-stopping patience
# of 16, so the `early_stop` callback could never trigger.
# NOTE(review): the held-out test split doubles as the validation set that
# drives early stopping, so the curves below are not an unbiased test estimate.
history_pca = model.fit(
    x=X_train_pca,
    y=y_train_pca,
    epochs=100,
    validation_data=(X_test_pca, y_test_pca),
    verbose=1,
    callbacks=[early_stop],
)

# Per-epoch metrics, taken from the History object returned by fit()
# (one column per logged metric).
model_loss = pd.DataFrame(history_pca.history)
ax = model_loss.plot(title="DNN on PCA features: training history")
ax.set_xlabel("epoch");