In [None]:
# Import libraries

import pandas as pd
import numpy as np

import seaborn as sns #visualisation
import matplotlib.pyplot as plt #visualisation

# IPython magic: draw matplotlib figures inline in the notebook output
%matplotlib inline 
# Activate seaborn's default plot styling (called with color_codes=True)
sns.set(color_codes=True)

# create tables
from tabulate import tabulate

### Import Test data file

In [None]:
# Load the filtered student_info records from the CSV file into a DataFrame

test_data_path = "test_data.csv"
data_df = pd.read_csv(test_data_path)

In [None]:
# Print the DataFrame summary produced by DataFrame.info()
data_df.info()

### Algorithm 5 - MLP

In [None]:
# machine learning libraries
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

# model layers
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
#from scikeras.wrappers import KerasClassifier, KerasRegressor

In [None]:
# Function to create and return the Keras model

def create_model(optimizer='adam', neurons_layer1=20, neurons_layer2=10, input_dim=None):
    """Build and compile a feed-forward network with two hidden layers.

    Parameters
    ----------
    optimizer : str
        Optimizer passed to ``model.compile`` (e.g. a value from param_grid).
    neurons_layer1 : int
        Number of units in the first hidden layer.
    neurons_layer2 : int
        Number of units in the second hidden layer.
    input_dim : int, optional
        Number of input features. When omitted, the width of the
        notebook-level ``X_train`` is used, so existing callers (including
        the KerasClassifier wrapper) keep working unchanged.

    Returns
    -------
    A compiled ``Sequential`` model with a single sigmoid output unit,
    ``binary_crossentropy`` loss and ``accuracy`` as the tracked metric.

    NOTE(review): the sigmoid / binary_crossentropy head assumes a binary
    0/1 target - confirm this holds for every target the model is fitted on.
    """
    if input_dim is None:
        # Fall back to the global training matrix; it must exist by the time
        # the function is called (not when it is defined).
        input_dim = X_train.shape[1]

    model = Sequential()

    # Hidden layer 1 with neuron count taken from the param_grid values, and relu as the activation function
    model.add(Dense(neurons_layer1, input_dim=input_dim, activation='relu'))

    # Hidden layer 2 with neuron count taken from the param_grid values, and relu as the activation function
    model.add(Dense(neurons_layer2, activation='relu'))

    # Final layer with 1 neuron, and sigmoid as the activation function
    # use 'softmax' for multiclass classification
    model.add(Dense(1, activation='sigmoid'))

    # compile the model
    # use 'categorical_crossentropy' for multiclass classification
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

In [None]:
# Expose create_model through the scikit-learn estimator interface.
# epochs / batch_size / verbose given here are the wrapper's default settings.

model = KerasClassifier(
    build_fn=create_model,
    epochs=10,
    batch_size=32,
    verbose=0,
)

### Hyperparameter tuning - GridSearchCV

In [None]:
# Search space for the grid search: every combination of these values is tried
param_grid = dict(
    optimizer=['SGD', 'RMSprop', 'Adam'],
    batch_size=[16, 32, 64],
    neurons_layer1=[10, 20, 30],
    neurons_layer2=[5, 10, 15],
    epochs=[10, 20],
)

In [None]:
# Build one (features, target) pair per target variable.
# Each feature matrix is data_df with only the predicted column removed.

# variable 1 = gender
y = data_df['gender']
X = data_df.drop('gender', axis=1)

# variable 2 = studied_credits
y2 = data_df['studied_credits']
X2 = data_df.drop('studied_credits', axis=1)

# variable 3 = tenure
y3 = data_df['tenure']
X3 = data_df.drop('tenure', axis=1)

In [None]:
# Hold out 20% of the rows of each dataset as a test set, with a fixed seed

split_options = {'test_size': 0.2, 'random_state': 0}

X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X2, y2, **split_options)
X_train3, X_test3, y_train3, y_test3 = train_test_split(X3, y3, **split_options)

### Fit and tune with model for variable 1 = gender

In [None]:
# Grid search over param_grid with 3-fold cross-validation, gender target

grid = GridSearchCV(model, param_grid, cv=3)
grid_result = grid.fit(X_train, y_train)

In [None]:
# Report the best cross-validation score and the parameters that achieved it

gender_best_summary = (grid_result.best_score_, grid_result.best_params_)
print("Best: %f using %s" % gender_best_summary)

In [None]:
# Rebuild a fresh network using the optimizer and layer sizes picked by the gender search
gender_best_params = grid_result.best_params_
best_model = create_model(
    optimizer=gender_best_params['optimizer'],
    neurons_layer1=gender_best_params['neurons_layer1'],
    neurons_layer2=gender_best_params['neurons_layer2'],
)

In [None]:
# Train the rebuilt network with the tuned epoch count and batch size
best_model.fit(
    X_train,
    y_train,
    epochs=grid_result.best_params_['epochs'],
    batch_size=grid_result.best_params_['batch_size'],
    verbose=1,
)

In [None]:
# Loss and accuracy of the gender model on its held-out test split
score = best_model.evaluate(x=X_test, y=y_test, verbose=0)

In [None]:
# score holds the loss first, then the accuracy metric
gender_loss, gender_accuracy = score[0], score[1]
print(f'Test loss for gender: {gender_loss}')
print(f'Test accuracy gender: {gender_accuracy}')


### Fit and tune with model for variable 2 - studied_credits

In [None]:
# Perform grid search using GridSearchCV for the studied_credits target

grid2 = GridSearchCV(estimator=model, param_grid=param_grid, cv=3)

# Fit the studied_credits search object on the studied_credits training split.
# (Fitting `grid` on X_train / y_train here would only repeat the gender search.)
grid_result2 = grid2.fit(X_train2, y_train2)

In [None]:
# Report the best cross-validation score and parameters held in grid_result2

credits_best_summary = (grid_result2.best_score_, grid_result2.best_params_)
print("Best: %f using %s" % credits_best_summary)

In [None]:
# Create the best model using the best hyperparameters found for studied_credits
# (read from grid_result2, not from the gender search result grid_result)
best_model2 = create_model(optimizer=grid_result2.best_params_['optimizer'],
                           neurons_layer1=grid_result2.best_params_['neurons_layer1'],
                           neurons_layer2=grid_result2.best_params_['neurons_layer2'])

In [None]:
# Fit the best studied_credits model; epochs and batch size come from the
# studied_credits search result (grid_result2), not the gender one
best_model2.fit(X_train2, y_train2, epochs=grid_result2.best_params_['epochs'],
                batch_size=grid_result2.best_params_['batch_size'], verbose=1)

In [None]:
# Loss and accuracy of the studied_credits model on its held-out test split
score2 = best_model2.evaluate(x=X_test2, y=y_test2, verbose=0)

In [None]:
# score2 holds the loss first, then the accuracy metric
credits_loss, credits_accuracy = score2[0], score2[1]
print(f'Test loss for studied_credits: {credits_loss}')
print(f'Test accuracy studied_credits: {credits_accuracy}')


### Fit and tune with model for variable 3 - tenure

In [None]:
# Grid search over param_grid with 3-fold cross-validation, tenure target

grid3 = GridSearchCV(model, param_grid, cv=3)
grid_result3 = grid3.fit(X_train3, y_train3)

In [None]:
# Report the best cross-validation score and parameters held in grid_result3

tenure_best_summary = (grid_result3.best_score_, grid_result3.best_params_)
print("Best: %f using %s" % tenure_best_summary)

In [None]:
# Create the best model using the best hyperparameters found for tenure
# (read from grid_result3, not from the gender search result grid_result)
best_model3 = create_model(optimizer=grid_result3.best_params_['optimizer'],
                           neurons_layer1=grid_result3.best_params_['neurons_layer1'],
                           neurons_layer2=grid_result3.best_params_['neurons_layer2'])

In [None]:
# Fit the best tenure model; epochs and batch size come from the tenure
# search result (grid_result3), not the gender one
best_model3.fit(X_train3, y_train3, epochs=grid_result3.best_params_['epochs'],
                batch_size=grid_result3.best_params_['batch_size'], verbose=1)

In [None]:
# Evaluate the tenure model on the tenure test split (X_test3 / y_test3);
# X_test / y_test belong to the gender task and would score the wrong target
score3 = best_model3.evaluate(X_test3, y_test3, verbose=0)

In [None]:
# score3 holds the loss first, then the accuracy metric
tenure_loss, tenure_accuracy = score3[0], score3[1]
print(f'Test loss for tenure: {tenure_loss}')
print(f'Test accuracy tenure: {tenure_accuracy}')

### Display the results of all 3 tests

In [None]:
# Collect the test accuracy of each tuned model, one table column per target.
# Each column is a one-element list holding the plain accuracy value; wrapping
# the value in braces (`{score[1]}`) would build a set and the table would
# show the set's repr instead of a number.
algorithm_5_results = {
    'gender': [score[1]],
    'studied_credits': [score2[1]],
    'Tenure': [score3[1]],
}

print(tabulate(algorithm_5_results, headers='keys', tablefmt='fancy_grid'))