In [1]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split, StratifiedKFold, RepeatedStratifiedKFold
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import balanced_accuracy_score
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [4]:
# Load the dataset from CSV; the file's first column becomes the row index.
df = pd.read_csv(filepath_or_buffer='./CALTECH_LEFT.csv', index_col=0)

In [5]:
# Separate the target column ('labels') from the remaining feature columns.
y = df['labels']
X = df.drop(columns='labels')

In [6]:
# Hold out 20% of the samples as a test set.
# - random_state pins the shuffle so a Restart & Run All reproduces the same split
#   (the same seed is used by the cross-validation further down).
# - stratify=y keeps the class proportions of `y` in both partitions, matching the
#   stratified cross-validation that is later run on the training part.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X, y, test_size=0.2, random_state=1421, stratify=y
)

In [7]:
# Standardize the features: the scaler statistics are learned on the training split
# only and then re-used, unchanged, to transform the test split.
# Note: this overwrites Xtrain / Xtest with the scaled arrays returned by the scaler.
sc = StandardScaler()
Xtrain = sc.fit_transform(Xtrain)
Xtest = sc.transform(Xtest)
# Convert the labels to plain NumPy arrays so they can be indexed positionally with
# the integer fold indices produced during cross-validation.
# np.asarray (instead of `.values`) also accepts an ndarray, so re-running this cell
# does not raise AttributeError once the labels have already been converted.
ytrain = np.asarray(ytrain)
ytest = np.asarray(ytest)

In [8]:
# Repeated stratified K-fold cross-validation on the training split (5 folds x 5
# repeats = 25 fits) to check the consistency of the results across partitions.
# Each fit is scored with balanced accuracy on its validation fold; all scores are
# collected in `acc_list`, and the model with the highest score is kept in `best_obj`.
#
# NOTE(review): Xtrain was standardized on the whole training split before this loop,
# so every validation fold contributed to the scaler statistics. Fitting the scaler
# inside each fold would give a stricter estimate.
CV_SEED = 1421  # one seed for both the fold assignment and the network initialisation
repeated_fold = RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=CV_SEED)
acc_list = []
max_acc = 0
best_obj = None
for train_index, test_index in repeated_fold.split(Xtrain, ytrain):
    X_train, X_test = Xtrain[train_index], Xtrain[test_index]
    y_train, y_test = ytrain[train_index], ytrain[test_index]
    # random_state makes weight initialisation and batch shuffling reproducible;
    # without it the scores change on every run even though the folds are seeded.
    # learning_rate='adaptive' is only used by solver='sgd'; with the default 'adam'
    # solver it has no effect (kept as in the original configuration).
    # NOTE(review): beta_1 / beta_2 are far from the library defaults (0.9 / 0.999)
    # - confirm these values are intentional.
    nn = MLPClassifier(beta_1=0.1, beta_2=0.001, hidden_layer_sizes=(150, 100, 50),
                       learning_rate='adaptive', max_iter=1000000,
                       random_state=CV_SEED)
    nn.fit(X_train, y_train)
    y_hat = nn.predict(X_test)
    acc = balanced_accuracy_score(y_test, y_hat)
    acc_list.append(acc)
    # `best_obj is None` guarantees a model is retained even if every fold scores 0.
    if best_obj is None or acc > max_acc:
        max_acc = acc
        best_obj = nn

In [9]:
# Average balanced accuracy over all cross-validation fits.
np.asarray(acc_list).mean()

0.8033333333333332