In [1]:
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn

from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [2]:
dataset = sio.loadmat("sleep_data.mat")
X=dataset["X"]
Y=dataset["Y"]

In [3]:
Y=np.squeeze(Y)

In [None]:
colors = ['red','green','blue']
plt.scatter(X[:,0],X[:,1],X[:,2],c=Y, cmap=ListedColormap(colors))

## Preprocessing

In [None]:
# balance the data
index0 = np.random.permutation(sample0)
Y0_index = np.where(Y==0)[0][index0[:sample2]]
index1 = np.random.permutation(sample1)
Y1_index = np.where(Y==1)[0][index1[:sample2]]
Y2_index = np.where(Y==2)[0]
X_balanced=X[np.concatenate((Y0_index,Y1_index,Y2_index))]
Y_balanced=Y[np.concatenate((Y0_index,Y1_index,Y2_index))]
X = X_balanced
Y = Y_balanced

In [4]:
scaler = preprocessing.StandardScaler().fit(X)
X_scale = scaler.transform(X)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X_scale, Y, test_size = 0.2)

## Random Forest

## Using Bayesian Hyperparameter optimization to tune the hyperparameters

In [None]:
# minimize the objective over the space
from hyperopt import hp
from hyperopt import fmin, tpe, space_eval, Trials

def objective(params):
    rf_clf = RandomForestClassifier(**params, oob_score=True, random_state = 10)
    rf_clf.fit(X_train, y_train)
    return -np.mean(cross_val_score(rf_clf, X_train, y_train, cv=5)) # cross validation

space = {"n_estimators":hp.choice('n_estimators',[100, 200, 300, 400,500,600]),
         "max_depth": hp.quniform("max_depth", 1, 15,1),
         "criterion": hp.choice("criterion", ["gini", "entropy"]),
        }

trials = Trials()
best = fmin(objective, space, algo=tpe.suggest, max_evals=100, trials = trials)

print(best)
print(space_eval(space, best))
## {'criterion': 'entropy', 'max_depth': 8.0, 'n_estimators': 300}

## Optimal hyperparameters

In [6]:
-objective(space_eval(space, best))

RandomForestClassifier(oob_score=True, random_state=10)

In [7]:
cross_val_score(rf_clf, X_train, y_train, cv=5)

array([0.95119418, 0.95945946, 0.95530146, 0.96049896, 0.95322245])

In [8]:
y_pred = rf_clf.predict(X_test)

## Metrics

In [None]:
classification_report(y_test, y_pred, target_names = ('awake','nrem','rem'))

In [None]:
confusion_matrix(y_test,y_pred, labels = [0,1,2])