In [1]:
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn

from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [2]:
dataset = sio.loadmat("sleep_data.mat")
X=dataset["X"]
Y=dataset["Y"]

In [3]:
Y=np.squeeze(Y)

In [None]:
colors = ['red','green','blue']
plt.scatter(X[:,0],X[:,1],X[:,2],c=Y, cmap=ListedColormap(colors))

## Preprocessing

In [4]:
# rescale
scaler = preprocessing.StandardScaler().fit(X)
X_scale = scaler.transform(X)

In [11]:
# strtified split for unbalanced data
X_train, X_test, y_train, y_test = train_test_split(X_scale, Y, test_size = 0.2, stratify = Y)

## Random Forest

In [12]:
rf_clf = RandomForestClassifier(oob_score=True, random_state = 10)
rf_clf.fit(X_train, y_train)

RandomForestClassifier(oob_score=True, random_state=10)

In [13]:
# cross validation
cross_val_score(rf_clf, X_train, y_train, cv=5)

array([0.9615784 , 0.95114345, 0.95218295, 0.95426195, 0.95218295])

In [14]:
y_pred = rf_clf.predict(X_test)

## Metrics

In [15]:
classification_report(y_test, y_pred, target_names = ('awake','nrem','rem'))

'              precision    recall  f1-score   support\n\n       awake       0.98      0.96      0.97       669\n        nrem       0.95      0.98      0.97       497\n         rem       0.85      0.89      0.87        37\n\n    accuracy                           0.96      1203\n   macro avg       0.93      0.94      0.93      1203\nweighted avg       0.96      0.96      0.96      1203\n'

In [16]:
confusion_matrix(y_test,y_pred, labels = [0,1,2])

array([[639,  25,   5],
       [  8, 488,   1],
       [  4,   0,  33]], dtype=int64)