In [1]:
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn

from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [2]:
dataset = sio.loadmat("sleep_data.mat")
X=dataset["X"]
Y=dataset["Y"]

In [3]:
Y=np.squeeze(Y)

In [None]:
colors = ['red','green','blue']
plt.scatter(X[:,0],X[:,1],X[:,2],c=Y, cmap=ListedColormap(colors))

## Preprocessing

In [4]:
scaler = preprocessing.StandardScaler().fit(X)
X_scale = scaler.transform(X)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X_scale, Y, test_size = 0.2)

## Random Forest

In [6]:
rf_clf = RandomForestClassifier(oob_score=True, random_state = 10)
rf_clf.fit(X_train, y_train)

RandomForestClassifier(oob_score=True, random_state=10)

In [7]:
cross_val_score(rf_clf, X_train, y_train, cv=5)

array([0.95119418, 0.95945946, 0.95530146, 0.96049896, 0.95322245])

In [8]:
y_pred = rf_clf.predict(X_test)

## Metrics

In [9]:
classification_report(y_test, y_pred, target_names = ('awake','nrem','rem'))

'              precision    recall  f1-score   support\n\n       awake       0.97      0.94      0.96       685\n        nrem       0.94      0.97      0.95       482\n         rem       0.82      0.89      0.85        36\n\n    accuracy                           0.95      1203\n   macro avg       0.91      0.93      0.92      1203\nweighted avg       0.95      0.95      0.95      1203\n'

In [10]:
confusion_matrix(y_test,y_pred, labels = [0,1,2])

array([[647,  31,   7],
       [ 14, 468,   0],
       [  3,   1,  32]], dtype=int64)