# Sleep-Wake and Sleep Quality Recognition Pipeline

This notebook contains the classification pipeline for building *personalized* and *population* models, following the approach described in the paper below:

[1] S. Gashi, L. Alecci, E. D. Lascio, M. E. Debus, F. Gasparini and S. Santini, "The Role of Model Personalization for Sleep Stage and Sleep Quality Recognition Using Wearables," in IEEE Pervasive Computing, doi: 10.1109/MPRV.2022.3164334.

If you use snippets of this script, please make sure to cite our paper [1], which is available at: https://ieeexplore.ieee.org/document/9768202 


### Import the libraries needed for running the script

In [8]:
import pandas as pd
import numpy as np
from sklearn.dummy import DummyClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.metrics import average_precision_score, confusion_matrix, classification_report, balanced_accuracy_score
from sklearn import metrics
from sklearn.preprocessing import StandardScaler

### Classification pipeline

In [9]:
def classification_pipeline(final, label, features, model_type, return_reports=False):
    """Train and evaluate a set of classifiers, holding out one participant at a time.

    For every unique value in ``final['Participant']`` a train/test split is
    built, the features are standardized (scaler fitted on the training data
    only), each classifier is fitted and then scored on the test data.

    Parameters
    ----------
    final : pandas.DataFrame
        Feature table. Must contain a ``'Participant'`` column, the ``label``
        column, every column named in ``features`` and, for personalized
        models, a ``'SessionID'`` column.
    label : str
        Name of the column holding the target labels.
    features : list of str
        Names of the columns used as model inputs.
    model_type : str
        ``'Personalized'`` moves part of the held-out participant's sessions
        into the training set; any other value trains only on the remaining
        participants (population model).
    return_reports : bool, default False
        When True, also return the per-class classification reports.

    Returns
    -------
    results : pandas.DataFrame
        One row per (participant, classifier) with the columns ``Session``
        (the held-out participant), ``Classifier``, ``Accuracy``,
        ``Precision``, ``Recall``, ``F1`` and ``AccuracyB`` (balanced accuracy).
    reports : pandas.DataFrame
        Only returned when ``return_reports`` is True: the stacked
        ``classification_report`` tables, tagged with ``Classifier`` and
        ``Participant`` columns.
    """
    seed = 42
    cols = ['Session', 'Classifier', 'Accuracy', 'Precision', 'Recall', 'F1', 'AccuracyB']

    # Rows are collected in plain lists and turned into DataFrames once at the
    # end; DataFrame.append was removed in pandas 2.0 and growing a frame row
    # by row is quadratic.
    result_rows = []
    report_frames = []

    for participant in final['Participant'].unique():
        is_participant = final['Participant'] == participant

        # Create the training and test set
        if model_type == 'Personalized':
            session_ids = final.loc[is_participant, 'SessionID']
            # NOTE(review): the split point is max(SessionID) - mean(SessionID);
            # this presumably assumes session ids start near 0/1 so that roughly
            # the later half of the sessions is held out -- confirm.
            threshold = session_ids.max() - session_ids.mean()
            test_select = final[is_participant & (final['SessionID'] > threshold)]
            other_participants = final[final['Participant'] != participant]
            own_early_sessions = final[is_participant & (final['SessionID'] <= threshold)]
            train_select = pd.concat([other_participants, own_early_sessions], axis=0)
        else:
            test_select = final[is_participant]
            train_select = final[final['Participant'] != participant]

        # Prepare the train set
        train_data = train_select[features].values
        train_labels = train_select[label].values

        # Prepare the test set
        test_data = test_select[features].values
        test_labels = test_select[label].values

        # Normalize the features; the scaler is fitted on the training data only
        # so that no statistics of the test data leak into training.
        scaler = StandardScaler()
        train_data = scaler.fit_transform(train_data)
        test_data = scaler.transform(test_data)

        # Initialize the classifiers. Every stochastic estimator receives the
        # same seed so that repeated runs give the same numbers.
        majority_class = train_select[label].value_counts().idxmax()
        classifiers = [
            ('DummyS', DummyClassifier(strategy='stratified', random_state=seed)),
            ('DummyC', DummyClassifier(strategy='constant', constant=majority_class)),
            ('SVM', SVC(random_state=seed, class_weight='balanced')),
            ('DT', DecisionTreeClassifier(random_state=seed)),
            ('kNN', KNeighborsClassifier()),
            ('RF', RandomForestClassifier(random_state=seed)),
            ('MLP', MLPClassifier(random_state=seed)),
            ('XGBoost', XGBClassifier(random_state=seed)),
        ]

        # Train on the train set, predict the test set
        for name, clf in classifiers:
            clf = clf.fit(train_data, train_labels)
            predictions_test = clf.predict(test_data)

            result_rows.append({
                'Session': participant,
                'Classifier': name,
                'Accuracy': metrics.accuracy_score(test_labels, predictions_test),
                'Precision': metrics.precision_score(test_labels, predictions_test, average='weighted'),
                'Recall': metrics.recall_score(test_labels, predictions_test, average='weighted'),
                'F1': metrics.f1_score(test_labels, predictions_test, average='weighted'),
                'AccuracyB': metrics.balanced_accuracy_score(test_labels, predictions_test),
            })

            if return_reports:
                report = classification_report(test_labels, predictions_test, output_dict=True)
                report_df = pd.DataFrame(report).transpose()
                report_df['Classifier'] = name
                report_df['Participant'] = participant
                report_frames.append(report_df)

            # Identify which participant/classifier the printed tables belong to.
            print(f'Participant: {participant} | Classifier: {name}')
            print(classification_report(test_labels, predictions_test))
            print(confusion_matrix(test_labels, predictions_test))

    results = pd.DataFrame(result_rows, columns=cols)

    if return_reports:
        reports = pd.concat(report_frames, axis=0) if report_frames else pd.DataFrame()
        return results, reports
    return results

In [10]:
# Before running this cell, define:
#   path_to_features_file -- path of the CSV file holding the feature table
#   features_list         -- list of the feature column names to train on
final = pd.read_csv(path_to_features_file)  #TODO: Define
results = classification_pipeline(final, 'SleepWake_Label', features_list, 'Personalized')  #TODO: features_list

# Average only the metric columns per classifier. 'Session' holds the held-out
# participant identifier, so including it either fails (non-numeric ids on
# pandas >= 2.0) or produces a meaningless mean of ids.
metric_cols = ['Accuracy', 'Precision', 'Recall', 'F1', 'AccuracyB']
results[metric_cols].astype(float).groupby(results['Classifier']).mean()