In [None]:
import __init__
from data_processing.data_splitter import DataSplitter
from datapath_manager import ITWDataPathManager, DataPathManager
from collections import defaultdict
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import shap
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from evaluators import Evaluator
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.neighbors import KNeighborsClassifier
from sklearn.feature_selection import SelectFromModel
%matplotlib inline
from trainers import MachineLearningModelTrainer, BranchNeuralNetworkTrainer
from tqdm import tqdm
from dataloader import EmbeddingDataLoader
import yaml

In [None]:
# --- Experiment configuration ---------------------------------------------

# Dataset identifier handed to DataSplitter and DataPathManager.
DATASET_NAME = "DCU_NVT_EXP2"

# Windowing settings; both are forwarded to get_saved_model_path when the
# saved-model paths are built (units are not visible here — TODO confirm).
WINDOW_SIZE = [60, 120]
WINDOW_SHIFT = 0.25

# Names under which the two trained models are stored and reported.
ml_model_name = "extra_trees"
dl_model_name = "branch_neural_network"

# Split settings passed to DataSplitter.
model_type = "dependent"
test_size = 0.2

# Metrics requested from both trainers.
target_metrics = [
    "accuracy",
    "balanced_accuracy",
    "precision",
    "recall",
    "f1",
]

In [None]:
def train_model(window_size: int, signal_type: str, indices = None, num_epochs: int = 1000):
    """Train and evaluate the ML and DL models on every split yielded by ``DataSplitter``.

    A ``DataSplitter`` is built from the notebook-level ``DATASET_NAME``,
    ``model_type`` and ``test_size``; ``ds_splitter.next()`` is then called
    ``ds_splitter.num_subjects`` times. For each split, a
    ``MachineLearningModelTrainer`` and a ``BranchNeuralNetworkTrainer`` are
    trained on the same pair of ``EmbeddingDataLoader`` objects.

    Args:
        window_size: Forwarded to ``get_saved_model_path`` when building the
            saved-model paths. It is not passed to ``DataSplitter`` in this
            function.
        signal_type: Selects the DL config file
            ``branchnn_sensor_combination_{signal_type}.yaml`` under
            ``<parent of cwd>/models/model_config``.
        indices: Optional column indices. When given, ``X_train`` and
            ``X_test`` are reduced with ``X[:, indices]``; ``None`` keeps
            every column.
        num_epochs: Passed to ``BranchNeuralNetworkTrainer.train``. Defaults
            to 1000, the value that was previously hard-coded.

    Returns:
        ``(data_ws, models_ws, eval_ws)``: three ``defaultdict(dict)`` objects
        keyed by ``target_user``. ``data_ws[user]`` holds ``'X_train'``,
        ``'y_train'``, ``'X_test'`` and ``'y_test'``; ``models_ws[user]`` and
        ``eval_ws[user]`` are keyed by ``ml_model_name`` and ``dl_model_name``.
    """
    data_ws = defaultdict(dict)
    models_ws = defaultdict(dict)
    eval_ws = defaultdict(dict)

    # The config path depends only on signal_type, so build it once.
    config_path = os.path.join(
        os.path.dirname(os.getcwd()),
        'models',
        'model_config',
        f'branchnn_sensor_combination_{signal_type}.yaml',
    )

    ds_splitter = DataSplitter(DATASET_NAME, model_type, test_size)
    for _ in tqdm(range(ds_splitter.num_subjects)):
        data = ds_splitter.next()
        X_train, y_train, X_test, y_test, target_user = data

        if indices is not None:
            X_train = X_train[:, indices]
            X_test = X_test[:, indices]

        # Log data
        data_ws[target_user]['X_train'] = X_train
        data_ws[target_user]['y_train'] = y_train
        data_ws[target_user]['X_test'] = X_test
        data_ws[target_user]['y_test'] = y_test

        # NOTE(review): the loader built from the test split is what both
        # trainers receive as their second (validation) argument.
        train_embedding_dl = EmbeddingDataLoader(X_train, y_train)
        validate_embedding_dl = EmbeddingDataLoader(X_test, y_test)

        ds_path_manager = DataPathManager(DATASET_NAME)

        # Train ML model
        ml_model_path = ds_path_manager.get_saved_model_path(target_user, ml_model_name, model_type, window_size, WINDOW_SHIFT)
        ml_trainer = MachineLearningModelTrainer(
            ml_model_path,
            ml_model_name,
            target_metrics = target_metrics,
            random_state = 0
        )
        eval_results = ml_trainer.train(train_embedding_dl, validate_embedding_dl)
        models_ws[target_user][ml_model_name] = ml_trainer.model
        eval_ws[target_user][ml_model_name] = eval_results

        # Train DL model
        # Re-read the config for each subject so every trainer gets its own
        # dict; the context manager closes the file handle, which the previous
        # bare open() call left open.
        with open(config_path, 'r') as config_file:
            config_dict = yaml.safe_load(config_file)

        dl_model_path = ds_path_manager.get_saved_model_path(target_user, dl_model_name, model_type, window_size, WINDOW_SHIFT)
        saved_log_path = './logs.txt'
        dl_trainer = BranchNeuralNetworkTrainer(
            saved_log_path,
            dl_model_path,
            config_dict,
            target_metrics = target_metrics,
        )
        eval_results = dl_trainer.train(train_embedding_dl, validate_embedding_dl, num_epochs = num_epochs)

        models_ws[target_user][dl_model_name] = dl_trainer.model
        eval_ws[target_user][dl_model_name] = eval_results
    return data_ws, models_ws, eval_ws

# EVALUATION SCORES FOR WINDOW SIZES 60 AND 120

In [None]:
# data_60, models_60, eval_60 = train_model(WINDOW_SIZE[0])

In [None]:
# data_120, models_120, eval_120 = train_model(WINDOW_SIZE[1])

In [None]:
# target_users = data_60.keys()
# for user_id in target_users:
#     print(' ----------------------------------------------------- ')
#     print(f'User {user_id}')
#     print(eval_60[user_id][ml_model_name])
#     print(eval_120[user_id][ml_model_name])
#     print(eval_60[user_id][dl_model_name])
#     print(eval_120[user_id][dl_model_name])

# EVALUATION SCORES PER SENSOR SIGNAL

In [None]:
# _, models_bvp, eval_bvp = train_model(WINDOW_SIZE[0], 'bvp', indices = [i for i in range(30)])
# _, models_eda, eval_eda = train_model(WINDOW_SIZE[1], indices = [i for i in range(30, 66)])
# _, models_temp, eval_temp = train_model(WINDOW_SIZE[1], indices = [i for i in range(66, 72)])
# Train on feature columns 0..65 only; judging by the index ranges in the
# neighbouring commented-out calls, this is the BVP + EDA slice of the features.
_, models_bvp_eda, eval_bvp_eda = train_model(
    WINDOW_SIZE[0],
    'bvp_eda',
    indices = list(range(66)),
)
# _, models_eda_temp, eval_eda_temp = train_model(WINDOW_SIZE[1], indices = [i for i in range(30, 72)])
# _, models_bvp_temp, eval_bvp_temp = train_model(WINDOW_SIZE[1], indices = [i for i in range(72) if i not in range(30, 66)])

In [None]:
# Print, for every subject that has trained models, the evaluation results of
# the ML model followed by those of the DL model for the bvp_eda run.
target_users = models_bvp_eda.keys()
for user_id in target_users:
    print(' ----------------------------------------------------- ')
    print(f'User {user_id}')
    print('---- bvp_eda ----')
    for model_name in (ml_model_name, dl_model_name):
        print(eval_bvp_eda[user_id][model_name])