In [1]:
import __init__
from data_processing.data_splitter import DataSplitter
from data_processing.datapath_manager import DataPathManager
from data_processing.dataloader import DatasetLoader, EmbeddingDataLoader
from models.trainers import MachineLearningModelTrainer
from tqdm import tqdm
from models.evaluators import Evaluator
from collections import defaultdict
import numpy as np
import datetime
import mlflow
import lightgbm as lgb
# All MLflow client calls made from this notebook go to this tracking server.
MLFLOW_TRACKING_URI = 'http://localhost:5010'
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- Experiment identity ---------------------------------------------------
# Passed to the project's loader / splitter / path-manager / trainer helpers.
dataset_name = 'DCU_NVT_EXP2'
model_type = 'dependent'
strategy = 'lgb'  # presumably LightGBM -- confirm against MachineLearningModelTrainer

# --- Signal windowing ------------------------------------------------------
# WINDOW_SIZE * SAMPLING_RATE and WINDOW_SHIFT * SAMPLING_RATE are converted
# to sample counts when the per-task window indices are enumerated.
DEFAULT_SIGNAL = 'eda'
SAMPLING_RATE = 4
WINDOW_SIZE = 60
WINDOW_SHIFT = 0.25

# --- Split, seeding and reported metrics -----------------------------------
TEST_SIZE = 0.3
random_state = 0
target_metrics = ['accuracy', 'balanced_accuracy']

In [3]:
# logger.log_field('model_type', model_type)

In [4]:
# Project helpers bound to the configured dataset: path resolution, the
# train/test splitter, and the loaded dataset (indexed below as
# data[signal][user_id][task_id]).
ds_path_manager = DataPathManager(dataset_name)
ds_splitter = DataSplitter(dataset_name, model_type, TEST_SIZE)
dataset_loader = DatasetLoader(dataset_name)
data = dataset_loader.load_dataset_data()

# saved_log_path = ds_path_manager.get_log_path(strategy, model_type, WINDOW_SIZE, WINDOW_SHIFT)
# print(saved_log_path)

In [5]:
# Build tasks_indices[user_id][task_id] -> list of consecutive global window
# numbers. Windows are counted in iteration order over users and then tasks,
# so every window of every task receives a unique, increasing index.
# NOTE(review): these numbers are later used as row positions into the array
# returned by DatasetLoader.load_data_for_training -- confirm that the loader
# enumerates windows in the same order and with the same size/shift.

# Loop-invariant window geometry, expressed in samples.
step = int(WINDOW_SHIFT * SAMPLING_RATE)        # samples between window ends
first_iter = int(WINDOW_SIZE * SAMPLING_RATE)   # samples in one full window

tasks_indices = defaultdict(dict)
index = 0  # running window counter across all users and tasks

for user_id in tqdm(data[DEFAULT_SIGNAL].keys()):
    for task_id, signal_data in data[DEFAULT_SIGNAL][user_id].items():
        # One window per end position first_iter, first_iter + step, ...
        # that is strictly below the signal length.
        num_windows = len(range(first_iter, len(signal_data), step))
        tasks_indices[user_id][task_id] = list(range(index, index + num_windows))
        index += num_windows

100%|██████████| 11/11 [00:00<00:00, 361.12it/s]


In [6]:
# Registry of trainers keyed by user id; later cells look trainers up here.
models = dict()

In [7]:
# Train one model per subject using the splits produced by ds_splitter and
# keep every fitted trainer in `models`, keyed by the subject id, so that
# later cells can look the trainer up again.
ds_splitter.reset()

current_datetime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')

# mlflow.get_experiment_by_name returns None when no experiment has that
# name, so create the experiment on first use instead of failing with an
# AttributeError on a fresh tracking server.
experiment = mlflow.get_experiment_by_name(dataset_name)
if experiment is None:
    experiment_id = mlflow.create_experiment(name=dataset_name)
else:
    experiment_id = experiment.experiment_id


for _ in tqdm(range(ds_splitter.num_subjects)):
    # Unpack straight into fresh names: rebinding the global `data` here
    # would overwrite the loaded dataset that the window-indexing cell reads.
    X_train, y_train, X_test, y_test, target_user = ds_splitter.next()
    print(target_user)

    train_embedding_dl = EmbeddingDataLoader(X_train, y_train)
    validation_embedding_dl = EmbeddingDataLoader(X_test, y_test)

    # NOTE(review): the path is not used below; the call is kept in case
    # get_saved_model_path has side effects (e.g. creating folders) -- confirm.
    saved_model_path = ds_path_manager.get_saved_model_path(target_user, strategy, model_type, WINDOW_SIZE, WINDOW_SHIFT)
    model = MachineLearningModelTrainer(strategy, target_metrics=target_metrics, random_state=random_state)

    eval_results = model.train(train_embedding_dl, validation_embedding_dl)
    print(eval_results)

    # MLflow run logging is currently disabled; kept for reference.
    # params = {
    #     'user_id': target_user,
    #     'model_type': model_type,
    #     'strategy': strategy,
    # }
    # tags = {
    #     'window_size': WINDOW_SIZE,
    #     'window_shift': WINDOW_SHIFT,
    #     'test_size': TEST_SIZE,
    # }

    # with mlflow.start_run(
    #     experiment_id=experiment_id,
    #     run_name = f'{target_user}',
    #     # tags = tags
    # ):
    #     # mlflow.autolog(log_models = True)
    #     mlflow.log_params(params)
    #     mlflow.log_metrics(eval_results)

    # Register the trainer: the per-task hold-out cell further down indexes
    # models[user_id] and would raise KeyError if this were skipped.
    models[target_user] = model


  0%|          | 0/11 [00:00<?, ?it/s]

cgurrin


  9%|▉         | 1/11 [00:00<00:09,  1.06it/s]

Train Evaluation Results: {'accuracy': 1.0, 'balanced_accuracy': 1.0}
->>> Validation Evaluation Results: {'accuracy': 0.8583953241232731, 'balanced_accuracy': 0.8596049855777524}
-----------------------------------------------------------------------------------------
{'accuracy': 0.8583953241232731, 'balanced_accuracy': 0.8596049855777524}
ltkhiem


 18%|█▊        | 2/11 [00:01<00:07,  1.27it/s]

Train Evaluation Results: {'accuracy': 1.0, 'balanced_accuracy': 1.0}
->>> Validation Evaluation Results: {'accuracy': 0.8209342560553633, 'balanced_accuracy': 0.7942415076779897}
-----------------------------------------------------------------------------------------
{'accuracy': 0.8209342560553633, 'balanced_accuracy': 0.7942415076779897}
lttnga


 27%|██▋       | 3/11 [00:02<00:05,  1.36it/s]

Train Evaluation Results: {'accuracy': 1.0, 'balanced_accuracy': 1.0}
->>> Validation Evaluation Results: {'accuracy': 0.7595738554563778, 'balanced_accuracy': 0.710581194422612}
-----------------------------------------------------------------------------------------
{'accuracy': 0.7595738554563778, 'balanced_accuracy': 0.710581194422612}
lzhou


 36%|███▋      | 4/11 [00:02<00:04,  1.60it/s]

Train Evaluation Results: {'accuracy': 1.0, 'balanced_accuracy': 1.0}
->>> Validation Evaluation Results: {'accuracy': 1.0, 'balanced_accuracy': 1.0}
-----------------------------------------------------------------------------------------
{'accuracy': 1.0, 'balanced_accuracy': 1.0}
nmduy


 45%|████▌     | 5/11 [00:03<00:03,  1.66it/s]

Train Evaluation Results: {'accuracy': 1.0, 'balanced_accuracy': 1.0}
->>> Validation Evaluation Results: {'accuracy': 0.8293525179856115, 'balanced_accuracy': 0.7926613386990193}
-----------------------------------------------------------------------------------------
{'accuracy': 0.8293525179856115, 'balanced_accuracy': 0.7926613386990193}
ntnhu


 55%|█████▍    | 6/11 [00:04<00:03,  1.41it/s]

Train Evaluation Results: {'accuracy': 1.0, 'balanced_accuracy': 1.0}
->>> Validation Evaluation Results: {'accuracy': 0.911976911976912, 'balanced_accuracy': 0.8848375636699849}
-----------------------------------------------------------------------------------------
{'accuracy': 0.911976911976912, 'balanced_accuracy': 0.8848375636699849}
nvtu


 64%|██████▎   | 7/11 [00:04<00:02,  1.47it/s]

Train Evaluation Results: {'accuracy': 1.0, 'balanced_accuracy': 1.0}
->>> Validation Evaluation Results: {'accuracy': 0.7732256203115984, 'balanced_accuracy': 0.7367077863944471}
-----------------------------------------------------------------------------------------
{'accuracy': 0.7732256203115984, 'balanced_accuracy': 0.7367077863944471}
pmnguyet


 73%|███████▎  | 8/11 [00:05<00:01,  1.66it/s]

Train Evaluation Results: {'accuracy': 1.0, 'balanced_accuracy': 1.0}
->>> Validation Evaluation Results: {'accuracy': 0.62784588441331, 'balanced_accuracy': 0.5975424169471641}
-----------------------------------------------------------------------------------------
{'accuracy': 0.62784588441331, 'balanced_accuracy': 0.5975424169471641}
qmboi


 82%|████████▏ | 9/11 [00:05<00:01,  1.64it/s]

Train Evaluation Results: {'accuracy': 1.0, 'balanced_accuracy': 1.0}
->>> Validation Evaluation Results: {'accuracy': 0.9408369408369408, 'balanced_accuracy': 0.9195304005321603}
-----------------------------------------------------------------------------------------
{'accuracy': 0.9408369408369408, 'balanced_accuracy': 0.9195304005321603}
tkvan


 91%|█████████ | 10/11 [00:06<00:00,  1.71it/s]

Train Evaluation Results: {'accuracy': 1.0, 'balanced_accuracy': 1.0}
->>> Validation Evaluation Results: {'accuracy': 0.8712557603686636, 'balanced_accuracy': 0.8305325239364327}
-----------------------------------------------------------------------------------------
{'accuracy': 0.8712557603686636, 'balanced_accuracy': 0.8305325239364327}
tlduyen


100%|██████████| 11/11 [00:07<00:00,  1.56it/s]

Train Evaluation Results: {'accuracy': 1.0, 'balanced_accuracy': 1.0}
->>> Validation Evaluation Results: {'accuracy': 0.7494239631336406, 'balanced_accuracy': 0.6996956168831169}
-----------------------------------------------------------------------------------------
{'accuracy': 0.7494239631336406, 'balanced_accuracy': 0.6996956168831169}





--------------------------------------------------------

In [None]:
# Load the windowed training data using the same window size and shift as the
# rest of the notebook; the fourth returned value is not needed here.
training_loader = DatasetLoader(dataset_name)
dataset, ground_truth, groups, _ = training_loader.load_data_for_training(
    window_shift=WINDOW_SHIFT,
    window_size=WINDOW_SIZE,
)

In [None]:
# Evaluator restricted to plain accuracy.
accuracy_only = ['accuracy']
evaluator = Evaluator(target_metrics=accuracy_only)

In [None]:
# Per-user hold-out by position inside each task: for every task the leading
# (1 - TEST_SIZE) share of its windows is used for training and the trailing
# TEST_SIZE share for validation. TEST_SIZE is taken from the configuration
# cell rather than being redefined here, so this split cannot drift from the
# one used by ds_splitter.

# Tasks whose windows are relabelled as class 0 regardless of ground_truth.
ZERO_LABEL_TASKS = ('stest_Easy', 'stest_Medium')

for user_id in tasks_indices.keys():
    print('-------------- {} --------------'.format(user_id))
    train_X, test_X = [], []
    train_Y, test_Y = [], []
    for task_id, indices in tasks_indices[user_id].items():
        # The previous expression `1 - len(indices) * TEST_SIZE` only split
        # correctly through accidental negative slicing: it was off by one and
        # produced an empty training slice for short tasks.
        split_at = int(len(indices) * (1 - TEST_SIZE))
        train_indices = indices[:split_at]
        test_indices = indices[split_at:]

        train_X.append(dataset[train_indices, :])
        test_X.append(dataset[test_indices, :])

        if task_id in ZERO_LABEL_TASKS:
            train_Y.append([0] * len(train_indices))
            test_Y.append([0] * len(test_indices))
        else:
            train_Y.append(ground_truth[train_indices])
            test_Y.append(ground_truth[test_indices])

    X_train = np.concatenate(train_X, axis=0)
    X_test = np.concatenate(test_X, axis=0)
    y_train = np.concatenate(train_Y, axis=0)
    y_test = np.concatenate(test_Y, axis=0)

    train_embedding_dl = EmbeddingDataLoader(X_train, y_train)
    validate_embedding_dl = EmbeddingDataLoader(X_test, y_test)

    # Fall back to a fresh trainer (same configuration as the per-subject
    # training loop) when none was registered for this user, instead of
    # failing with KeyError on an empty `models` dict.
    if user_id not in models:
        models[user_id] = MachineLearningModelTrainer(strategy, target_metrics=target_metrics, random_state=random_state)
    models[user_id].train(train_embedding_dl, validate_embedding_dl)