In [2]:
import __init__
import numpy as np
import os
import pandas as pd
from datapath_manager import ITWDataPathManager, DataPathManager
from date_time_utils import get_date_time_from_float, convert_utc_to_local_time
from trainers import MachineLearningModelTrainer, BranchNeuralNetworkTrainer
import matplotlib.pyplot as plt
from dataloader import EmbeddingDataLoader
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, balanced_accuracy_score
import yaml, torch
from collections import Counter
from combine_features_itw import ITWFeatureCombiner
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import ExtraTreesClassifier

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# --- Recording to evaluate ----------------------------------------------
dataset_name = 'DCU_EXP2_ITW'
# Alternative user ids kept from earlier runs: 'nvtu', 'tlduyen', 'nmduy'
user_id = 'ntnhu'
# Alternative dates kept from earlier runs: '2022-09-02', '2022-09-05', '2022-09-06'
date = '2022-09-08'

# --- Pre-trained model to load ------------------------------------------
trained_dataset_name = 'DCU_NVT_EXP2'
model_type = 'dependent'
dl_model_name, ml_model_name = 'branch_neural_network', 'extra_trees'

# --- Window parameters (forwarded to get_saved_model_path) ---------------
window_size, window_shift = 60, 0.25

In [4]:
dataset_path = ITWDataPathManager(dataset_name).get_dataset_path()

# Lifelog image folders for the selected user/date: Stress, Relaxed, LowStress.
user_date_path = os.path.join(dataset_path, 'data', user_id, date, 'Lifelog')
stress_path, relaxed_path, low_stress_path = (
    os.path.join(user_date_path, label_dir)
    for label_dir in ('Stress', 'Relaxed', 'LowStress')
)

# Feature files for the same user/date.
user_date_feature_path = os.path.join(dataset_path, 'features', user_id, date)
feature_path, metadata_path, labels_path = (
    os.path.join(user_date_feature_path, file_name)
    for file_name in ('bvp_eda_temp.npy', 'metadata.csv', 'stress_state.npy')
)

In [5]:
# Load the feature array (.npy) and the metadata table (.csv) for the selected user/date.
features = np.load(feature_path)
metadata = pd.read_csv(metadata_path)
# The label file at `labels_path` is not loaded here; y_test is built later from the image folders.
# y_test = np.load(labels_path)

In [6]:
def show_images(image_paths, rows, cols):
    """Plot images on a ``rows`` x ``cols`` grid, filling it row by row.

    Each panel is titled with the second-to-last ``/``-separated component of
    the image path (i.e. the name of the directory containing the file).

    Args:
        image_paths: Sequence of image file paths readable by ``plt.imread``.
        rows: Number of grid rows.
        cols: Number of grid columns.

    If fewer than ``rows * cols`` paths are given, the remaining panels are
    left empty with their axes hidden.
    """
    # squeeze=False keeps `axes` two-dimensional even for a single row or
    # column, so axes[i, j] is always valid.
    _fig, axes = plt.subplots(nrows=rows, ncols=cols, figsize=(20, 10), squeeze=False)

    for i in range(rows):
        for j in range(cols):
            # Row-major position: each full row above contributes `cols` images.
            index = i * cols + j
            if index >= len(image_paths):
                axes[i, j].axis('off')
                continue
            img = plt.imread(image_paths[index])
            frame_index = image_paths[index].split('/')[-2]
            axes[i, j].imshow(img)
            axes[i, j].set_title(frame_index)

In [7]:
def get_moments_indices(moments, metadata):
    """Return the index labels of metadata rows that match the given moments.

    Every moment is passed through ``convert_utc_to_local_time`` and turned
    into a timestamp; a row matches when its ``'date_time'`` value equals one
    of those timestamps. Labels come back in metadata order, and moments
    without a matching row contribute nothing.
    """
    local_timestamps = []
    for moment in moments:
        local_timestamps.append(convert_utc_to_local_time(moment).timestamp())
    matched = metadata['date_time'].isin(local_timestamps)
    return metadata.index[matched.to_numpy()].tolist()

In [8]:
def get_features_from_moments(moments, features, metadata):
    """Select the rows of ``features`` addressed by ``get_moments_indices(moments, metadata)``."""
    row_ids = get_moments_indices(moments, metadata)
    selected_rows = features[row_ids, :]
    return selected_rows

In [9]:
# Full paths of the images in the Stress / Relaxed folders, sorted lexicographically.
stress_images = sorted(
    os.path.join(stress_path, image_name) for image_name in os.listdir(stress_path)
)
relaxed_images = sorted(
    os.path.join(relaxed_path, image_name) for image_name in os.listdir(relaxed_path)
)
# Low-stress images are currently not merged into the relaxed set.
# NOTE(review): the disabled line below joins names from low_stress_path with relaxed_path;
# if it is re-enabled it should presumably join with low_stress_path instead.
# low_stress_images = sorted([os.path.join(relaxed_path, image_name) for image_name in os.listdir(low_stress_path)])
# relaxed_images.extend(low_stress_images)

In [10]:
# Optional preview of the stress images:
# show_images(stress_images[:12], 3, 4)

# Feature columns kept for the model: indices 0-29 and 66-71 (out of 0-71).
features_index = [*range(30), *range(66, 72)]
# To keep every column instead:
# features_index = [i for i in range(72)]

In [11]:
# The image file name without its extension identifies the moment.
stress_moments = [os.path.splitext(os.path.basename(image_name))[0] for image_name in stress_images]
relaxed_moments = [os.path.splitext(os.path.basename(image_name))[0] for image_name in relaxed_images]

stress_features = get_features_from_moments(stress_moments, features, metadata)
relaxed_features = get_features_from_moments(relaxed_moments, features, metadata)

# Stress rows first, then relaxed rows, restricted to the selected feature columns.
X_test = np.concatenate((stress_features, relaxed_features), axis=0)[:, features_index]
# Label 1 for each stress row and 0 for each relaxed row, in the same order as X_test.
stress_labels = np.ones(len(stress_features))
relaxed_labels = np.zeros(len(relaxed_features))
y_test = np.concatenate((stress_labels, relaxed_labels), axis=0).astype(int)
test_dataloader = EmbeddingDataLoader(X_test, y_test)

In [12]:
def get_pretrained_model(model_name):
    """Build the trainer object that wraps the saved model for the current user.

    The saved-model path is resolved from the notebook-level settings
    ``trained_dataset_name``, ``user_id``, ``model_type``, ``window_size``
    and ``window_shift``.

    Args:
        model_name: ``'branch_neural_network'`` selects
            ``BranchNeuralNetworkTrainer`` (configured from
            ``branchnn_sensor_combination.yaml``); any other name is passed
            to ``MachineLearningModelTrainer``.

    Returns:
        The constructed trainer.
    """
    target_metrics = ['accuracy', 'balanced_accuracy', 'precision', 'recall', 'f1']
    ds_path_manager = DataPathManager(trained_dataset_name)
    user_model_saved_path = ds_path_manager.get_saved_model_path(user_id, model_name, model_type, window_size, window_shift)
    if model_name == 'branch_neural_network':
        # The config lives in <parent of the working directory>/models/model_config.
        config_path = os.path.join(os.path.dirname(os.getcwd()), 'models', 'model_config', 'branchnn_sensor_combination.yaml')
        # Context manager so the config file handle is closed as soon as it is parsed.
        with open(config_path, 'r') as config_file:
            config_dict = yaml.safe_load(config_file)
        model = BranchNeuralNetworkTrainer('.', user_model_saved_path, config_dict, target_metrics=target_metrics)
    else:
        model = MachineLearningModelTrainer(user_model_saved_path, model_name, eval_mode=True, target_metrics=target_metrics)
    return model

In [13]:
def evaluate(model_name, dataloader):
    """Load the pre-trained ``model_name`` model and print its metrics on ``dataloader``.

    Args:
        model_name: Model identifier forwarded to ``get_pretrained_model``.
        dataloader: Loader forwarded to the trainer's ``predict_and_evaluate``.

    Returns:
        None. The value returned by ``predict_and_evaluate`` is printed.
    """
    model = get_pretrained_model(model_name)
    print(model.predict_and_evaluate(dataloader))

In [14]:
# Print the metrics of the pre-trained deep-learning model (dl_model_name) on the stress/relaxed test set.
evaluate(dl_model_name, test_dataloader)

LOAD PRETRAINED MODEL
{'accuracy': 0.537117903930131, 'balanced_accuracy': 0.43909486510008705, 'precision': 0.12154696132596685, 'recall': 0.29333333333333333, 'f1': 0.17187499999999997}


In [15]:
# Print the metrics of the pre-trained classical model (ml_model_name) on the same test set.
# NOTE(review): the recorded output of this cell is a FileNotFoundError for the saved
# .joblib model file of this user — confirm the model exists for `user_id` before running.
evaluate(ml_model_name, test_dataloader)

LOAD PRETRAINED MODEL


FileNotFoundError: [Errno 2] No such file or directory: '/mnt/DATA/nvtu/PhD/stress_data/DCU_NVT_EXP2/models/60_0.25/dependent/extra_trees/ntnhu_extra_trees_dependent_60_0.25.joblib'

In [None]:
# Predict with the deep-learning model and count how many samples fall in each predicted class.
model = get_pretrained_model(dl_model_name)
y_pred = model.predict(test_dataloader)
Counter(y_pred)

In [None]:
# Metadata rows that match the stress / relaxed image moments
# (same lookup that get_features_from_moments uses to pick feature rows).
stress_indices = get_moments_indices(stress_moments, metadata)
relaxed_indices = get_moments_indices(relaxed_moments, metadata)
info_stress = metadata.loc[stress_indices]
info_relaxed = metadata.loc[relaxed_indices]

In [None]:
# Sample positions grouped by outcome, treating label 1 as the positive class.
TP = [i for i, pred in enumerate(y_pred) if pred == 1 and y_test[i] == 1]  # predicted 1, labelled 1
FN = [i for i, pred in enumerate(y_pred) if pred == 0 and y_test[i] == 1]  # predicted 0, labelled 1
FP = [i for i, pred in enumerate(y_pred) if pred == 1 and y_test[i] == 0]  # predicted 1, labelled 0
# One list per line, in the order TP, FP, FN.
print(TP, FP, FN, sep='\n')

In [None]:
# 'date_time_str' values of the stress rows followed by the relaxed rows.
# Built without rebinding `info_stress` / `info_relaxed`, so both stay DataFrames
# and this cell can be re-run without failing on the second execution.
info = np.concatenate(
    (
        np.array(info_stress['date_time_str'].tolist()),
        np.array(info_relaxed['date_time_str'].tolist()),
    ),
    axis=0,
)

In [None]:
# 'date_time_str' entries at the true-positive positions.
info[TP]

In [None]:
# 'date_time_str' entries at the false-negative positions.
info[FN]

In [None]:
# 'date_time_str' entries at the false-positive positions.
info[FP]

In [None]:
# Re-evaluate the deep-learning model and plot a confusion matrix (Relaxed vs Stress).
# NOTE(review): when the notebook runs top to bottom, `features` is still the full array
# loaded from `feature_path`, while `y_test` only labels the rows selected for X_test —
# confirm whether X_test (or the X.npy / y.npy pair loaded further down) was intended.
# NOTE(review): `y_pred` comes from an earlier cell, not from the dataloader built here,
# and the return value of predict_and_evaluate is discarded.
model = get_pretrained_model(dl_model_name)
test_dataloader = EmbeddingDataLoader(features, y_test)
model.predict_and_evaluate(test_dataloader)
ConfusionMatrixDisplay.from_predictions(y_test, y_pred, display_labels=['Relaxed', 'Stress'])

In [None]:
# Alternative classifier kept for reference (disabled):
# m = ExtraTreesClassifier(
#     n_estimators = 500,
#     random_state = 0,
#     n_jobs = -1,
#     max_features = 'sqrt',
#     max_depth = 8,
#     min_samples_split = 2,
#     min_samples_leaf = 8,
#     oob_score = True,
#     bootstrap = True,
#     class_weight = 'balanced'
# )
# Fit a distance-weighted 5-NN classifier and score it on the same data it was fitted on,
# so the balanced accuracy below is an in-sample figure, not a held-out estimate.
# NOTE(review): this overwrites `y_pred`, and `features` / `y_test` only have matching
# lengths if they were loaded as a pair (e.g. X.npy / y.npy) — confirm the intended cell order.
m = KNeighborsClassifier(n_neighbors=5, weights='distance', n_jobs=-1)
m.fit(features, y_test)
y_pred = m.predict(features)
balanced_accuracy_score(y_test, y_pred)

In [None]:
# Replace `features` and `y_test` with the arrays stored as X.npy / y.npy for this user/date.
# NOTE(review): X_test and test_dataloader are NOT rebuilt here, so they still refer to the
# previous data until a later cell recreates them.
features = np.load(os.path.join(user_date_feature_path, 'X.npy'))
y_test = np.load(os.path.join(user_date_feature_path, 'y.npy'))
print(y_test.shape)

In [None]:
# Same steps as the earlier prediction cell: reload the deep-learning model, predict on the
# current `test_dataloader`, and count the predictions per class.
# NOTE(review): `test_dataloader` is whatever was built last; it is not recreated from the
# `features` / `y_test` loaded in the preceding cell — confirm this is intended.
model = get_pretrained_model(dl_model_name)
y_pred = model.predict(test_dataloader)
Counter(y_pred)

In [None]:
# Keep the stress images whose converted timestamp appears in metadata['date_time'].
moments = [convert_utc_to_local_time(moment).timestamp() for moment in stress_moments]
stressed = [
    image_path
    for image_path, moment in zip(stress_images, moments)
    if (metadata['date_time'] == moment).any()
]
# Positions that the model predicted as class 1.
indices = [i for i, pred in enumerate(y_pred) if pred == 1]
indices

In [None]:
# Preview the first 12 matched stress images on a 3 x 4 grid.
show_images(stressed[:12], 3, 4)

In [None]:
# Paths of the images whose prediction differs from the label (first five shown).
# NOTE(review): assumes position k in y_pred / y_test corresponds to the k-th entry of
# stress_images followed by relaxed_images — confirm, since the feature rows were selected
# through the metadata lookup rather than by image order.
images = np.array(stress_images + relaxed_images)
num_points = len(y_pred)
diff_indices = [i for i in range(num_points) if not (y_pred[i] == y_test[i])]
diff_images = images[diff_indices]
diff_images[:5]

In [None]:
# Predict on every row of `features`, using all-zero placeholder labels only to satisfy the
# dataloader, then count the predictions per class. `model` is the one loaded in an earlier cell.
_y = np.zeros(features.shape[0])
_test_dataloader = EmbeddingDataLoader(features, _y)
_y_pred = model.predict(_test_dataloader)
Counter(_y_pred)

In [None]:
# show_images(diff_images[:100], 10, 10)

# --------- SESSION TEST ---------------

In [None]:
# Session under test. This rebinds the notebook-level `user_id` and `date`,
# which get_pretrained_model and the path cells read.
session_id = '20220906_204400'
user_id, date = 'nmduy', '2022-09-06'
model_name = 'logistic_regression'

In [None]:
# Build the feature array for the selected session; this replaces the notebook-level `features`.
itw_feat_combiner = ITWFeatureCombiner(dataset_name)
features = itw_feat_combiner.combine_session_features(user_id, date, session_id)

In [None]:
# Wrap the session features in a dataloader with all-zero placeholder labels.
# NOTE(review): the labels are placeholders, so any metric computed against them only
# compares predictions with class 0; all feature columns are used here (no features_index) —
# confirm that matches what the saved model expects.
X_test = features
y_test = np.zeros(X_test.shape[0])
test_dataloader = EmbeddingDataLoader(X_test, y_test)

In [None]:
# Evaluate the classical model on the session data.
# NOTE(review): this uses `ml_model_name` ('extra_trees'); the `model_name`
# ('logistic_regression') set in the session configuration cell is never used — confirm
# which model was intended.
evaluate(ml_model_name, test_dataloader)

In [None]:
# Print the result of get_date_time_from_float for a few hard-coded float timestamps.
a = [1662649363.73, 1662649523.59, 1662652170.25, 1662652360.69]
for x in a:
    b = get_date_time_from_float(x)
    print(b)