In [160]:
from model import *
import torch
import torch.nn as nn
import torch
import torch.optim as optim
from torch.autograd import Variable
import configparser
import os
import os.path as osp
import numpy as np
from sklearn.model_selection import StratifiedKFold, LeaveOneGroupOut
from tqdm import tqdm
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.metrics import balanced_accuracy_score
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier

In [161]:
def get_dataset_folder_path(dataset_name: str) -> str:
    """Look up the folder that stores the files of ``dataset_name``.

    The location is read from the ``DATA_PATH`` section of ``config.ini``,
    which is looked for one directory above the current working directory.

    Args:
        dataset_name: Dataset identifier: ``'AffectiveROAD'``,
            ``'WESAD_CHEST'``, ``'WESAD_WRIST'``, ``'RESAMPLED_WESAD_CHEST'``
            or ``'DCU_NVT_EXP1'``.

    Returns:
        The configured folder path, or ``None`` when ``dataset_name`` is not
        one of the identifiers listed above.
    """
    config = configparser.ConfigParser()
    config.read(osp.join(os.getcwd(), '..', 'config.ini'))

    # Each entry pairs a group of dataset names with the config option
    # that holds the folder path shared by that group.
    option_by_names = (
        (('AffectiveROAD',), 'affectiveROAD_dataset_path'),
        (('WESAD_CHEST', 'WESAD_WRIST', 'RESAMPLED_WESAD_CHEST'), 'wesad_dataset_path'),
        (('DCU_NVT_EXP1',), 'dcu_nvt_dataset_path'),
    )
    for names, option in option_by_names:
        if dataset_name in names:
            return config['DATA_PATH'][option]
    return None


def load_dataset(dataset_name: str):
    """Load the features, ground truth and group ids of a dataset.

    Three ``.npy`` files are read from the folder returned by
    ``get_dataset_folder_path``: ``{dataset_name}_stats_feats_1_60.npy``,
    ``{dataset_name}_ground_truth_1_60.npy`` and
    ``{dataset_name}_groups_1_60.npy``.

    NaN entries of the feature array are replaced with 0. For
    ``'AffectiveROAD'`` only the rows whose ground truth is ``>= 0`` are kept.

    Args:
        dataset_name: Dataset identifier understood by
            ``get_dataset_folder_path``.

    Returns:
        Tuple ``(dataset, ground_truth, groups)`` of numpy arrays.

    Raises:
        ValueError: If no folder is configured for ``dataset_name``.
    """
    dataset_folder_path = get_dataset_folder_path(dataset_name)
    if dataset_folder_path is None:
        # Fail early with a readable message instead of letting osp.join
        # raise a TypeError on a None path.
        raise ValueError(f"Unsupported dataset name: {dataset_name!r}")

    # Build the paths of the feature, ground-truth and group files
    dataset_file_path = osp.join(dataset_folder_path, f'{dataset_name}_stats_feats_1_60.npy')
    ground_truth_file_path = osp.join(dataset_folder_path, f'{dataset_name}_ground_truth_1_60.npy')
    group_file_path = osp.join(dataset_folder_path, f'{dataset_name}_groups_1_60.npy')

    # Load dataset, ground-truth, and groups
    dataset = np.load(dataset_file_path) # Load dataset
    ground_truth = np.load(ground_truth_file_path) # Load corresponding ground-truth
    groups = np.load(group_file_path) # Load corresponding user_id labels

    # Replace missing feature values with 0
    dataset[np.isnan(dataset)] = 0

    # AffectiveROAD: keep only the rows with a non-negative ground truth
    if dataset_name == 'AffectiveROAD':
        indices = np.where(ground_truth >= 0)[0]
        dataset = dataset[indices]
        groups = groups[indices]
        ground_truth = ground_truth[indices]

    return dataset, ground_truth, groups

In [162]:
def transform_data(X_train, X_test, scaler_name: str = 'StandardScaler', bounding: int = 25):
    """Scale train and test features with scalers fitted on the training set.

    Columns ``[:bounding]`` are scaled with ``RobustScaler`` and columns
    ``[bounding:]`` with ``StandardScaler``. Both scalers are fitted on
    ``X_train`` only and then applied to ``X_train`` and ``X_test``.

    The inputs are never modified: scaling is done on floating-point copies,
    so the caller's arrays keep their original values and integer inputs are
    not truncated when the scaled values are written back.

    Args:
        X_train: 2-D array of training features (samples x features).
        X_test: 2-D array of test features with the same number of columns.
        scaler_name: Accepted for backward compatibility; currently ignored.
        bounding: Column index that separates the RobustScaler group from the
            StandardScaler group. Defaults to 25.

    Returns:
        Tuple ``(scaled_X_train, scaled_X_test)`` of new arrays.
    """
    def _float_copy(features):
        # Keep an existing float dtype, promote anything else to float64.
        features = np.asarray(features)
        if np.issubdtype(features.dtype, np.floating):
            return features.copy()
        return features.astype(np.float64)

    scaled_X_train = _float_copy(X_train)
    scaled_X_test = _float_copy(X_test)

    column_groups = (
        (StandardScaler(), slice(bounding, None)),
        (RobustScaler(), slice(None, bounding)),
    )
    for scaler, columns in column_groups:
        # A scaler cannot be fitted on zero columns, so skip empty groups.
        if scaled_X_train[:, columns].shape[1] == 0:
            continue
        scaler.fit(scaled_X_train[:, columns])
        scaled_X_train[:, columns] = scaler.transform(scaled_X_train[:, columns])
        scaled_X_test[:, columns] = scaler.transform(scaled_X_test[:, columns])
    return scaled_X_train, scaled_X_test

In [163]:
# Not referenced by the code visible in this part of the notebook.
root = '/'

# Number of samples drawn from each of the train and test sets per GMMD step.
BATCH_SIZE = 500
# Input and output sizes handed to MMDEmbedding.
N_INP = N_OUT = 60
# Number of optimisation steps run for the GMMD network in every fold.
N_GEN_EPOCHS = 10_000
# Kernel name forwarded to MMD.
KERNEL_TYPE = 'multiscale'

In [164]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
# (To force CPU execution, assign torch.device('cpu') instead.)
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [165]:
# define the MMDEmbedding
# gmmd_net = MMDEmbedding(N_INP, N_OUT).to(device)
# try:
#   gmmd_net.load_state_dict(torch.load("gmmd.pth"))
#   print("Model parameters are loaded")
# except:
#   pass

In [166]:
# gmmd_optimizer = optim.RMSprop(gmmd_net.parameters(), lr=0.004)

In [167]:
# def train_one_step(x, samples):
#     samples = Variable(samples).to(device)
#     gen_samples = gmmd_net(samples)

#     loss = MMD(x, gen_samples, KERNEL_TYPE)
#     gmmd_optimizer.zero_grad()
#     loss.backward()
#     gmmd_optimizer.step()

#     return loss

In [168]:
# Dataset evaluated by the cells below; the name is passed to load_dataset.
dataset_name = 'WESAD_WRIST'

In [169]:
# Load the features (X), the ground truth (y) and the per-sample group ids (groups).
X, y, groups = load_dataset(dataset_name)

In [170]:
# Leave-one-group-out evaluation: every group is held out once as the test set.
# For each fold a fresh GMMD network is trained to map test samples so that
# their MMD to the training samples shrinks; a classifier fitted on the
# training features is then scored on the mapped test samples.
logo = LeaveOneGroupOut()
test_groups = []
balanced_accs = []
cv_balanced_acc_scores = []
torch.manual_seed(0)
np.random.seed(0)

# feature_importances = []

# The split generator has no length, so give tqdm the number of folds explicitly
# (otherwise the progress bar only shows "0it").
for train_index, test_index in tqdm(logo.split(X, y, groups), total=logo.get_n_splits(X, y, groups)):
    print(f"Training ---- {groups[test_index][0]}")
    X_train, y_train, X_test, y_test = X[train_index], y[train_index], X[test_index], y[test_index] # Get train and test data
    # Validate if the test set and train set have two classes
    num_classes_test = len(np.unique(y_test))
    num_classes_train = len(np.unique(y_train))
    if num_classes_test < 2 or num_classes_train < 2: # If one of them does not have enough classes, then ignore it
        continue

    n_train_samples = X_train.shape[0]
    n_test_samples = X_test.shape[0]
    # Sampling without replacement raises ValueError when more samples are
    # requested than exist, so cap the batch at the smaller set size.
    # NOTE(review): one common size is used for both batches in case MMD
    # expects equally sized inputs — confirm against model.MMD.
    batch_size = min(BATCH_SIZE, n_train_samples, n_test_samples)
    X_train, X_test = transform_data(X_train, X_test, scaler_name = 'StandardScaler') # Feature scaling if possible
    # X_train = torch.from_numpy(X_train).to(device)
    # X_test = torch.from_numpy(X_test)
    gmmd_net = MMDEmbedding(N_INP, N_OUT).to(device)
    gmmd_optimizer = optim.Adam(gmmd_net.parameters(), lr=0.004)
    for ep in range(N_GEN_EPOCHS):
        # resampling_limit = 300 # From paper
        train_indices = np.random.choice(n_train_samples, batch_size, replace = False)
        test_indices = np.random.choice(n_test_samples, batch_size, replace = False)
        xx = torch.from_numpy(X_train[train_indices, :]).to(device).float()
        yy = torch.from_numpy(X_test[test_indices, :]).to(device).float()
        gen_samples = gmmd_net(yy)

        # MMD between the training batch and the mapped test batch
        loss = MMD(xx, gen_samples, KERNEL_TYPE)
        gmmd_optimizer.zero_grad()
        loss.backward()
        gmmd_optimizer.step()
        if ep % 1000 == 0: 
            print(f"GMMD Training: {ep}. epoch completed,  loss: {loss.item()}")
    # Infer
    # clf = RandomForestClassifier(n_estimators = 1000, random_state = 0, n_jobs = 8, max_features='sqrt',
                                # oob_score=True, bootstrap=True, class_weight = 'balanced')
    # clf = SVC(C = 10, random_state = 0, class_weight = 'balanced')
    clf = MLPClassifier(random_state = 0, max_iter = 1000, early_stopping=True)
    clf.fit(X_train, y_train)
    # No gradients are needed at inference time; skip building the autograd graph.
    with torch.no_grad():
        embeded_X_test = gmmd_net(torch.from_numpy(X_test).to(device).float()).cpu().numpy()
    y_preds = clf.predict(embeded_X_test)

    # Evaluate balanced accuracy on the predicted results of test set
    acc = balanced_accuracy_score(y_test, y_preds)
    balanced_accs.append(acc)
    test_groups.append(groups[test_index][0])
    print(f'BA Score: {acc}')
results = { 'groups': test_groups, 'balanced_accuracy_score': balanced_accs }

0it [00:00, ?it/s]

Training ---- S10
GMMD Training: 0. epoch completed,  loss: 3.9540963172912598
GMMD Training: 1000. epoch completed,  loss: 0.7316492199897766
GMMD Training: 2000. epoch completed,  loss: 0.475624680519104
GMMD Training: 3000. epoch completed,  loss: 0.46680542826652527
GMMD Training: 4000. epoch completed,  loss: 0.3920816481113434


0it [00:22, ?it/s]


KeyboardInterrupt: 