In [1]:
import argparse
import json
import os
import shutil
import sys

# Extend the module search path with the parent directory before the
# `src.*` imports below (presumably where the `src` package lives).
sys.path.append("../")

import numpy as np
import pandas as pd
import torch
import wandb
from animus import EarlyStoppingCallback, IExperiment
from animus.torch.callbacks import TorchCheckpointerCallback
from apto.utils.misc import boolean_flag
from apto.utils.report import get_classification_report
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
from tqdm.auto import tqdm

from src.settings import LOGS_ROOT, UTCNOW
from src.ts_data import load_dataset
from src.ts_model import MLP

# Load dataset

In [2]:
features, labels = load_dataset('fbirn')
features = np.swapaxes(features, 1, 2)  # [n_samples; time_len; n_channels;]

# The leading axis indexes samples: it is the axis the splits below slice along.
data_shape = features.shape  # [n_samples; time_len; n_channels;]
n_classes = np.unique(labels).shape[0]

print("data shape: ", data_shape)

# train-val/test split: the first of five stratified, shuffled folds is the
# held-out test set. Only that first fold is needed, so pull a single split
# from the generator instead of materialising all five.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
train_index, test_index = next(skf.split(features, labels))

X_train, X_test = features[train_index], features[test_index]
y_train, y_test = labels[train_index], labels[test_index]

# train/val split: carve a validation set of (n_samples // 5) samples -- an
# absolute count, not a fraction -- out of the remaining training data.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=data_shape[0] // 5,
    random_state=42 + 0,  # presumably base seed + fold index -- TODO confirm
    stratify=y_train,
)


def _as_tensor_dataset(inputs, targets):
    """Wrap a (features, labels) pair as float32 / int64 tensors."""
    return TensorDataset(
        torch.tensor(inputs, dtype=torch.float32),
        torch.tensor(targets, dtype=torch.int64),
    )


_train_ds = _as_tensor_dataset(X_train, y_train)
_valid_ds = _as_tensor_dataset(X_val, y_val)
_test_ds = _as_tensor_dataset(X_test, y_test)

data shape:  (311, 140, 53)


In [3]:
# One DataLoader per split, all with the same batch size and loaded in the
# main process; only the training split is shuffled.
_split_datasets = {"train": _train_ds, "valid": _valid_ds, "test": _test_ds}

datasets = {
    split_name: DataLoader(
        split_ds,
        batch_size=64,
        num_workers=0,
        shuffle=(split_name == "train"),
    )
    for split_name, split_ds in _split_datasets.items()
}

# Load model

In [4]:
# Run directory that holds both the model config and the best checkpoint.
# Defined once so the two paths below cannot drift apart.
# NOTE(review): this is a machine-specific absolute path; consider deriving it
# from LOGS_ROOT (imported from src.settings) so the notebook runs elsewhere.
run_dir = "/Users/pavelpopov/mlp_project/assets/logs/230110.022223-experiment-mlp-fbirn/k_0/0000"

best_config_path = os.path.join(run_dir, "config.json")
with open(best_config_path, "r") as fp:
    model_config = json.load(fp)

_model = MLP(model_config)

# Load the saved weights onto the CPU regardless of the device they were saved from.
logpath = os.path.join(run_dir, "_model.best.pth")
checkpoint = torch.load(logpath, map_location="cpu")
_model.load_state_dict(checkpoint)

# Link stored in the config under "link" (the W&B run URL in the saved output).
print(model_config["link"])

https://wandb.ai/pavalipopov/230110.002536-tune-mlp-fbirn/runs/cxwzwof3


# Run model

## Untuned test

In [122]:
# Score the held-out test split with the loaded model.
# Evaluation mode and no gradient tracking: this is inference only.
_model.eval()

all_scores, all_targets = [], []
with torch.no_grad():
    # Unpack each batch directly. Binding `_` at top level would shadow
    # IPython's last-output variable.
    for inputs, target in datasets["test"]:
        logits = _model(inputs)
        score = torch.softmax(logits, dim=-1)  # per-class probabilities

        all_scores.append(score.cpu().numpy())
        all_targets.append(target.cpu().numpy())

y_test_test = np.hstack(all_targets)  # true test labels
y_score_test = np.vstack(all_scores)  # one row of class probabilities per sample
y_pred_test = np.argmax(y_score_test, axis=-1).astype(np.int32)

In [123]:
# Baseline: accuracy of plain argmax predictions on the test split.
# `accuracy_score` comes from the notebook's top import block.
print("Untuned acc")
accuracy_score(y_test_test, y_pred_test)

Untuned acc


0.7301587301587301

## Tuned test

In [124]:
# Same inference pass as for the test split, now on the validation split.
# The threshold search further down is fitted on these scores.
#
# NOTE: `y_test`, `y_score` and `y_pred` hold *validation* data from here on,
# and `y_test` overwrites the test labels produced by the split cell. The
# names are kept because the tuning cells below read them.
_model.eval()

all_scores, all_targets = [], []
with torch.no_grad():
    # Unpack each batch directly. Binding `_` at top level would shadow
    # IPython's last-output variable.
    for inputs, target in datasets["valid"]:
        logits = _model(inputs)
        score = torch.softmax(logits, dim=-1)  # per-class probabilities

        all_scores.append(score.cpu().numpy())
        all_targets.append(target.cpu().numpy())

y_test = np.hstack(all_targets)
y_score = np.vstack(all_scores)
y_pred = np.argmax(y_score, axis=-1).astype(np.int32)

# `accuracy_score` comes from the notebook's top import block.
print("Untuned val acc")
accuracy_score(y_test, y_pred)

Untuned val acc


0.6451612903225806

In [125]:
# Sanity check: shape of the validation labels, then of the score matrix.
for arr in (y_test, y_score):
    print(arr.shape)

(62,)
(62, 2)


In [126]:
def thr_gen(depth, sum, constr_threshold, step=0.001):
    """Yield every grid point whose ``depth`` new coordinates add up to ``sum``.

    The first ``depth - 1`` coordinates are enumerated on the regular grid
    ``0, step, 2 * step, ...``; the last coordinate is whatever budget is
    left, so every yielded vector sums to the original ``sum`` up to
    floating-point rounding.

    Args:
        depth: Number of coordinates still to generate; must be >= 1.
        sum: Budget left to distribute over the remaining coordinates. The
            name shadows the builtin inside this function only; it is kept
            so existing keyword callers keep working.
        constr_threshold: Coordinates fixed so far. Never mutated; every
            yielded list is a new object.
        step: Grid spacing. Defaults to 0.001, the previously hard-coded value.

    Yields:
        list: ``constr_threshold`` followed by ``depth`` new coordinates.

    Raises:
        ValueError: If ``depth < 1`` or ``step <= 0``. Raised on first
            iteration, because this is a generator.
    """
    if depth < 1:
        raise ValueError(f"depth must be >= 1, got {depth}")
    if step <= 0:
        raise ValueError(f"step must be positive, got {step}")

    if depth == 1:
        yield constr_threshold + [sum]
        return

    # np.arange excludes its stop value, so pad it by a fraction of a step to
    # keep the end point `sum` itself on the grid.
    pad = step / 10
    # Grid values are computed as i * step and can overshoot `sum` by a few
    # ulps (e.g. 7 * 0.1 > 0.7). Treat that as zero budget left instead of
    # silently dropping the grid point. Genuine overshoots (only possible when
    # `sum` is off-grid) are still skipped.
    tol = step * 1e-6
    for thr in np.arange(0.0, sum + pad, step):
        rest_sum = sum - thr
        if rest_sum < -tol:
            continue
        yield from thr_gen(
            depth - 1,
            max(rest_sum, 0.0),
            constr_threshold + [thr],
            step,
        )

In [132]:
# Exhaustive search over per-class offsets that sum to 1: each candidate
# vector is subtracted from the validation scores before the argmax.
# Read the class count from the score matrix instead of hard-coding it.
n_classes = y_score.shape[1]

# Start below any reachable accuracy so the first candidate always replaces
# the empty list. No placeholder vector can leak into the average below.
best_acc = -1.0
best_threshold = []

for thr in thr_gen(n_classes, 1.0, []):
    thr = np.array(thr)

    new_y_score = y_score - thr

    y_pred = np.argmax(new_y_score, axis=-1).astype(np.int32)
    acc = accuracy_score(y_test, y_pred)
    if acc > best_acc:
        best_acc = acc
        best_threshold = [thr]
    elif acc == best_acc:
        # Keep every tied candidate; they are averaged after the loop.
        best_threshold += [thr]

# NOTE(review): the mean of the tied candidates is not guaranteed to reach
# `best_acc` itself; `best_acc` is the best accuracy seen on the grid.
best_threshold = np.mean(np.stack(best_threshold), axis=0)
print(best_threshold.shape)
print("Tuned val acc")
print(best_acc)
print(best_threshold)

(2,)
Tuned val acc
0.6935483870967742
[0.26768421 0.73231579]


In [133]:
# Shift the test-set scores by the tuned offsets, then re-score the argmax
# predictions against the true test labels.
new_y_score_test = y_score_test - best_threshold
new_y_pred_test = new_y_score_test.argmax(axis=-1).astype(np.int32)
accuracy_score(y_test_test, new_y_pred_test)

0.6666666666666666

In [129]:
# Coordinate-wise search: tune one class offset at a time on the validation
# scores, holding the offsets already chosen for earlier classes fixed.
# Read the class count from the score matrix instead of hard-coding it.
n_classes = y_score.shape[1]

best_threshold = np.zeros(n_classes)

for tuned_class in range(n_classes):
    # Accuracy is tracked per class: each sweep restarts from zero.
    best_acc = 0.0
    threshold = best_threshold.copy()
    for thr in np.arange(0.0, 1.0, 0.001):
        threshold[tuned_class] = thr

        new_y_score = y_score - threshold

        y_pred = np.argmax(new_y_score, axis=-1).astype(np.int32)
        acc = accuracy_score(y_test, y_pred)
        # Strict ">" keeps the smallest offset among ties.
        if acc > best_acc:
            best_acc = acc
            best_threshold[tuned_class] = thr

# Re-evaluate with the final offset vector so the reported accuracy matches
# the thresholds that are printed.
new_y_score = y_score - best_threshold
y_pred = np.argmax(new_y_score, axis=-1).astype(np.int32)
best_acc = accuracy_score(y_test, y_pred)

print("Tuned val acc")
print(best_acc)
print(best_threshold)

Tuned val acc
0.6935483870967742
[0.184 0.629]


In [130]:
# Apply the coordinate-wise offsets to the test-set scores and measure the
# accuracy of the resulting argmax predictions.
new_y_score_test = y_score_test - best_threshold
new_y_pred_test = new_y_score_test.argmax(axis=-1).astype(np.int32)
accuracy_score(y_test_test, new_y_pred_test)

0.6984126984126984