In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib widget

In [2]:
import sys
import os

sys.path.append('../tools')
import h5py
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch
from tqdm import tqdm
import sklearn
import random

import torchvision.transforms as T
import pytorch_lightning as pl
import pytorch_lightning.loggers as pl_loggers
import pytorch_lightning.callbacks as pl_callbacks
from models.rns_dataloader import get_data, get_data_by_episode

import data_utility
import annotation_utility
import interactive_plot

import warnings

warnings.filterwarnings("ignore", ".*Consider increasing the value of the `num_workers` argument*")
warnings.filterwarnings("ignore", ".*Set a lower value for log_every_n_steps if you want to see logs for the training epoch*")
warnings.filterwarnings("ignore", ".*exists and is not empty*")
warnings.filterwarnings("ignore", ".*Checkpoint directory {dirpath} exists and is not empty*")


In [3]:
data_dir = "../../../user_data/"
log_folder_root = '../../../user_data/logs/'
ckpt_folder_root = '../../../user_data/checkpoints/'

random_seed = 42
random.seed(random_seed)
torch.manual_seed(random_seed)
np.random.seed(random_seed)

if torch.cuda.is_available():
    torch.cuda.manual_seed(random_seed)
    # True ensures the algorithm selected by CUFA is deterministic
    torch.backends.cudnn.deterministic = True
    # torch.set_deterministic(True)
    # False ensures CUDA select the same algorithm each time the application is run
    torch.backends.cudnn.benchmark = False

import pytorch_lightning

pytorch_lightning.utilities.seed.seed_everything(seed=random_seed, workers=True)

Global seed set to 42


42

In [4]:
from models.rns_dataloader import RNS_Downstream
from models.SwaV import SwaV

In [5]:
import torch
import torchvision
from torch import nn

from lightly.data import LightlyDataset, SwaVCollateFunction
from lightly.loss import SwaVLoss
from lightly.loss.memory_bank import MemoryBankModule
from lightly.models.modules import SwaVProjectionHead, SwaVPrototypes


In [6]:
def collate_fn(batch):
    info = list(zip(*batch))
    data = info[0]
    label = info[1]
    return torch.stack(data), torch.stack(label)

In [7]:
data_list = os.listdir(data_dir+'rns_test_cache')[1:]

# data_list = ['HUP182.npy',   'HUP129.npy',   'HUP109.npy', 'HUP156.npy', 'HUP096.npy', 'RNS026.npy',  'HUP159.npy']
# data_list = ['RNS026.npy', 'HUP159.npy', 'HUP129.npy', 'HUP096.npy', 'HUP182.npy']
train_data, train_label, test_data, test_label, train_index, test_index = get_data(data_list, split=0.8)
# data, label,_,_ = get_data(data_list, split=1)
# train_data, test_data, train_label, test_label = sklearn.model_selection.train_test_split(data, label, test_size=0.8, random_state=42)

print(train_data.shape)
print(train_label.shape)
print(train_index.shape)
print(test_data.shape)
print(test_label.shape)
print(test_index.shape)

100%|██████████| 16/16 [00:16<00:00,  1.04s/it]

(87319, 249, 36)
(87319,)
(87319,)
(21837, 249, 36)
(21837,)
(21837,)





In [8]:
import torch.nn.functional as F
from torch import nn
import torch
import sklearn
from sigmoid_loss import sigmoid_focal_loss

class LinearHead(pl.LightningModule):
    def __init__(self, backbone,):
        super().__init__()
        self.backbone = backbone
        self.fc1 = nn.Linear(2048, 2)
        self.softmax = nn.Softmax(dim=1)
        self.alpha = 0
        self.gamma = 5

    def training_step(self, batch, batch_idx):
        x, y = batch
        # self.set_requires_grad(self.backbone, False)
        x = self.backbone(x)
        x = x.view(-1, 2048)
        pred = self.fc1(x)
        pred = self.softmax(pred)
        label = F.one_hot(y, num_classes=2).squeeze()
        loss = sigmoid_focal_loss(pred.float(), label.float(), alpha=self.alpha, gamma=self.gamma, reduction='mean')
        # Logging to TensorBoard (if installed) by default
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        x = self.backbone(x)
        x = x.view(-1, 2048)
        pred = self.fc1(x)
        pred = self.softmax(pred)
        label = F.one_hot(y, num_classes=2).squeeze()
        loss = sigmoid_focal_loss(pred.float(), label.float(), alpha=self.alpha, gamma=self.gamma, reduction='mean')
        out = torch.argmax(pred, dim=1)
        # print(out.size)
        out = out.detach().cpu().numpy()
        target = y.squeeze().detach().cpu().numpy()
        fscore = sklearn.metrics.f1_score(out, target,labels = [0,1],zero_division=0)
        acc = sklearn.metrics.accuracy_score(out, target)
        # print(acc)
        # print(precision)
        # print(recall)
        # print(fscore)
        # Logging to TensorBoard (if installed) by default
        self.log("val_loss", loss,prog_bar=False)
        self.log("val_acc", acc,prog_bar=False)
        self.log("val_fscore", fscore,prog_bar=False)
        return pred, label

    def predict_step(self, batch, batch_idx):
        # print(batch)
        x, y = batch
        emb = self.backbone(x)
        emb = emb.view(-1, 2048)
        pred = self.fc1(emb)
        # Logging to TensorBoard (if installed) by default
        return pred, y, emb

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

    def set_requires_grad(self, model, requires_grad=True, exclude = None):
        """
        Used in training adversarial approach
        :param model:
        :param requires_grad:
        :return:
        """
        for param in model.parameters():
            param.requires_grad = requires_grad

        if exclude is not None:
            for name, child in model.named_children():
                if name in exclude:
                    for param in child.parameters():
                        param.requires_grad =not requires_grad

In [9]:
linear_eval = 'supervised'

# # ckpt = torch.load("rns_ckpt/checkpoint31.pth")
# resnet = torchvision.models.resnet50(weights="IMAGENET1K_V1")
# backbone = nn.Sequential(*list(resnet.children())[:-1])
# swav = SwaV(backbone)
# # swav.load_state_dict(ckpt['model_state_dict'])
# model = SupervisedDownstream(backbone, 1)

swav = SwaV()
model = LinearHead(swav.backbone)

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
ckpt_save_n_step = 500

checkpoint_callback = pl_callbacks.ModelCheckpoint(monitor='train_loss',
                                                   filename= linear_eval + '-{step}-{train_loss:.5f}',
                                                   dirpath=ckpt_folder_root + 'rns_linear_eval/' + linear_eval + '/',
                                                   save_top_k=-1,
                                                   every_n_train_steps=ckpt_save_n_step,
                                                   save_on_train_epoch_end=False)

early_stop_callback = pl_callbacks.EarlyStopping(monitor="val_fscore",
                                                 patience=10,
                                                 verbose=False,
                                                 mode="max")

csv_logger = pl_loggers.CSVLogger(ckpt_folder_root + 'rns_linear_eval/' + linear_eval + '/',
                                  name='logger')

trainer = pl.Trainer(logger=csv_logger,
                     max_epochs=100,
                     callbacks=[checkpoint_callback, early_stop_callback],
                     # callbacks=[checkpoint_callback],
                     accelerator='gpu',
                     devices=1,
                     log_every_n_steps=50,
                     precision=16,
                     check_val_every_n_epoch=None,
                     val_check_interval=ckpt_save_n_step,
                     enable_model_summary=False,
                     )

Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [10]:
from models.rns_dataloader import RNS_Downstream
train_dataset = RNS_Downstream(train_data, train_label, transform=True, astensor=True)
test_dataset = RNS_Downstream(test_data, test_label, transform=False, astensor=True)

train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=64,
    collate_fn=collate_fn,
    shuffle=True,
    drop_last=True,
)

val_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=256,
    collate_fn=collate_fn,
    shuffle=False,
    drop_last=True,
)

# trainer.fit(model, train_dataloader, val_dataloader)

data loaded
(87319, 249, 36)
(87319,)
data loaded
(21837, 249, 36)
(21837,)


In [11]:
test_dataset = RNS_Downstream(test_data, test_label, transform=False, astensor=True)
val_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=256,
    collate_fn=collate_fn,
    shuffle=False,
    drop_last=True,
)

data loaded
(21837, 249, 36)
(21837,)


In [11]:
predictions = trainer.predict(model,val_dataloader,ckpt_path=ckpt_folder_root+ 'rns_linear_eval/supervised/supervised-step=12500-train_loss=0.01456.ckpt')

Restoring states from the checkpoint path at ../../../user_data/checkpoints/rns_linear_eval/supervised/supervised-step=12500-train_loss=0.01456.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at ../../../user_data/checkpoints/rns_linear_eval/supervised/supervised-step=12500-train_loss=0.01456.ckpt


Predicting: 0it [00:00, ?it/s]

In [12]:
output_list = []
target_list = []
emb_list = []
m = nn.Softmax(dim=1)
for pred, y, emb in predictions:
    output_list.append(pred)
    target_list.append(y)
    emb_list.append(emb)

In [13]:
pred_raw = torch.vstack(output_list)
target = torch.vstack(target_list)
emb = torch.vstack(emb_list)
out = torch.argmax(pred_raw, dim=1)

In [14]:
sklearn.metrics.accuracy_score(torch.argmax(pred_raw, dim=1), target)

0.8933461556074552

In [17]:
clf_report = sklearn.metrics.classification_report(torch.argmax(pred_raw, dim=1), target, digits=6)

print(f"Classification Report : \n{clf_report}")

Classification Report : 
              precision    recall  f1-score   support

           0   0.912119  0.929826  0.920887     14578
           1   0.853354  0.820085  0.836389      7259

    accuracy                       0.893346     21837
   macro avg   0.882737  0.874956  0.878638     21837
weighted avg   0.892585  0.893346  0.892799     21837



In [18]:
fpr, tpr, thresholds = sklearn.metrics.roc_curve(target, m(pred_raw.float())[:,1], pos_label=1)
sklearn.metrics.auc(fpr, tpr)

0.9474834729965571

In [19]:
split_ind = np.insert(np.where(np.diff(test_index['episode_index'])!=0)[0],0,-1)
split_ind = np.insert(split_ind, split_ind.size, len(test_index))

In [20]:
len(test_index)

21837

In [21]:
split_ind[-1]

21837

In [34]:
test_label_split = []
pred_label_split = []
for i in range(len(split_ind)-1):
    start_ind = split_ind[i]
    end_ind = split_ind[i+1]
    test_label_split.append(target[start_ind+1:end_ind+1])
    pred_label_split.append(out[start_ind+1:end_ind+1])


In [35]:
pred_label_split[1]

tensor([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

In [36]:
test_label_split[1]

tensor([[0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],

In [37]:
sklearn.metrics.accuracy_score([np.sign(tl.sum()) for tl in test_label_split], [np.sign(tl.sum()) for tl in pred_label_split])

0.8473282442748091

In [38]:
test_label_split[1]

tensor([[0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],

In [39]:
clf_report = sklearn.metrics.classification_report([np.sign(tl.sum()) for tl in test_label_split], [np.sign(tl.sum()) for tl in pred_label_split], digits=4)

print(f"Classification Report : \n{clf_report}")

Classification Report : 
              precision    recall  f1-score   support

           0     1.0000    0.6610    0.7959       118
           1     0.7826    1.0000    0.8780       144

    accuracy                         0.8473       262
   macro avg     0.8913    0.8305    0.8370       262
weighted avg     0.8805    0.8473    0.8411       262



In [None]:
test_index