In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os

sys.path.append('../tools')
import h5py
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch
from tqdm import tqdm
import sklearn
import random

import torchvision.transforms as T
import pytorch_lightning as pl
import pytorch_lightning.loggers as pl_loggers
import pytorch_lightning.callbacks as pl_callbacks
from models.rns_dataloader import get_data

import data_utility
import annotation_utility
import interactive_plot

import warnings

warnings.filterwarnings("ignore", ".*Consider increasing the value of the `num_workers` argument*")
warnings.filterwarnings("ignore", ".*Set a lower value for log_every_n_steps if you want to see logs for the training epoch*")
warnings.filterwarnings("ignore", ".*exists and is not empty*")
warnings.filterwarnings("ignore", ".*Checkpoint directory {dirpath} exists and is not empty*")


In [3]:
data_dir = "../../../user_data/"
log_folder_root = '../../../user_data/logs/'
ckpt_folder_root = '../../../user_data/checkpoints/'

random_seed = 42
random.seed(random_seed)
torch.manual_seed(random_seed)
np.random.seed(random_seed)

if torch.cuda.is_available():
    torch.cuda.manual_seed(random_seed)
    # True ensures the algorithm selected by CUFA is deterministic
    torch.backends.cudnn.deterministic = True
    # torch.set_deterministic(True)
    # False ensures CUDA select the same algorithm each time the application is run
    torch.backends.cudnn.benchmark = False

In [4]:
raw_annotations = pd.read_csv(data_dir + 'full_updated_anns_annotTbl_cleaned.csv')
ids = list(np.unique(raw_annotations[raw_annotations['descriptions'].notnull()]['HUP_ID']))
# ids = list(np.unique(raw_annotations['HUP_ID']))
ids

['HUP047',
 'HUP084',
 'HUP096',
 'HUP109',
 'HUP121',
 'HUP129',
 'HUP131',
 'HUP137',
 'HUP147',
 'HUP153',
 'HUP156',
 'HUP159',
 'HUP182',
 'HUP197',
 'HUP199',
 'HUP205',
 'RNS026',
 'RNS029']

In [1]:
# ids = list(np.unique(raw_annotations['HUP_ID']))
def kadanes_algorithm(arr):
    # Initialize max_current and max_global to the first element of the array
    max_current = max_global = arr[0]

    # Iterate through the array starting from the second element
    for i in range(1, len(arr)):
        # Update max_current to be the maximum of the current element or the sum of max_current and the current element
        max_current = max(arr[i], max_current + arr[i])

        # If max_current is greater than max_global, update max_global
        if max_current > max_global:
            max_global = max_current

    # Return the maximum sum found
    return max_global

# Example array
arr = [-2, 1, -3, 4, -1, 2, 1, -5, 4]
arr = []
# Find the maximum sum subarray
print(kadanes_algorithm(arr))


6


In [5]:
data_import = data_utility.read_files(path=data_dir+'rns_data', patientIDs=ids,
                                      verbose=True)  # Import data with annotation

100%|██████████| 18/18 [00:45<00:00,  2.55s/it]


In [6]:
annotations = annotation_utility.read_annotation(annotation_path=data_dir +'full_updated_anns_annotTbl_cleaned.csv',
                                                 data=data_import, n_class=3)

  temp_df['Episode_Index'] = episode_ind.astype(int)
  temp_df['Episode_Index'] = episode_ind.astype(int)


In [8]:
import numpy.lib.recfunctions as rfn
np.random.seed(seed=42)
annot = annotations.annotations
annot_nonseizure = annot[annot['Class_Code'] == 0]
annot_seizure = annot[annot['Class_Code'] == 1]
patient_list = list(np.unique(annot['Patient_ID']))
clip_dict = {}
for p in patient_list:
    seizure_start_index = np.array([])
    seizure_end_index = np.array([])
    nonseizure_start_index = np.array([])
    nonseizure_end_index = np.array([])
    global_episode_index_seizure = np.array([])
    global_episode_index_nonseizure = np.array([])
    start_index = annot_seizure[annot_seizure['Patient_ID'] == p]['Episode_Start_Index']
    end_index = annot_seizure[annot_seizure['Patient_ID'] == p]['Episode_End_Index']
    annot_start_list = annot_seizure[annot_seizure['Patient_ID'] == p]['Annotation_Start_Index']
    annot_end_list = annot_seizure[annot_seizure['Patient_ID'] == p]['Annotation_End_Index']
    episode_index = annot_seizure[annot_seizure['Patient_ID'] == p]['Episode_Index']

    for i, slel in enumerate(zip(annot_start_list, annot_end_list, episode_index.index)):
        sl_order = np.argsort(slel[0])
        sl = np.array(slel[0])[sl_order]
        el = np.array(slel[1])[sl_order]
        ei = slel[2]

        annot_array = np.vstack((sl, el))
        seizure_start_index = np.hstack((seizure_start_index, annot_array[0, :]))
        seizure_end_index = np.hstack((seizure_end_index, annot_array[1, :]))

        nonseizure_start_index = np.hstack((nonseizure_start_index, start_index.iloc[i]))
        nonseizure_end_index = np.hstack((nonseizure_end_index, annot_array[0, 0]))

        nonseizure_start_index = np.hstack((nonseizure_start_index, annot_array[1, -1]))
        nonseizure_end_index = np.hstack((nonseizure_end_index, end_index.iloc[i]))

        if annot_array.shape[1] > 1:
            nonseizure_start_index = np.hstack((nonseizure_start_index,annot_array[1, :-1]))
            nonseizure_end_index = np.hstack((nonseizure_end_index,  annot_array[0, 1:]))


        global_episode_index_seizure = np.hstack((global_episode_index_seizure,
                                                  np.repeat(ei, len(seizure_start_index) -
                                                            len(global_episode_index_seizure))))
        global_episode_index_nonseizure = np.hstack((global_episode_index_nonseizure,
                                                     np.repeat(ei, len(nonseizure_start_index) -
                                                               len(global_episode_index_nonseizure))))

    assert len(global_episode_index_nonseizure) == len(nonseizure_start_index)
    assert len(global_episode_index_seizure) == len(seizure_start_index)

    start_index = annot_nonseizure[annot_nonseizure['Patient_ID'] == p]['Episode_Start_Index']
    end_index = annot_nonseizure[annot_nonseizure['Patient_ID'] == p]['Episode_End_Index']
    episode_index = start_index.index

    nonseizure_ind_arr = np.vstack(
        [nonseizure_start_index,
         nonseizure_end_index,
         global_episode_index_nonseizure]).astype(int)

    seizure_ind_arr = np.vstack(
        [seizure_start_index,
         seizure_end_index,
         global_episode_index_seizure]).astype(int)

    nonseizure_ind_arr_eps = np.vstack(
        [start_index,
         end_index,
         episode_index]).astype(int)

    nonseizure_clip_temp = np.hstack((nonseizure_ind_arr, nonseizure_ind_arr_eps))
    nonseizure_clip_label = np.zeros(nonseizure_clip_temp.shape[1]).astype(int)
    non_seizure_clip = np.vstack((nonseizure_clip_temp, nonseizure_clip_label))

    seizure_clip_temp = np.vstack(
            [seizure_start_index,
             seizure_end_index,
             global_episode_index_seizure]).astype(int)
    seizure_clip_label = np.ones(seizure_clip_temp.shape[1]).astype(int)
    seizure_clip = np.vstack((seizure_clip_temp, seizure_clip_label))

    combined_clip = np.hstack((seizure_clip, non_seizure_clip))

    valid = np.where((combined_clip[1] - combined_clip[0]) > 500)

    combined_clip = combined_clip[:,valid].squeeze()

    if combined_clip.shape[1]>0:
        # shuffled_index = np.arange(combined_clip.shape[1])
        # np.random.shuffle(shuffled_index)
        # clip_dict[p] = combined_clip[:, shuffled_index]


        structured_array = rfn.unstructured_to_structured(combined_clip.T.astype(int),
                                      np.dtype(
                                          [('start_index', 'int32'), ('end_index', 'int32'), ('episode_index', 'int32'),
                                           ('label', 'int32')]))

        clip_dict[p] = combined_clip.T[np.argsort(structured_array, order=['episode_index','start_index'])].T

np.save(data_dir + 'rns_test_cache/clip_dict.npy', clip_dict)

In [9]:
window_len = 9
stride = 5
concat_n = 1

ids = ['HUP047',
 'HUP084',
 'HUP096',
 'HUP109',
 'HUP121',
 'HUP129',
 'HUP131',
 'HUP137',
 'HUP147',
 # 'HUP153',
 'HUP156',
 'HUP159',
 'HUP182',
 'HUP197',
 'HUP199',
 # 'HUP205',
 'RNS026',
 'RNS029']

data_list = []
for id in ids:
    print(id)
    data_import[id].set_window_parameter(window_length=window_len, window_displacement=stride)
    data_import[id].normalize_data()
    window_indices, window_data = data_import[id].get_windowed_data(clip_dict[id][0], clip_dict[id][1])
    import_indices = np.array([])
    import_label = np.array([])
    import_clip_indices = np.array([])
    import_start_indicies = np.array([])
    import_patient_ID = np.array([])
    for i, ind in enumerate(window_indices):
        import_label = np.hstack((import_label, np.repeat(clip_dict[id][3][i], len(ind))))
        import_indices = np.hstack((import_indices, np.repeat(clip_dict[id][2][i], len(ind))))
        import_clip_indices = np.hstack((import_clip_indices, np.arange(len(ind))))
        import_start_indicies = np.hstack((import_start_indicies, np.repeat(clip_dict[id][0][i], len(ind))))
        import_patient_ID = np.hstack((import_patient_ID, np.repeat(id, len(ind))))
    assert import_label.shape[0] == window_data.shape[0]
    np.save(data_dir+'rns_test_cache/' + id + '.npy', {'data': window_data, 'label': import_label, 'patientID': import_patient_ID, 'indices': np.vstack([import_indices,import_clip_indices,import_start_indicies]).T})

HUP047
HUP084
bla
bla
bla
bla
bla
bla
bla
HUP096
bla
bla
HUP109
bla
bla
bla
bla
bla
bla
bla
HUP121
HUP129
bla
bla
bla
bla
bla
bla
bla
HUP131
bla
bla
bla
bla
HUP137
bla
bla
HUP147
bla
HUP156
bla
bla
bla
bla
bla
bla
bla
bla
bla
bla
bla
bla
bla
bla
HUP159
bla
bla
bla
bla
bla
bla
bla
bla
bla
HUP182
bla
HUP197
HUP199
bla
bla
bla
bla
bla
bla
bla
bla
bla
bla
bla
bla
bla
bla
bla
bla
RNS026
bla
bla
bla
bla
bla
bla
bla
bla
bla
bla
bla
RNS029
bla
bla
bla
bla


In [None]:
def collate_fn(batch):
    info = list(zip(*batch))
    data = info[0]
    label = info[1]
    return torch.stack(data), torch.stack(label)

In [None]:
data_list

In [69]:
data_list = os.listdir(data_dir+'rns_test_cache')[1:]

# data_list = ['HUP182.npy',   'HUP129.npy',   'HUP109.npy', 'HUP156.npy', 'HUP096.npy', 'RNS026.npy',  'HUP159.npy']
# data_list = ['RNS026.npy', 'HUP159.npy', 'HUP129.npy', 'HUP096.npy', 'HUP182.npy']
train_data, train_label, test_data, test_label, train_index, test_index = get_data(data_list, split=0.7)
# data, label,_,_ = get_data(data_list, split=1)
# train_data, test_data, train_label, test_label = sklearn.model_selection.train_test_split(data, label, test_size=0.8, random_state=42)

print(train_data.shape)
print(train_label.shape)
print(train_index.shape)
print(test_data.shape)
print(test_label.shape)
print(test_index.shape)


  0%|          | 0/16 [00:00<?, ?it/s][A
 19%|█▉        | 3/16 [00:00<00:00, 18.73it/s][A
 31%|███▏      | 5/16 [00:00<00:00, 12.52it/s][A
 44%|████▍     | 7/16 [00:00<00:00, 10.36it/s][A
 56%|█████▋    | 9/16 [00:00<00:00,  8.07it/s][A
 62%|██████▎   | 10/16 [00:01<00:00,  6.95it/s][A
 69%|██████▉   | 11/16 [00:01<00:00,  5.83it/s][A
 75%|███████▌  | 12/16 [00:01<00:00,  5.22it/s][A
 81%|████████▏ | 13/16 [00:01<00:00,  4.87it/s][A
 88%|████████▊ | 14/16 [00:02<00:00,  4.72it/s][A
 94%|█████████▍| 15/16 [00:02<00:00,  4.05it/s][A
100%|██████████| 16/16 [00:02<00:00,  5.70it/s][A

(13828, 2249, 4)
(13828,)
(13828,)
(5936, 2249, 4)
(5936,)
(5936,)





In [13]:
from models.SupervisedDownstream import SupervisedDownstream

In [8]:
import torch
import torchvision
from torch import nn
import os
import random
from lightly.loss import SwaVLoss
from lightly.loss.memory_bank import MemoryBankModule
from lightly.models.modules import SwaVProjectionHead, SwaVPrototypes
import lightning as L


class Transpose(nn.Module):
    def __init__(self, dim1, dim2):
        super(Transpose, self).__init__()
        self.dim1 = dim1
        self.dim2 = dim2

    def forward(self, x):
        return x.transpose(self.dim1, self.dim2)


class SwaV(L.LightningModule):
    def __init__(self, config):
        super().__init__()
        resnet = torchvision.models.resnet50()
        self.backbone = nn.Sequential(*list(resnet.children())[:-1])
        self.projection_head = SwaVProjectionHead(2048, 2048, 128)
        self.prototypes = SwaVPrototypes(128, 256, 1)
        self.start_queue_at_epoch = 15
        self.queues = nn.ModuleList([MemoryBankModule(size=512) for _ in range(2)])
        self.criterion = SwaVLoss(sinkhorn_epsilon=0.05)

        self.ft_enc = nn.ModuleList()
        for i, _ in enumerate(config.ft_enc_dims):
            if i == 0:
                self.ft_enc.append(
                    nn.Conv1d(
                        in_channels=4,
                        out_channels=config.ft_enc_dims[i],
                        kernel_size=config.ft_enc_kernel_widths[i],
                        stride=config.ft_enc_strides[i],
                        padding=0,
                        groups=config.channel_buffer_size,
                    )
                )
            else:
                self.ft_enc.append(
                    nn.Conv1d(
                        in_channels=config.ft_enc_dims[i - 1],
                        out_channels=config.ft_enc_dims[i],
                        kernel_size=config.ft_enc_kernel_widths[i],
                        stride=config.ft_enc_strides[i],
                        padding=0,
                        groups=config.channel_buffer_size,
                    )
                )
            # transpose the output of the convolutional layer
            self.ft_enc.append(Transpose(1, 2))
            # layer normalization
            self.ft_enc.append(nn.LayerNorm(config.ft_enc_dims[i]))
            # GELU activation
            self.ft_enc.append(nn.GELU())
            # transpose the output of the convolutional layer
            self.ft_enc.append(Transpose(1, 2))

        # add a adaptive pool so different sized crops can be the same length afterward
        self.ft_enc.append(nn.AdaptiveAvgPool1d(config.spatial_transformer_hidden))

        # convert the list of modules to a sequential module
        self.ft_enc = nn.Sequential(*self.ft_enc)

        crop_transforms = []
        crop_sizes = [224, 96]
        crop_min_scales = [0.14, 0.05]
        crop_max_scales = [1.0, 0.14]
        crop_counts = [2, 6]
        for i in range(len(crop_sizes)):
            random_resized_crop = T.RandomResizedCrop(crop_sizes[i], scale=(crop_min_scales[i], crop_max_scales[i]))

            crop_transforms.extend([T.Compose([random_resized_crop])] * crop_counts[i])

        self.crop_transforms = crop_transforms

    def training_step(self, batch, batch_idx):
        x = batch[0].float()

        x = self.ft_enc(x)

        views = []
        for tf in self.crop_transforms:
            views.append(tf(x).unsqueeze(1).repeat(1, 3, 1, 1))

        high_resolution, low_resolution = views[:2], views[2:]
        self.prototypes.normalize()

        high_resolution_features = [self._subforward(x) for x in high_resolution]
        low_resolution_features = [self._subforward(x) for x in low_resolution]

        high_resolution_prototypes = [
            self.prototypes(x, self.current_epoch) for x in high_resolution_features
        ]
        low_resolution_prototypes = [
            self.prototypes(x, self.current_epoch) for x in low_resolution_features
        ]
        queue_prototypes = self._get_queue_prototypes(high_resolution_features)
        loss = self.criterion(
            high_resolution_prototypes, low_resolution_prototypes, queue_prototypes
        )
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def predict_step(self, batch, batch_idx):
        x = batch[0].float()
        x = self.ft_enc(x)
        x = x.unsqueeze(1).repeat(1, 3, 1, 1)
        features = self.backbone(x).flatten(start_dim=1)
        # features = self.projection_head(features)
        features = nn.functional.normalize(features, dim=1, p=2)
        return features

    def configure_optimizers(self):
        optim = torch.optim.Adam(self.parameters(), lr=0.001)
        return optim

    def _subforward(self, input):
        features = self.backbone(input).flatten(start_dim=1)
        features = self.projection_head(features)
        features = nn.functional.normalize(features, dim=1, p=2)
        return features

    @torch.no_grad()
    def _get_queue_prototypes(self, high_resolution_features):
        if len(high_resolution_features) != len(self.queues):
            raise ValueError(
                f"The number of queues ({len(self.queues)}) should be equal to the number of high "
                f"resolution inputs ({len(high_resolution_features)}). Set `n_queues` accordingly."
            )

        # Get the queue features
        queue_features = []
        for i in range(len(self.queues)):
            _, features = self.queues[i](high_resolution_features[i], update=True)
            # Queue features are in (num_ftrs X queue_length) shape, while the high res
            # features are in (batch_size X num_ftrs). Swap the axes for interoperability.
            features = torch.permute(features, (1, 0))
            queue_features.append(features)

        # If loss calculation with queue prototypes starts at a later epoch,
        # just queue the features and return None instead of queue prototypes.
        if (
                self.start_queue_at_epoch > 0
                and self.current_epoch < self.start_queue_at_epoch
        ):
            return None

        # Assign prototypes
        queue_prototypes = [
            self.prototypes(x, self.current_epoch) for x in queue_features
        ]
        return queue_prototypes

In [70]:
class DownStream(L.LightningModule):

    def __init__(self, ft_enc, backbone):
        super().__init__()
        self.ft_enc = ft_enc
        self.backbone = backbone
        self.fc1 = nn.Linear(2048,256)
        self.fc2 = nn.Linear(256,2)
        self.batchNorm = nn.BatchNorm1d(256)
        self.relu = nn.ReLU()
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        x = self.ft_enc(x)
        x = x.unsqueeze(1).repeat(1, 3, 1, 1)
        features = self.backbone(x).flatten(start_dim=1)
        # features = self.projection_head(features)
        features = nn.functional.normalize(features, dim=1, p=2)
        return features

    def training_step(self, batch, batch_idx):
        x = batch[0].float()
        y = batch[1].float()
        with torch.no_grad():
            x = self(x)
        x = self.relu(self.batchNorm(self.fc1(x)))
        x = self.fc2(x)
        pred = self.softmax(x)
        label = nn.functional.one_hot(y.to(torch.int64), num_classes=2).squeeze()

        loss = nn.functional.binary_cross_entropy_with_logits(pred.float(), label.float())

        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):

        x = batch[0].float()
        y = batch[1].float()
        with torch.no_grad():
            x = self(x).view(-1,2048)
        x = self.relu(self.batchNorm(self.fc1(x)))
        x = self.fc2(x)
        pred = self.softmax(x)
        label = nn.functional.one_hot(y.to(torch.int64), num_classes=2).squeeze()
        loss = nn.functional.binary_cross_entropy_with_logits(pred.float(), label.float())
        out = torch.argmax(pred, dim=1).detach().cpu().numpy()
        target = y.squeeze().detach().cpu().numpy()

        precision, recall, fscore, support = sklearn.metrics.precision_recall_fscore_support(out, target,labels = [0,1],zero_division=0)
        acc = sklearn.metrics.accuracy_score(out, target)

        self.log("val_loss", loss,prog_bar= True)
        self.log("val_acc", acc,prog_bar=True)
        self.log("val_f1", fscore[1],prog_bar=True)

        return pred, label

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer



In [81]:
from models.rns_dataloader import RNSDatasetDownStream
from models.SupervisedDownstream_v2 import SupervisedDownstream
from model_config import MODEL_CONFIG
labeled_dataset = RNSDatasetDownStream(train_data, train_label, transform=False)
labeled_test = RNSDatasetDownStream(test_data, test_label, transform=False)

swav = SwaV.load_from_checkpoint(ckpt_folder_root + 'rns_swav/model_epoch-epoch=24-train_loss=0.81759.ckpt', config = MODEL_CONFIG)
model = SupervisedDownstream(swav.ft_enc, swav.backbone)

device = "cuda" if torch.cuda.is_available() else "cpu"

checkpoint_callback = pl_callbacks.ModelCheckpoint(monitor='val_loss',
                                                   filename='rns_swav_50_all_linear_eval-{epoch:02d}-{val_loss:.5f}', save_last=True, save_top_k=-1, dirpath=ckpt_folder_root + 'rns_swav_50_all_linear_eval')
csv_logger = pl_loggers.CSVLogger(log_folder_root, name='rns_swav_50_all_linear_eval')

trainer = pl.Trainer(logger=csv_logger, max_epochs=80, callbacks=[checkpoint_callback], accelerator='gpu', devices=1,precision=16)


Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [82]:
dataloader = torch.utils.data.DataLoader(
    labeled_dataset,
    batch_size=32,
    shuffle=False,
    drop_last=True
)

test_dataloader = torch.utils.data.DataLoader(
    labeled_test,
    batch_size=128,
    shuffle=False,
    drop_last=True
)

In [83]:
trainer.fit(model,dataloader,test_dataloader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type       | Params
----------------------------------------
0 | ft_enc   | Sequential | 22.7 K
1 | backbone | Sequential | 23.5 M
2 | fc1      | Linear     | 1.0 M 
3 | fc2      | Linear     | 32.8 K
4 | dp       | Dropout1d  | 0     
5 | fc3      | Linear     | 520   
6 | fc4      | Linear     | 18    
7 | softmax  | Softmax    | 0     
----------------------------------------
24.6 M    Trainable params
0         Non-trainable params
24.6 M    Total params
49.226    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

C:\Users\Patrick Xu\AppData\Local\Programs\Python\Python310\lib\site-packages\pytorch_lightning\trainer\trainer.py:653: Detected KeyboardInterrupt, attempting graceful shutdown...


In [84]:
predictions = trainer.predict(model,test_dataloader)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 7it [00:00, ?it/s]

In [85]:
output_list = []
target_list = []
m = nn.Softmax(dim=1)
for pred, y in predictions:
    output_list.append(pred)
    target_list.append(y)

pred_raw = torch.vstack(output_list)
target = torch.hstack(target_list)
out = torch.argmax(pred_raw, dim=1)

In [86]:
clf_report = sklearn.metrics.classification_report(torch.argmax(pred_raw, dim=1), target, digits=6)

print(f"Classification Report : \n{clf_report}")

Classification Report : 
              precision    recall  f1-score   support

           0   0.372611  0.868095  0.521415      1774
           1   0.870216  0.376982  0.526069      4162

    accuracy                       0.523753      5936
   macro avg   0.621414  0.622538  0.523742      5936
weighted avg   0.721505  0.523753  0.524678      5936



In [None]:
output_list = []
target_list = []
emb_list = []
m = nn.Softmax(dim=1)
for pred, y, emb in predictions:
    output_list.append(pred)
    target_list.append(y)
    emb_list.append(emb)

In [36]:
target = torch.ones([10, 64], dtype=torch.float32)  # 64 classes, batch size = 10
output = torch.full([10, 64], 1.5)  # A prediction (logit)
pos_weight = torch.ones([64])  # All weights are equal to 1
criterion = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)

print(target.size())
print(output.size())


torch.Size([10, 64])
torch.Size([10, 64])


In [49]:
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

In [56]:
pca_comp_n = 30
batch_size = 32

pca = PCA(n_components=pca_comp_n, copy=True).fit(emb)
p = pca.transform(emb)

# ind = np.random.choice(len(emb), 10000)
#
tsne = TSNE(n_components=2, verbose=1, perplexity=30, random_state=142, init='pca')
z = tsne.fit_transform(emb)


[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 19558 samples in 0.053s...
[t-SNE] Computed neighbors for 19558 samples in 7.026s...
[t-SNE] Computed conditional probabilities for sample 1000 / 19558
[t-SNE] Computed conditional probabilities for sample 2000 / 19558
[t-SNE] Computed conditional probabilities for sample 3000 / 19558
[t-SNE] Computed conditional probabilities for sample 4000 / 19558
[t-SNE] Computed conditional probabilities for sample 5000 / 19558
[t-SNE] Computed conditional probabilities for sample 6000 / 19558
[t-SNE] Computed conditional probabilities for sample 7000 / 19558
[t-SNE] Computed conditional probabilities for sample 8000 / 19558
[t-SNE] Computed conditional probabilities for sample 9000 / 19558
[t-SNE] Computed conditional probabilities for sample 10000 / 19558
[t-SNE] Computed conditional probabilities for sample 11000 / 19558
[t-SNE] Computed conditional probabilities for sample 12000 / 19558
[t-SNE] Computed conditional probabilities for sam

In [57]:
interictal_inds = np.where(train_label == 0)[0]
ictal_inds = np.where(train_label == 1)[0]

In [58]:
pca.explained_variance_ratio_

array([0.11751629, 0.0819208 , 0.07092781, 0.05053015, 0.04508704,
       0.03998285, 0.03665047, 0.03078197, 0.02964514, 0.02819553,
       0.02539695, 0.02462943, 0.02219062, 0.01869597, 0.01753298,
       0.01708065, 0.01542148, 0.01397562, 0.01352872, 0.01216361,
       0.01162982, 0.01111314, 0.0099635 , 0.00896058, 0.00830962,
       0.00800154, 0.00769934, 0.00756533, 0.00690039, 0.00637149])

In [59]:
# spc = p
#
# fig = plt.figure(figsize=(10, 8))
# ax = fig.add_subplot(projection='3d')
# ax.scatter(spc[interictal_inds,0],spc[interictal_inds,1],spc[interictal_inds,2],c='gold',label= 'interictal')
# ax.scatter(spc[ictal_inds, 0], spc[ictal_inds, 1],spc[ictal_inds, 2], c='royalblue', label='ictal')
# # plt.title('Swav Embedding t-SNE')
# ax.set_xlabel('comp 1')
# ax.set_ylabel("comp 2")
# ax.set_zlabel("comp 2")
# plt.legend()
# # plt.xlim(-67, 74)
# # plt.ylim(-67, 75)
# plt.grid()
# # plt.show()

In [62]:
spc = z
plt.figure(figsize=(10, 8))
plt.scatter(spc[interictal_inds,0],spc[interictal_inds,1],c='gold',label= 'interictal', s = 10)
plt.scatter(spc[ictal_inds, 0], spc[ictal_inds, 1], c='royalblue', label='ictal',s = 1)
# plt.title('Swav Embedding t-SNE')
plt.xlabel('comp 1')
plt.ylabel("comp 2")
plt.legend()
# plt.xlim(-67, 74)
# plt.ylim(-67, 75)
plt.grid()
plt.show()

<IPython.core.display.Javascript object>

In [61]:
z.shape

(19558, 2)