In [1]:
from lib.ekyn import *
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from torch import nn
from torch.nn.functional import relu
from lib.models import *
from lib.deep_learning_utils import evaluate
from sklearn.metrics import ConfusionMatrixDisplay,classification_report,f1_score
import numpy as np
from sklearn.model_selection import train_test_split
import copy
from tqdm import tqdm


In [2]:
# Split at the recording-id level (not at the level of individual samples):
# each id lands entirely in either the training or the testing set.
ekyn_ids = np.array(get_ekyn_ids())
snezana_mice_ids = np.array(get_snezana_mice_ids())

ekyn_train_ids,ekyn_test_ids = train_test_split(ekyn_ids,test_size=.2,shuffle=True,random_state=0)
snezana_mice_train_ids,snezana_mice_test_ids = train_test_split(snezana_mice_ids,test_size=.2,shuffle=True,random_state=0)

train_ids = {'ekyn':ekyn_train_ids,'snezana_mice':snezana_mice_train_ids}
test_ids  = {'ekyn':ekyn_test_ids,'snezana_mice':snezana_mice_test_ids}
batch_size = 512
# The hour estimate printed below treats every sample as 10 seconds of signal.
SECONDS_PER_SAMPLE = 10

from torch.utils.data import DataLoader,ConcatDataset


def _build_loader(ids, shuffle):
    """Build one DataLoader over every recording listed in `ids`.

    Parameters
    ----------
    ids : dict
        Mapping with the keys 'ekyn' and 'snezana_mice', each holding an
        iterable of recording ids.
    shuffle : bool
        Forwarded to DataLoader; whether to reshuffle samples every epoch.

    Returns
    -------
    DataLoader
        Loader over the concatenation of one EpochedDataset per
        (ekyn id, condition) pair and one per snezana_mice id.
    """
    # `recording_id` instead of `id` so the builtin is not shadowed.
    ekyn_datasets = [
        EpochedDataset(id=recording_id,condition=condition,robust=True,downsampled=True)
        for recording_id in ids['ekyn']
        for condition in CONDITIONS
    ]
    snezana_datasets = [
        EpochedDataset(id=recording_id,snezana_mice=True)
        for recording_id in ids['snezana_mice']
    ]
    return DataLoader(
        dataset=ConcatDataset(ekyn_datasets + snezana_datasets),
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=1
    )


trainloader = _build_loader(train_ids, shuffle=True)
# Evaluation does not benefit from shuffling; a fixed order keeps the
# per-batch losses reproducible from epoch to epoch.
testloader = _build_loader(test_ids, shuffle=False)

# Exact sample counts come from the datasets themselves.
# len(loader) * batch_size overcounts whenever the last batch is partial.
n_train_samples = len(trainloader.dataset)
n_test_samples = len(testloader.dataset)

print(train_ids)
print(test_ids)
print(len(ekyn_ids),len(snezana_mice_ids))
print(f'{len(trainloader)} training batches {len(testloader)} testing batches')
print(f'{n_train_samples} training samples {n_test_samples} testing samples')
print(f'{n_train_samples*SECONDS_PER_SAMPLE/3600:.2f} training hours {n_test_samples*SECONDS_PER_SAMPLE/3600:.2f} testing hours')

{'ekyn': array(['F1-0', 'B3-1', 'A4-0', 'F1-1', 'E2-1', 'C4-1', 'F5-1', 'E4-0',
       'B1-0', 'A1-0', 'C1-0', 'E4-1'], dtype='<U4'), 'snezana_mice': array(['21-WK-15', '21-WT-1', '21-WK-18', '21-WT-10', '21-WK-6',
       '21-HET-4', '21-KO-11', '21-WK-17', '21-WK-13', '386', '354',
       '21-KO-4', '381', '21-WK-11', '21-KO-12', '21-HET-2', '21-WK-16',
       '21-KO-2', '21-WT-7', '21-KO-6', '21-WT-8', '21-HET-5', '21-KO-10',
       '21-WK-10', '21-KO-3', '21-WT-13', '21-WT-4', '21-WT-9', '21-WK-8',
       '21-HET-10', '21-KO-1', '429', '21-WK-1', '21-HET-3', '21-KO-9',
       '21-WK-5', '21-KO-7', '21-KO-5', '21-HET-7', '21-WK-9', '382',
       '21-HET-12', '21-HET-1', '378', '21-WT-5', '21-WT-2'], dtype='<U9')}
{'ekyn': array(['A1-1', 'C4-0', 'D1-0', 'E1-0'], dtype='<U4'), 'snezana_mice': array(['21-WK-4', '21-WK-3', '21-WT-12', '21-WK-12', '21-HET-9',
       '21-HET-11', '21-WK-2', '21-WT-3', '21-KO-8', '21-WT-6',
       '21-HET-13', '21-HET-8'], dtype='<U9')}
16 58
2532 training 

In [3]:
# Network under test: a ResNetv2 built from ResBlockv2 blocks with four
# stages (widths 2/4/8/16, two blocks per stage) and three output neurons.
model = ResNetv2(
    block=ResBlockv2,
    widthi=[2,4,8,16],
    depthi=[2,2,2,2],
    n_output_neurons=3,
)

# Cross-entropy loss on the raw logits.
criterion = nn.CrossEntropyLoss()

# AdamW with decoupled weight decay.
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=3e-4,
    weight_decay=1e-4,
)

# Halves the learning rate once the monitored (minimised) metric has not
# improved for 10 consecutive scheduler steps.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer=optimizer,
    mode='min',
    factor=0.5,
    patience=10,
)

In [4]:
# Loss histories, kept in their own cell so that re-running the training
# cell continues the curves instead of resetting them.
# `lossi` is initialised here but not referenced by any other visible cell.
trainlossi, testlossi, lossi = [], [], []

# Best checkpoint bookkeeping: start from "infinitely bad" so that the first
# evaluated epoch always becomes the best one, and snapshot the initial
# weights as a placeholder.
best_dev_loss = torch.inf
best_model_wts = copy.deepcopy(model.state_dict())

In [5]:
# Train for 100 epochs. After every epoch: measure the mean per-batch loss on
# the test loader, step the LR scheduler on it, keep a copy of the best
# weights, and refresh the loss plot saved to 'loss.jpg'.
device = 'cuda'
model.to(device)

for i in tqdm(range(100)):
    # ---- one pass over the training data ----
    model.train()
    for Xi,yi in trainloader:
        Xi,yi = Xi.to(device),yi.to(device)
        logits = model(Xi)
        loss = criterion(logits,yi)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        trainlossi.append(loss.item())

    # ---- evaluation: average of the per-batch losses on the test loader ----
    model.eval()
    with torch.no_grad():
        loss_total = 0
        for Xi,yi in testloader:
            Xi,yi = Xi.to(device),yi.to(device)
            logits = model(Xi)
            loss = criterion(logits,yi)
            loss_total += loss.item()
        testlossi.append(loss_total/len(testloader))

    # ReduceLROnPlateau only acts when it is fed the monitored metric; without
    # this call the scheduler never lowers the learning rate.
    scheduler.step(testlossi[-1])

    # Snapshot the weights whenever the evaluation loss reaches a new minimum.
    if testlossi[-1] < best_dev_loss:
        best_dev_loss = testlossi[-1]
        best_model_wts = copy.deepcopy(model.state_dict())

    print(best_dev_loss)

    # Both curves share an x-axis measured in epochs: training losses are
    # spread evenly over [0, n_epochs]; the evaluation curve is anchored at
    # x=0 with the very first training loss so the two lines start together.
    fig,ax = plt.subplots()
    ax.plot(torch.linspace(0,len(testlossi),len(trainlossi)),trainlossi,label='train (per batch)')
    ax.plot(torch.linspace(0,len(testlossi),len(testlossi) + 1),[trainlossi[0]] + testlossi,label='test (per epoch)')
    ax.set_xlabel('epoch')
    ax.set_ylabel('loss')
    ax.legend()
    fig.savefig('loss.jpg')
    plt.close(fig)

# Leave the model holding the best checkpoint rather than the last epoch, so
# that whatever runs next evaluates the weights that produced best_dev_loss.
model.load_state_dict(best_model_wts)

RuntimeError: CUDA unknown error - this may be due to an incorrectly set up environment, e.g. changing env variable CUDA_VISIBLE_DEVICES after program start. Setting the available devices to be zero.

In [None]:
# Show the lowest mean test-loader loss recorded by the training loop so far.
best_dev_loss

In [None]:
# Side-by-side row-normalised confusion matrices for the training and testing
# loaders; each panel title carries the macro-averaged F1 score and the loss
# returned by `evaluate` is printed for each split (train first, then test).
fig,axes = plt.subplots(nrows=1,ncols=2,figsize=(8,4))
for ax,(split,loader) in zip(axes,[('train',trainloader),('test',testloader)]):
    loss,y_true,y_pred = evaluate(dataloader=loader,model=model,criterion=criterion,device='cuda')
    print(loss)
    ConfusionMatrixDisplay.from_predictions(y_true,y_pred,normalize='true',ax=ax,colorbar=False)
    ax.set_title(f'{split}f1 : {f1_score(y_true,y_pred,average="macro"):.3f}')
# NOTE(review): the figures below look like the two losses printed by this
# cell (train, then test) for three settings of the first block - confirm.
# first block 4
# 0.15651415670436988 
# 0.17776457286082847
# first block 8
# 0.14261297114022264
# 0.17632858462388454
# first block 16
# 0.12881732295086315
# 0.1683129257734678
