In [1]:
# train3 (from train2): use Adam, drop weight_decay; one BW input channel instead of 3
# tune: the attention MSE loss is multiplied by 100
import torch,os,glob
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torchvision.transforms import v2
from torchvision import tv_tensors
import pandas as pd
import numpy as np

from nets import ResNet18_3lbCBAM
from tqdm import tqdm
from configparser import ConfigParser
from torch.utils.data import  DataLoader
from LIDC_Mpad_data import LIDC_Dataset
import pickle

In [None]:
# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0  # best test accuracy
best_epoch = 0


def _clip_intensity(x):
    """Clamp `tv_tensors.Image` inputs to [-1000, 400]; return anything else untouched."""
    # presumably these bounds are a CT Hounsfield-unit window -- TODO confirm
    if not isinstance(x, tv_tensors.Image):
        return x
    return tv_tensors.Image(torch.clamp(x,-1000.,400.))


def _rescale_intensity(x):
    """Shift/scale `tv_tensors.Image` inputs so the clamped range [-1000, 400] maps to [0, 1]."""
    if not isinstance(x, tv_tensors.Image):
        return x
    return tv_tensors.Image((x+1000)/1400)


# Deterministic preprocessing shared by the train and test pipelines.
prep_tr = [
    v2.Lambda(_clip_intensity),
    v2.Lambda(_rescale_intensity),
    v2.CenterCrop((384,384)),
]
# Random augmentation, appended for training only.
aug_tr = [
    v2.RandomAffine(degrees=10),
    v2.RandomHorizontalFlip(),
]
trans_train = v2.Compose(prep_tr + aug_tr)
trans_test = v2.Compose(prep_tr)

In [3]:

# Read dataset locations and batch size from the local `.settings` INI file.
parser = ConfigParser()
parser.read('.settings')
root_dir = parser.get('dataset','root_dir') #/workspaces/data/lidc-idri/slices
meta_dir = parser.get('dataset','meta_dir') #/workspaces/data/lidc-idri/splits
result_dir = os.path.join(parser.get('dataset','result_dir'),'stage2/basel0_3lbMGA-tunePad')
# The result path is two levels deep ('stage2/...'); makedirs creates any missing
# parent as well, and exist_ok avoids the check-then-create race of isdir + mkdir.
os.makedirs(result_dir, exist_ok=True)

# The training set is built with loadBB=True; train() unpacks three items per batch
# (inputs, targets, masks) while test() unpacks two.
train_data = LIDC_Dataset(root_dir,metapath=os.path.join(meta_dir,'trainBB_malB.csv'),transform=trans_train, loadBB=True)
test_data = LIDC_Dataset(root_dir,metapath=os.path.join(meta_dir,'testBB_malB.csv'),transform=trans_test)
total_train_data = len(train_data)
total_test_data = len(test_data)
print('total_train_data:',total_train_data, 'total_test_data:',total_test_data)

batch_size = parser.getint('dataset','batch_size')
# Only the training loader shuffles; the test loader keeps the dataset order.
trainloader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=8)
testloader = DataLoader(test_data, batch_size=batch_size, num_workers=8)

total_train_data: 5495 total_test_data: 2354


In [4]:
# Build the classifier (see nets.py for what `pretrained` and `attr="MGA"` select).
net = ResNet18_3lbCBAM(pretrained=True, attr="MGA")
# Swap in a newly constructed stem convolution that takes a single input channel;
# whatever weights the previous conv1 held are discarded.
net.conv1 = nn.Conv2d(
    in_channels=1,
    out_channels=64,
    kernel_size=7,
    stride=2,
    padding=3,
    bias=False,
)
# Replace the classification head with a fresh 2-output linear layer.
net.fc = nn.Linear(in_features=net.fc.in_features, out_features=2)
net = net.to(device)

In [5]:
from torchinfo import summary

# Layer-by-layer summary for one batch of single-channel 384x384 inputs
# (384 matches the CenterCrop size used in the transforms).
summary_input_shape = (batch_size, 1, 384, 384)
summary(net, input_size=summary_input_shape)

Layer (type:depth-idx)                             Output Shape              Param #
ResNet18_3lbCBAM                                   [32, 2]                   --
├─Conv2d: 1-1                                      [32, 64, 192, 192]        3,136
├─BatchNorm2d: 1-2                                 [32, 64, 192, 192]        128
├─ReLU: 1-3                                        [32, 64, 192, 192]        --
├─MaxPool2d: 1-4                                   [32, 64, 96, 96]          --
├─Sequential: 1-5                                  [32, 64, 96, 96]          --
│    └─BasicBlock: 2-1                             [32, 64, 96, 96]          --
│    │    └─Conv2d: 3-1                            [32, 64, 96, 96]          36,864
│    │    └─BatchNorm2d: 3-2                       [32, 64, 96, 96]          128
│    │    └─ReLU: 3-3                              [32, 64, 96, 96]          --
│    │    └─Conv2d: 3-4                            [32, 64, 96, 96]          36,864
│    │    └─BatchNorm2

In [6]:
# Optimisation setup: cross-entropy for the class logits, MSE for the attention
# map (train() scales the MSE term by 100), Adam with no weight decay.
lr = 1e-4
criterion = nn.CrossEntropyLoss()
mse = nn.MSELoss()
optimizer = optim.Adam(params=net.parameters(), lr=lr)

# Per-epoch logs; the first row is the column header used later to build DataFrames.
# Note: train() stores a 3-element array [total, cls, att] in the "loss" column,
# test() stores a scalar.
training_info = [["epoch", "acc", "loss"]]
testing_info = list(training_info)

In [None]:
def train(epoch):
    """Run one optimisation pass over `trainloader` and log the epoch metrics.

    For every batch the model returns class logits and an attention map. The
    loss is the cross-entropy on the logits plus 100x the MSE between the
    attention map and the masks average-pooled to the attention map's size.

    Args:
        epoch: Epoch number; used for the progress-bar label and the log row.

    Side effects:
        Updates `net` through `optimizer`, prints a summary line, and appends
        `[epoch, accuracy_percent, array([total, cls, att])]` (per-batch mean
        losses) to the module-level `training_info` list.
    """
    net.train()
    # Running sums of [total loss, classification loss, attention loss].
    loss_sums = np.zeros(3)
    n_correct = 0
    n_seen = 0
    progress = tqdm(trainloader)
    for step, (images, labels, bb_masks) in enumerate(progress):
        images = images.to(device)
        labels = labels.to(device)
        bb_masks = bb_masks.to(device)

        optimizer.zero_grad()
        logits, attn_map = net(images)

        cls_loss = criterion(logits, labels)
        # Bring the masks down to the attention map's spatial resolution.
        attn_target = F.adaptive_avg_pool2d(bb_masks, attn_map.shape[-2:])
        att_loss = 100.* mse(attn_map , attn_target)
        total_loss = cls_loss + att_loss

        total_loss.backward()
        optimizer.step()

        loss_sums += np.array([total_loss.item(), cls_loss.item(), att_loss.item()])
        predicted = logits.max(1)[1]

        n_seen += labels.size(0)
        n_correct += predicted.eq(labels).sum().item()

        progress.set_description(f"Epoch: {epoch} Acc: {(100.*n_correct/n_seen):.2f}")

    train_acc = 100.*n_correct/n_seen
    # Mean over batches (not over samples).
    train_loss = loss_sums/(step+1)
    print(f"Tot Loss: {train_loss[0]:.4f} CL: {train_loss[1]:.5f} AT: {train_loss[2]:.5f}; Train Acc: {train_acc:.2f}%")
    training_info.append([epoch,train_acc,train_loss])

def test(epoch, islast = False):
    global best_acc, best_epoch
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    pbar = tqdm(testloader)
    with torch.no_grad():
        for batch_idx, (inputs, targets ) in enumerate(pbar):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            
            loss = criterion(outputs, targets)
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

        test_acc = 100.*correct/total
        test_loss = test_loss/(batch_idx+1)
        print(f"Test Loss: {test_loss}, Test Acc: {test_acc:.2f}%")
        testing_info.append([epoch,test_acc,test_loss])
        # testing_accuracy.append(test_acc)
        # testing_loss.append(test_loss)
    # Save checkpoint.
    acc = 100.*correct/total
    if acc > best_acc or islast:
        if acc>best_acc:
            best_acc = acc
            best_epoch = epoch
        print('Saving..')
        state = {
            'net': net.state_dict(),
            'acc': acc,
            'best_acc': best_acc,
            'epoch': epoch,
        }
        savestr = 'best' if acc > best_acc else 'last'
        torch.save(state, os.path.join(result_dir,f'basel0-b{batch_size}-epoch{epoch}-{savestr}.pth'))

In [8]:
# Scratch value; the earlier np.zeros(3) assignment was a dead store that was
# overwritten immediately, so only the final binding is kept.
aa = [1,2,3]

In [9]:
# Scratch check: show the current value bound to `aa`.
print(aa)

[1, 2, 3]


In [None]:
# Epoch to resume from (0 = start from the freshly built model) and how many
# epochs to run in this session.
start_epoch = 100
num_epochs = 50

if start_epoch>0:
    # Resume from the checkpoint written at the end of the previous epoch; the
    # trailing tag in the filename may be either 'best' or 'last'.
    ckpt_pattern = os.path.join(result_dir,f'basel0-b{batch_size}-epoch{start_epoch-1}-*.pth')
    ckpt_matches = sorted(glob.glob(ckpt_pattern))
    if not ckpt_matches:
        raise FileNotFoundError(f'no checkpoint matches {ckpt_pattern}')
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
    checkpoint = torch.load(ckpt_matches[0], map_location=device)
    net.load_state_dict(checkpoint['net'])
    # Restore the running best accuracy, not the accuracy of the loaded epoch;
    # fall back to 'acc' for checkpoints that lack the 'best_acc' key.
    # NOTE(review): optimizer state is not stored in the checkpoint, so Adam
    # restarts with fresh moment estimates on resume.
    best_acc = checkpoint.get('best_acc', checkpoint['acc'])

last_epoch = start_epoch + num_epochs - 1
for epoch in range(start_epoch, start_epoch+num_epochs):
    train(epoch)
    # Force a checkpoint on the final epoch so the run can be resumed later.
    test(epoch, islast = epoch==last_epoch)

Epoch: 100 Acc: 99.13: 100%|██████████| 172/172 [00:40<00:00,  4.27it/s]


Tot Loss: 0.4230 CL: 0.02303 AT: 0.39997; Train Acc: 99.13%


100%|██████████| 74/74 [00:08<00:00,  8.40it/s]


Test Loss: 0.7113489097336659, Test Acc: 86.19%
Saving..


Epoch: 101 Acc: 99.38: 100%|██████████| 172/172 [00:45<00:00,  3.80it/s]


Tot Loss: 0.3976 CL: 0.01995 AT: 0.37766; Train Acc: 99.38%


100%|██████████| 74/74 [00:05<00:00, 12.50it/s]


Test Loss: 0.6419496171780534, Test Acc: 86.49%
Saving..


Epoch: 102 Acc: 99.07: 100%|██████████| 172/172 [00:42<00:00,  4.04it/s]


Tot Loss: 0.4135 CL: 0.02720 AT: 0.38629; Train Acc: 99.07%


100%|██████████| 74/74 [00:06<00:00, 11.51it/s]


Test Loss: 0.6329778936892949, Test Acc: 85.85%


Epoch: 103 Acc: 99.22: 100%|██████████| 172/172 [00:42<00:00,  4.00it/s]


Tot Loss: 0.4006 CL: 0.01952 AT: 0.38112; Train Acc: 99.22%


100%|██████████| 74/74 [00:05<00:00, 13.07it/s]


Test Loss: 0.6365516719257308, Test Acc: 86.96%
Saving..


Epoch: 104 Acc: 99.31: 100%|██████████| 172/172 [00:42<00:00,  4.02it/s]


Tot Loss: 0.3903 CL: 0.01954 AT: 0.37072; Train Acc: 99.31%


100%|██████████| 74/74 [00:05<00:00, 13.08it/s]


Test Loss: 0.6368144903634045, Test Acc: 87.94%
Saving..


Epoch: 105 Acc: 99.44: 100%|██████████| 172/172 [00:40<00:00,  4.22it/s]


Tot Loss: 0.3796 CL: 0.01760 AT: 0.36200; Train Acc: 99.44%


100%|██████████| 74/74 [00:05<00:00, 13.13it/s]


Test Loss: 0.6164521829955079, Test Acc: 87.85%


Epoch: 106 Acc: 99.51: 100%|██████████| 172/172 [00:39<00:00,  4.30it/s]


Tot Loss: 0.3677 CL: 0.01738 AT: 0.35034; Train Acc: 99.51%


100%|██████████| 74/74 [00:08<00:00,  8.64it/s]


Test Loss: 0.7524801374639611, Test Acc: 86.19%


Epoch: 107 Acc: 99.25: 100%|██████████| 172/172 [00:40<00:00,  4.27it/s]


Tot Loss: 0.3858 CL: 0.02300 AT: 0.36277; Train Acc: 99.25%


100%|██████████| 74/74 [00:05<00:00, 13.05it/s]


Test Loss: 0.6253162396417277, Test Acc: 86.92%


Epoch: 108 Acc: 99.02: 100%|██████████| 172/172 [00:40<00:00,  4.21it/s]


Tot Loss: 0.3857 CL: 0.02638 AT: 0.35937; Train Acc: 99.02%


100%|██████████| 74/74 [00:08<00:00,  9.18it/s]


Test Loss: 0.6644817094847157, Test Acc: 87.60%


Epoch: 109 Acc: 99.20: 100%|██████████| 172/172 [00:41<00:00,  4.19it/s]


Tot Loss: 0.3727 CL: 0.02208 AT: 0.35061; Train Acc: 99.20%


100%|██████████| 74/74 [00:05<00:00, 12.97it/s]


Test Loss: 0.676562207977514, Test Acc: 87.30%


Epoch: 110 Acc: 99.02: 100%|██████████| 172/172 [00:40<00:00,  4.29it/s]


Tot Loss: 0.3824 CL: 0.02755 AT: 0.35484; Train Acc: 99.02%


100%|██████████| 74/74 [00:05<00:00, 13.00it/s]


Test Loss: 0.6273787858505809, Test Acc: 87.13%


Epoch: 111 Acc: 99.53: 100%|██████████| 172/172 [00:42<00:00,  4.03it/s]


Tot Loss: 0.3522 CL: 0.01664 AT: 0.33558; Train Acc: 99.53%


100%|██████████| 74/74 [00:05<00:00, 13.05it/s]


Test Loss: 0.6552441336113859, Test Acc: 86.96%


Epoch: 112 Acc: 99.29: 100%|██████████| 172/172 [00:39<00:00,  4.31it/s]


Tot Loss: 0.3468 CL: 0.02199 AT: 0.32483; Train Acc: 99.29%


100%|██████████| 74/74 [00:05<00:00, 12.98it/s]


Test Loss: 0.6486997474740083, Test Acc: 87.00%


Epoch: 113 Acc: 99.44:  16%|█▋        | 28/172 [00:09<00:32,  4.40it/s]

In [None]:
# Turn the per-epoch logs into DataFrames (first row = column names) and pickle
# both under the result directory, keyed by "train" and "test".
train_header, *train_rows = training_info
test_header, *test_rows = testing_info
traindf = pd.DataFrame(train_rows, columns=train_header)
testdf = pd.DataFrame(test_rows, columns=test_header)

info_path = os.path.join(result_dir, f'basel0-b{batch_size}-se{start_epoch}-info.pkl')
with open(info_path, 'wb') as file:
    pickle.dump({"train": traindf, "test": testdf}, file)

 
 #   scheduler.step()

In [20]:
# Scratch check: list the checkpoint file(s) a resume at `start_epoch` would pick up.
start_epoch = 100
ckpt_pattern = os.path.join(result_dir, f'basel0-b{batch_size}-epoch{start_epoch-1}-*.pth')
glob.glob(ckpt_pattern)

['/workspaces/data/lidc-idri/results/stage2/basel0_3lbMGA-tunePad/basel0-b32-epoch99-last.pth']

In [15]:
# Display the resolved output directory used for checkpoints and logs.
result_dir

'/workspaces/data/lidc-idri/results/stage2/basel0_3lbMGA-tunePad'

In [13]:
# with open('two_dfs.pkl', 'rb') as file:
#     loaded = pickle.load(file)

# loaded