In [16]:
#this notebook establishes baseline auc for CheXpert -to cheXphoto transfer rate
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
from PIL import Image
from matplotlib import pyplot as plt
import torchvision
import torch
import os
import random
from tqdm import tqdm
from sklearn.metrics import roc_auc_score, roc_curve, plot_roc_curve, auc
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda
from torch.utils.data import DataLoader
import pickle
from sklearn.metrics import accuracy_score
import torch.utils.data as data_utils

In [2]:
acorn = 1234
torch.manual_seed(acorn)
np.random.seed(acorn)

torch.cuda.is_available()

if torch.cuda.is_available():  
    dev = "cuda:0" 
else:  
    dev = "cpu"  
    
device = torch.device(dev)  

In [38]:
training_data = datasets.CIFAR100(
    root="data",
    train=True,
    download=True,
)

test_data = datasets.CIFAR100(
    root="data",
    train=False,
    download=True,
)

Files already downloaded and verified
Files already downloaded and verified


In [39]:
image_size = (128, 128)
resnet_mean = [0.485, 0.456, 0.406]
resnet_std = [0.229, 0.224, 0.225]

#Creating a Transformation Object
data_transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=image_size),
    torchvision.transforms.ToTensor(), #Converting to tensor
    torchvision.transforms.Normalize(mean = resnet_mean,
                                    std = resnet_std) 
    
])


training_data.transform = data_transform
test_data.transform = data_transform


In [40]:
indices = torch.arange(0,45000)
train = data_utils.Subset(training_data, indices)
val_indices = torch.arange(45000, 50000)
validate = data_utils.Subset(training_data, val_indices)
print(len(train), len(validate))

45000 5000


In [41]:
batch_size = 64
train_dataloader = DataLoader(train,  batch_size=batch_size, shuffle=True)
validate_dataloader = DataLoader(validate, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data,  batch_size=batch_size, shuffle=True)

In [47]:
class Resnext50(torch.nn.Module):
    def __init__(self, n_classes):
        super().__init__()

        resnet = torchvision.models.resnext50_32x4d(pretrained=True, progress=True)
        resnet.fc = torch.nn.Sequential(
            torch.nn.Dropout(p=0.2),
            torch.nn.Linear(in_features=resnet.fc.in_features, out_features=n_classes)
        )
        self.base_model = resnet
        self.soft = torch.nn.Softmax()

    def forward(self, x):
        return self.soft(self.base_model(x))

# Initialize the model
learning_rate = 1e-4

epochs = 15
model = Resnext50(100)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

model.to(device)
loss_fn = torch.nn.CrossEntropyLoss().to(device)


In [43]:
def train_loop(dataloader, model, loss_fn, optimizer, batch_size=64):
    size = len(dataloader.dataset)
    model.train()
    train_loss = 0
    counter = 0
    with tqdm(dataloader, unit="batch") as tepoch:
        counter += 1
        for X_cpu, y_cpu in tepoch:
            X, y = X_cpu.to(device), y_cpu.to(device)
            optimizer.zero_grad()
            outputs = model(X)
            loss = loss_fn(outputs, y.type(torch.long))
            loss.backward() 
            optimizer.step() 
            train_loss += loss.item() 
            avg_loss = train_loss / (counter * batch_size)
            tepoch.set_postfix(loss=avg_loss)
    return avg_loss
    
    
def eval_model(dataloader, model, return_probab=False):
    model.eval()
    predicts = []
    targets = []
    for counter, (images, labels) in enumerate(dataloader):
        imageGPU = images.to(device)
        
        outputs = torch.Tensor.cpu(model(imageGPU))
        predicts.append(outputs.detach().numpy())
        targets.append(labels)
        
    predicts = np.vstack(predicts)
    targets = np.hstack(targets)
    if return_probab:
        return predicts, targets
    else:
        predicts = np.argmax(predicts, axis=1)
        return accuracy_score(targets, predicts)

    


In [7]:
# resnext lr=1e-4 epochs=20
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    print("done epoc %s : test_acc %s" %(t, eval_model(test_dataloader, model)))
print("Done!")

  0%|          | 0/782 [00:00<?, ?batch/s]

Epoch 1
-------------------------------


  
100%|██████████| 782/782 [17:38<00:00,  1.35s/batch, loss=52.7]
  0%|          | 0/782 [00:00<?, ?batch/s]

done epoc 0 : test_acc 0.4169
Epoch 2
-------------------------------


  
100%|██████████| 782/782 [17:30<00:00,  1.34s/batch, loss=50.8]
  0%|          | 0/782 [00:00<?, ?batch/s]

done epoc 1 : test_acc 0.5155
Epoch 3
-------------------------------


  
100%|██████████| 782/782 [17:29<00:00,  1.34s/batch, loss=49.6]
  0%|          | 0/782 [00:00<?, ?batch/s]

done epoc 2 : test_acc 0.5572
Epoch 4
-------------------------------


  
100%|██████████| 782/782 [17:17<00:00,  1.33s/batch, loss=49.1]
  0%|          | 0/782 [00:00<?, ?batch/s]

done epoc 3 : test_acc 0.5963
Epoch 5
-------------------------------


  
100%|██████████| 782/782 [17:40<00:00,  1.36s/batch, loss=48.4]
  0%|          | 0/782 [00:00<?, ?batch/s]

done epoc 4 : test_acc 0.6312
Epoch 6
-------------------------------


  
100%|██████████| 782/782 [17:38<00:00,  1.35s/batch, loss=47.9]
  0%|          | 0/782 [00:00<?, ?batch/s]

done epoc 5 : test_acc 0.6406
Epoch 7
-------------------------------


  
100%|██████████| 782/782 [17:38<00:00,  1.35s/batch, loss=47.6]
  0%|          | 0/782 [00:00<?, ?batch/s]

done epoc 6 : test_acc 0.6603
Epoch 8
-------------------------------


  
100%|██████████| 782/782 [17:36<00:00,  1.35s/batch, loss=47.4]
  0%|          | 0/782 [00:00<?, ?batch/s]

done epoc 7 : test_acc 0.6554
Epoch 9
-------------------------------


  
100%|██████████| 782/782 [17:35<00:00,  1.35s/batch, loss=47.2]
  0%|          | 0/782 [00:00<?, ?batch/s]

done epoc 8 : test_acc 0.6759
Epoch 10
-------------------------------


  
100%|██████████| 782/782 [17:39<00:00,  1.35s/batch, loss=47]  


done epoc 9 : test_acc 0.6759
Done!


In [48]:
#resnext lr=1e-4 epochs 15 
min_loss= 1000
max_acc = 0
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loss = train_loop(train_dataloader, model, loss_fn, optimizer)
    val_acc = eval_model(validate_dataloader, model)
    if val_acc > max_acc:
        max_acc = val_acc
        min_loss = train_loss
        torch.save(model, 'models/model_resnext50_15_1e-4.pth')
    elif val_acc == max_acc and train_loss<min_loss:
        max_acc = val_acc
        min_loss = train_loss
        torch.save(model, 'models/model_resnext50_15_1e-4.pth')
    print("validation accuracy %s and loss %s" %(max_acc, min_loss))
        
    print("done epoc %s : test_acc %s" %(t, eval_model(test_dataloader, model)))
print("Done! Saved model with validation accuracy %s and loss %s" %(max_acc, min_loss))

  0%|          | 0/704 [00:00<?, ?batch/s]

Epoch 1
-------------------------------


  
100%|██████████| 704/704 [17:45<00:00,  1.51s/batch, loss=46.7]


validation accuracy 0.518 and loss 46.73706308379769


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 0 : test_acc 0.5187
Epoch 2
-------------------------------


  
100%|██████████| 704/704 [18:17<00:00,  1.56s/batch, loss=44.6]


validation accuracy 0.595 and loss 44.64138865098357


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 1 : test_acc 0.5951
Epoch 3
-------------------------------


  
100%|██████████| 704/704 [17:48<00:00,  1.52s/batch, loss=43.7]


validation accuracy 0.6492 and loss 43.72000069916248


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 2 : test_acc 0.6375
Epoch 4
-------------------------------


  
100%|██████████| 704/704 [16:36<00:00,  1.41s/batch, loss=43.1]


validation accuracy 0.6698 and loss 43.14717735722661


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 3 : test_acc 0.6773
Epoch 5
-------------------------------


  
100%|██████████| 704/704 [16:43<00:00,  1.43s/batch, loss=42.7]


validation accuracy 0.6926 and loss 42.688105665147305


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 4 : test_acc 0.6986
Epoch 6
-------------------------------


  
100%|██████████| 704/704 [16:33<00:00,  1.41s/batch, loss=42.4]


validation accuracy 0.6972 and loss 42.41049699857831


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 5 : test_acc 0.7018
Epoch 7
-------------------------------


  
100%|██████████| 704/704 [16:38<00:00,  1.42s/batch, loss=42.2]


validation accuracy 0.7134 and loss 42.18948448076844


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 6 : test_acc 0.7089
Epoch 8
-------------------------------


  
100%|██████████| 704/704 [17:38<00:00,  1.50s/batch, loss=42]  


validation accuracy 0.722 and loss 41.984983026981354


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 7 : test_acc 0.7232
Epoch 9
-------------------------------


  
100%|██████████| 704/704 [17:42<00:00,  1.51s/batch, loss=41.8]


validation accuracy 0.7258 and loss 41.82138378918171


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 8 : test_acc 0.7149
Epoch 10
-------------------------------


  
100%|██████████| 704/704 [16:38<00:00,  1.42s/batch, loss=41.7]


validation accuracy 0.7314 and loss 41.66840525716543


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 9 : test_acc 0.7258
Epoch 11
-------------------------------


  
100%|██████████| 704/704 [17:42<00:00,  1.51s/batch, loss=41.5]


validation accuracy 0.737 and loss 41.53921537846327


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 10 : test_acc 0.7278
Epoch 12
-------------------------------


  
100%|██████████| 704/704 [17:39<00:00,  1.51s/batch, loss=41.5]


validation accuracy 0.737 and loss 41.53921537846327


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 11 : test_acc 0.7342
Epoch 13
-------------------------------


  
100%|██████████| 704/704 [17:40<00:00,  1.51s/batch, loss=41.4]


validation accuracy 0.7438 and loss 41.39629418775439


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 12 : test_acc 0.7376
Epoch 14
-------------------------------


  
100%|██████████| 704/704 [17:51<00:00,  1.52s/batch, loss=41.3]


validation accuracy 0.7438 and loss 41.39629418775439


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 13 : test_acc 0.7411
Epoch 15
-------------------------------


  
100%|██████████| 704/704 [17:36<00:00,  1.50s/batch, loss=41.2]


validation accuracy 0.7522 and loss 41.22566632181406


  


done epoc 14 : test_acc 0.7478
Done! Saved model with validation accuracy 0.7522 and loss 41.22566632181406


In [49]:
#coarse labels

class Dictlist(dict):
    def __setitem__(self, key, value):
        try:
            self[key]
        except KeyError:
            super(Dictlist, self).__setitem__(key, [])
        self[key].append(value)

def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dic = pickle.load(fo, encoding='bytes')
    return dic
x=unpickle('data/cifar-100-python/test')
d=Dictlist()

for i in range(0,len(x[b'coarse_labels'])):
    d[x[b'coarse_labels'][i]]=x[ b'fine_labels'][i]
coarse_to_fine=dict(d)
for i in coarse_to_fine.keys():
    coarse_to_fine[i]=list(dict.fromkeys(coarse_to_fine[i]))
    
fine_to_coarse = dict()
for k,v in coarse_to_fine.items():
    for each in v:
        fine_to_coarse[each] = k

In [55]:
training_data_coarse = datasets.CIFAR100(
    root="data",
    train=True,
    download=True,
)
training_data_coarse.targets = [fine_to_coarse[target] for target in training_data_coarse.targets]

test_data_coarse = datasets.CIFAR100(
    root="data",
    train=False,
    download=True,
)
test_data_coarse.targets = [fine_to_coarse[target] for target in test_data_coarse.targets]

Files already downloaded and verified
Files already downloaded and verified


In [57]:
training_data_coarse.transform = data_transform
test_data_coarse.transform = data_transform

train_coarse = data_utils.Subset(training_data_coarse, indices)
validate_coarse = data_utils.Subset(training_data_coarse, val_indices)
train_dataloader_coarse = DataLoader(train_coarse,  batch_size=batch_size, shuffle=True)
validate_dataloader_coarse = DataLoader(validate_coarse, batch_size=batch_size, shuffle=True)
test_dataloader_coarse = DataLoader(test_data_coarse,  batch_size=batch_size, shuffle=True)

In [62]:
class Resnext50(torch.nn.Module):
    def __init__(self, n_classes):
        super().__init__()

        resnet = torchvision.models.resnext50_32x4d(pretrained=True, progress=True)
        resnet.fc = torch.nn.Sequential(
            torch.nn.Dropout(p=0.2),
            torch.nn.Linear(in_features=resnet.fc.in_features, out_features=n_classes)
        )
        self.base_model = resnet
        self.soft = torch.nn.Softmax()

    def forward(self, x):
        return self.soft(self.base_model(x))

# Initialize the model
learning_rate = 1e-5

epochs = 15
model = Resnext50(20)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

model.to(device)
loss_fn = torch.nn.CrossEntropyLoss().to(device)

In [63]:
#coarse labels resnext lr=1e-4 epochs 15 
min_loss= 1000
max_acc = 0
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loss = train_loop(train_dataloader_coarse, model, loss_fn, optimizer)
    val_acc = eval_model(validate_dataloader_coarse, model)
    if train_loss < min_loss:
        max_acc = val_acc
        min_loss = train_loss
        torch.save(model, 'models/model_resnext50_15_1e-5_coarse.pth')
    elif train_loss == min_loss and val_acc>max_acc:
        max_acc = val_acc
        min_loss = train_loss
        torch.save(model, 'models/model_resnext50_15_1e-5_coarse.pth')
    print("validation accuracy %s and loss %s" %(max_acc, min_loss))
        
    print("done epoc %s : test_acc %s" %(t, eval_model(test_dataloader_coarse, model)))
print("Done! Saved model with validation accuracy %s and loss %s" %(max_acc, min_loss))

  0%|          | 0/704 [00:00<?, ?batch/s]

Epoch 1
-------------------------------


  
100%|██████████| 704/704 [19:08<00:00,  1.63s/batch, loss=28.3]


validation accuracy 0.793 and loss 28.308042630553246


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 0 : test_acc 0.7846
Epoch 2
-------------------------------


  
100%|██████████| 704/704 [18:38<00:00,  1.59s/batch, loss=24.8]


validation accuracy 0.8704 and loss 24.82554468140006


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 1 : test_acc 0.8608
Epoch 3
-------------------------------


  
 29%|██▉       | 205/704 [05:23<12:44,  1.53s/batch, loss=7.02]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 704/704 [18:35<00:00,  1.58s/batch, loss=23.2]


validation accuracy 0.897 and loss 23.314352974295616


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 6 : test_acc 0.886
Epoch 8
-------------------------------


  
100%|██████████| 704/704 [18:31<00:00,  1.58s/batch, loss=23.2]


validation accuracy 0.897 and loss 23.314352974295616


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 7 : test_acc 0.8876
Epoch 9
-------------------------------


  
100%|██████████| 704/704 [18:27<00:00,  1.57s/batch, loss=23.1]


validation accuracy 0.897 and loss 23.314352974295616


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 8 : test_acc 0.8874
Epoch 10
-------------------------------


  
100%|██████████| 704/704 [18:37<00:00,  1.59s/batch, loss=23.1]


validation accuracy 0.897 and loss 23.314352974295616


  
  0%|          | 0/704 [00:00<?, ?batch/s]

done epoc 9 : test_acc 0.8859
Epoch 11
-------------------------------


  
 44%|████▍     | 312/704 [08:20<10:25,  1.60s/batch, loss=10.2]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [66]:
max_acc

0.897