In [1]:
import pandas as pd
import torch
import time
import torchvision
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from torchvision.datasets.utils import download_url, check_integrity
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torchvision.transforms as tt
from torch.utils.data import random_split
from torchvision.utils import make_grid
import torchvision.models as models
import matplotlib.pyplot as plt
from sklearn.metrics import *
from torchsummary import summary
%matplotlib inline

In [2]:
from __future__ import print_function
from PIL import Image
import os
import os.path
import errno
import sys
if sys.version_info[0] == 2:
    import cPickle as pickle
else:
    import pickle

import torch.utils.data as data
from torchvision import datasets, transforms

class CIFAR10(data.Dataset):
    """`CIFAR10 <https://www.cs.toronto.edu/~kriz/cifar.html>`_ Dataset.

    Args:
        root (string): Root directory of dataset where directory
            ``cifar-10-batches-py`` exists.
        train (bool, optional): If True, creates dataset from training set, otherwise
            creates from test set.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
        coarse (bool, optional): If True, ``__getitem__`` returns a third element, the
            coarse label read from the ``coarse_labels`` field of the batch files.
        coarseNumber: Accepted for backward compatibility; not used by this class.

    Raises:
        RuntimeError: if the batch files are missing or fail the MD5 check.
        ValueError: if ``coarse=True`` but the batch files carry no coarse labels.
    """
    base_folder = 'cifar-10-batches-py'
    url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
    filename = "cifar-10-python.tar.gz"
    tgz_md5 = 'c58f30108f718f92721af3b95e74349a'
    train_list = [
        ['data_batch_1', 'c99cafc152244af753f735de768cd75f'],
        ['data_batch_2', 'd4bba439e000b95fd0a9bffe97cbabec'],
        ['data_batch_3', '54ebc095f3ab1f0389bbae665268c751'],
        ['data_batch_4', '634d18415352ddfa80567beed471001a'],
        ['data_batch_5', '482c414d41f54cd18b22e5b47cb7c3cb'],
    ]

    test_list = [
        ['test_batch', '40351d587109b95175f43aff81a1287e'],
    ]

    meta = {
        "filename": "batches.meta",
        "key": "label_names",
        "md5": "5ff9c542aee3614f3951f8cda6e48888",
    }

    def __init__(self, root, train=True,
                 transform=None, target_transform=None,
                 download=False, coarse=False, coarseNumber=None):
        self.root = os.path.expanduser(root)
        self.transform = transform
        self.target_transform = target_transform
        self.train = train  # training set or test set
        self.coarse = coarse

        if download:
            self.download()

        if not self._check_integrity():
            raise RuntimeError('Dataset not found or corrupted.' +
                               ' You can use download=True to download it')

        # Load the pickled numpy arrays for the requested split only.
        if self.train:
            (self.train_data,
             self.train_labels,
             self.train_coarse_labels) = self._load_split(self.train_list)
        else:
            (self.test_data,
             self.test_labels,
             self.test_coarse_labels) = self._load_split(self.test_list)

    @staticmethod
    def _load_batch(path):
        """Unpickle one batch file; the handle is closed even if unpickling fails."""
        # NOTE: pickle can execute arbitrary code. Callers reach this only after
        # _check_integrity() has verified the files' MD5 checksums.
        with open(path, 'rb') as fo:
            if sys.version_info[0] == 2:
                return pickle.load(fo)
            return pickle.load(fo, encoding='latin1')

    def _load_split(self, file_list):
        """Read every batch in ``file_list`` and return (images, labels, coarse_labels).

        ``images`` is an (N, 32, 32, 3) array; ``coarse_labels`` is an empty list
        unless ``self.coarse`` is set.
        """
        chunks, labels, coarse_labels = [], [], []
        for fentry in file_list:
            entry = self._load_batch(
                os.path.join(self.root, self.base_folder, fentry[0]))
            chunks.append(entry['data'])
            if 'labels' in entry:
                labels += entry['labels']
            else:
                labels += entry['fine_labels']
                if self.coarse:
                    coarse_labels += entry['coarse_labels']

        # Fail here with a clear message instead of an IndexError/AttributeError
        # on the first __getitem__ call.
        if self.coarse and len(coarse_labels) != len(labels):
            raise ValueError('coarse=True was requested, but the batch files '
                             'do not provide coarse labels for every sample')

        images = np.concatenate(chunks)
        images = images.reshape((len(images), 3, 32, 32))
        images = images.transpose((0, 2, 3, 1))  # convert to HWC
        return images, labels, coarse_labels

    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: (image, target) where target is index of the target class, or
            (image, target, coarse_target) when the dataset was built with
            ``coarse=True``.
        """
        if self.train:
            img, target = self.train_data[index], self.train_labels[index]
            if self.coarse:
                coarse_target = self.train_coarse_labels[index]
        else:
            img, target = self.test_data[index], self.test_labels[index]
            if self.coarse:
                coarse_target = self.test_coarse_labels[index]

        # doing this so that it is consistent with all other datasets
        # to return a PIL Image
        img = Image.fromarray(img)

        if self.transform is not None:
            img = self.transform(img)

        if self.target_transform is not None:
            target = self.target_transform(target)

        if self.coarse:
            return img, target, coarse_target
        return img, target

    def __len__(self):
        """Number of samples in the loaded split."""
        if self.train:
            return len(self.train_data)
        return len(self.test_data)

    def _check_integrity(self):
        """Return True only if every train and test batch file matches its MD5."""
        root = self.root
        for fentry in (self.train_list + self.test_list):
            filename, md5 = fentry[0], fentry[1]
            fpath = os.path.join(root, self.base_folder, filename)
            if not check_integrity(fpath, md5):
                return False
        return True

    def download(self):
        """Download and unpack the archive into ``root`` unless the files are already valid."""
        import tarfile

        if self._check_integrity():
            print('Files already downloaded and verified')
            return

        root = self.root
        download_url(self.url, root, self.filename, self.tgz_md5)

        # Extract straight into root rather than chdir-ing there: the process
        # working directory is never changed, and the archive handle is closed
        # even when extraction fails.
        with tarfile.open(os.path.join(root, self.filename), "r:gz") as tar:
            tar.extractall(path=root)


class CIFAR100(CIFAR10):
    """`CIFAR100 <https://www.cs.toronto.edu/~kriz/cifar.html>`_ Dataset.

    Subclass of `CIFAR10` that overrides only the archive location, its
    checksum and the batch file names; all loading logic is inherited.
    """
    # Where the archive comes from and how it is verified.
    url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
    filename = "cifar-100-python.tar.gz"
    tgz_md5 = 'eb9058c3a382ffc7106e4002c42a8d85'

    # Directory created by unpacking the archive, and the [name, md5] pairs inside it.
    base_folder = 'cifar-100-python'
    train_list = [['train', '16019d7e3df5f24257cddd939b257f8d']]
    test_list = [['test', 'f0ef6b0ae62326f3e7ffdfab6717acfc']]


In [3]:
# Optimisation hyperparameters shared by the training cells below.
opt_func = torch.optim.Adam   # optimizer class handed to fit_one_cycle
max_lr = 1e-3                 # peak learning rate of the one-cycle schedule
weight_decay = 1e-3
grad_clip = 1e-2              # gradients are clamped element-wise to this magnitude

# Data / schedule sizes.
batch_size = 400
epochs = 120

In [4]:
# train_data = torchvision.datasets.CIFAR100('./', train=True, download=True)

# # Stick all the images together to form a 1600000 X 32 X 3 array
# x = np.concatenate([np.asarray(train_data[i][0]) for i in range(len(train_data))])

# # calculate the mean and std along the (0, 1) axes
# mean = np.mean(x, axis=(0, 1))/255
# std = np.std(x, axis=(0, 1))/255
# # convert the mean and std to plain Python lists
# mean=mean.tolist()
# std=std.tolist()

In [5]:
# Training pipeline: random 32x32 crop after 4px reflect padding, random
# horizontal flip, then conversion to a tensor.
transform_train = tt.Compose([
    tt.RandomCrop(32, padding=4, padding_mode='reflect'),
    tt.RandomHorizontalFlip(),
    tt.ToTensor(),
    # tt.Normalize(mean, std, inplace=True)  # disabled: mean/std are only computed in commented-out code
])

# Evaluation pipeline: tensor conversion only, no augmentation.
transform_test = tt.Compose([
    tt.ToTensor(),
    # tt.Normalize(mean, std)  # disabled, see above
])

In [6]:
# Build both CIFAR-100 splits with coarse labels enabled, so every sample is
# an (image, fine_label, coarse_label) triple.
train_data = CIFAR100('./data', train=True,
                 transform=transform_train,
                 download=True, coarse=True, coarseNumber=2)
test_data = CIFAR100('./data', train=False,
                 transform=transform_test,
                 download=True, coarse=True, coarseNumber=2)

train_length = len(train_data)  # Length training dataset
train_indices = np.arange(train_length)

# shuffle=True: without it every epoch replays the exact same batches in the
# same order (DataLoader defaults to shuffle=False).
train_loader = torch.utils.data.DataLoader(
                        train_data,
                        batch_size=batch_size,
                        shuffle=True,
                        num_workers=2,
                        pin_memory=True,
                    )

# Evaluation needs no gradients, so a larger batch fits; order is kept fixed.
test_loader = torch.utils.data.DataLoader(
                        test_data,
                        batch_size=batch_size*2,
                        num_workers=2,
                        pin_memory=True
                    )

Files already downloaded and verified
Files already downloaded and verified


In [7]:
# trainset = torchvision.datasets.CIFAR100("./",
#                                          train=True,
#                                          download=True,
#                                          transform=transform_train)
# trainloader = torch.utils.data.DataLoader(
#     trainset, batch_size, shuffle=True, num_workers=2,pin_memory=True)

# testset = torchvision.datasets.CIFAR100("./",
#                                         train=False,
#                                         download=True,
#                                         transform=transform_test)
# testloader = torch.utils.data.DataLoader(
#     testset, batch_size*2,pin_memory=True, num_workers=2)

In [8]:
def get_default_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(backend)
    
def to_device(data, device):
    """Transfer ``data`` to ``device``.

    Lists and tuples are walked recursively and come back as lists; anything
    else is moved with a non-blocking ``.to(device)`` call.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)

    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceDataLoader():
    """Iterable wrapper that sends each batch of a dataloader to a device."""
    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __len__(self):
        """Batch count, as reported by the wrapped dataloader."""
        return len(self.dl)

    def __iter__(self):
        """Lazily walk the wrapped dataloader, moving each batch before yielding it."""
        yield from (to_device(batch, self.device) for batch in self.dl)

In [9]:
# Pick the compute device once, then wrap both loaders so their batches are
# moved to it as they are iterated.
device = get_default_device()
trainloader, testloader = (
    DeviceDataLoader(loader, device) for loader in (train_loader, test_loader)
)

In [10]:
def accuracy(outputs, labels):
    """Fraction of rows whose highest-scoring column equals the label.

    The result is a freshly built scalar tensor, not attached to the inputs.
    """
    preds = torch.max(outputs, dim=1)[1]
    n_correct = (preds == labels).sum().item()
    return torch.tensor(n_correct / len(preds))

class ImageClassificationBase(nn.Module):
    """Training and validation helpers for a two-headed (parent + child) classifier.

    Subclasses implement ``forward(images)`` and return a
    ``(parent_logits, child_logits)`` pair. Every batch is an
    ``(images, fine, coarse)`` triple: ``fine`` is scored against the child
    head and ``coarse`` against the parent head.

    Args:
        fine: Stored as ``self.fine``; no method of this class reads it.
        child_loss_weight (float, optional): Multiplier applied to the child
            cross-entropy before it is added to the parent cross-entropy in
            ``training_step``. Defaults to 1.5.
    """
    # Class-level default so instances created without running __init__
    # (e.g. unpickled from an older definition) still have the attribute.
    child_loss_weight = 1.5

    def __init__(self, fine, child_loss_weight=1.5):
        super(ImageClassificationBase, self).__init__()
        self.fine = fine
        self.child_loss_weight = child_loss_weight

    def training_step(self, batch):
        """Return the combined loss ``parent + child_loss_weight * child`` for one batch."""
        images, fine, coarse = batch
        out_parent, out_child = self(images)                  # Generate predictions
        loss_parent = F.cross_entropy(out_parent, coarse)
        loss_child = F.cross_entropy(out_child, fine)
        return loss_parent + self.child_loss_weight * loss_child

    def validation_step(self, batch):
        """Return detached per-head loss and accuracy for one batch."""
        images, fine, coarse = batch
        out_parent, out_child = self(images)                  # Generate predictions
        loss_parent = F.cross_entropy(out_parent, coarse)
        loss_child = F.cross_entropy(out_child, fine)
        return {
            'val_loss_parent': loss_parent.detach(),
            'val_acc_parent': accuracy(out_parent, coarse),
            'val_loss_child': loss_child.detach(),
            'val_acc_child': accuracy(out_child, fine),
        }

    def validation_epoch_end(self, outputs):
        """Average each metric over the per-batch results and return plain floats.

        Every batch carries equal weight, so a smaller final batch counts as
        much as a full one.
        """
        def epoch_mean(key):
            return torch.stack([step[key] for step in outputs]).mean().item()

        metric_keys = ('val_loss_parent', 'val_acc_parent',
                       'val_loss_child', 'val_acc_child')
        return {key: epoch_mean(key) for key in metric_keys}

    def epoch_end(self, epoch, result):
        """Print a one-line summary of the epoch's learning rate, losses and accuracies."""
        print("Epoch [{}], last_lr: {:.5f}, train_loss: {:.4f}, val_loss_parent: {:.4f}, val_acc_parent: {:.4f}, val_loss_child: {:.4f}, val_acc_child: {:.4f}".format(
            epoch, result['lrs'][-1], result['train_loss'], result['val_loss_parent'], result['val_acc_parent'], result['val_loss_child'], result['val_acc_child']))
        
def conv_block(in_channels, out_channels, pool=False):
    """Build a 3x3 conv (padding 1) -> batch norm -> in-place ReLU stack.

    With ``pool=True`` a 2x2 max-pool is appended as a fourth stage.
    """
    conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
    norm = nn.BatchNorm2d(out_channels)
    act = nn.ReLU(inplace=True)
    stages = (conv, norm, act, nn.MaxPool2d(2)) if pool else (conv, norm, act)
    return nn.Sequential(*stages)

class ResNet9(ImageClassificationBase):
    """Convolutional trunk with three residual stages and two classification heads.

    The parent head maps the pooled features to ``num_parent_classes`` logits;
    the child head applies ReLU and one more linear layer to those parent
    logits to produce ``num_classes`` logits.

    Args:
        in_channels: Number of channels of the input images.
        num_classes: Number of child (fine) classes.
        fine: Forwarded unchanged to ``ImageClassificationBase``.
        num_parent_classes (int, optional): Number of parent (coarse) classes.
            Defaults to 20.

    The shape comments below assume 32x32 inputs; the final linear layer needs
    the feature map to be 1x1 after the last pooling step.
    """
    def __init__(self, in_channels, num_classes, fine, num_parent_classes=20):
        super().__init__(fine)

        self.conv1 = conv_block(in_channels, 64)              # 64 x 32 x 32
        self.conv2 = conv_block(64, 128, pool=True)           # 128 x 16 x 16
        self.res1 = nn.Sequential(conv_block(128, 128), conv_block(128, 128))

        self.conv3 = conv_block(128, 256, pool=True)          # 256 x 8 x 8
        self.conv4 = conv_block(256, 512, pool=True)          # 512 x 4 x 4
        self.res2 = nn.Sequential(conv_block(512, 512), conv_block(512, 512))
        # NOTE(review): 1028 may have been meant as 1024; it is left unchanged
        # so previously saved state dicts keep loading.
        self.conv5 = conv_block(512, 1028, pool=True)         # 1028 x 2 x 2
        self.res3 = nn.Sequential(conv_block(1028, 1028), conv_block(1028, 1028))

        #-------------------
        # Parent head: 1028 features -> num_parent_classes logits.
        self.classifier_parent = nn.Sequential(nn.MaxPool2d(2), # 1028 x 1 x 1
                                        nn.Flatten(), # 1028
                                        nn.Linear(1028, num_parent_classes))

        # Child head: reuses the parent head as its first stage (shared
        # weights), then num_parent_classes -> num_classes.
        self.classifier_child = nn.Sequential(self.classifier_parent,
                                nn.ReLU(),
                                nn.Linear(num_parent_classes, num_classes)
                                )

    def forward(self, xb):
        """Return ``(parent_logits, child_logits)`` for a batch of images."""
        out = self.conv1(xb)
        out = self.conv2(out)
        out = self.res1(out) + out
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.res2(out) + out
        out = self.conv5(out)
        out = self.res3(out) + out
        parent_out = self.classifier_parent(out)
        # classifier_child[0] is classifier_parent itself, so feed the parent
        # logits through the remaining child layers instead of running the
        # parent head a second time. The ReLU is not in-place, so parent_out
        # is left untouched.
        child_out = self.classifier_child[2](self.classifier_child[1](parent_out))
        return parent_out, child_out

# 3-channel input, 100 child classes; built on the CPU and then moved to `device`.
model100 = to_device(
    ResNet9(in_channels=3, num_classes=100, fine=True),
    device,
)
model100

ResNet9(
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (conv2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (res1): Sequential(
    (0): Sequential(
      (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (1): Sequential(
      (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=Tr

In [11]:
@torch.no_grad()
def evaluate(model, test_loader):
    """Switch ``model`` to eval mode, run ``validation_step`` on every batch of
    ``test_loader`` without gradient tracking, and return the aggregate built
    by ``validation_epoch_end``.
    """
    model.eval()
    step_results = []
    for batch in test_loader:
        step_results.append(model.validation_step(batch))
    return model.validation_epoch_end(step_results)

def get_lr(optimizer):
    """Return the ``'lr'`` of the optimizer's first parameter group.

    Yields ``None`` when the optimizer has no parameter groups.
    """
    first_group = next(iter(optimizer.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']

def fit_one_cycle(epochs, max_lr, model, train_loader, test_loader, 
                  weight_decay=0, grad_clip=None, opt_func=torch.optim.SGD):
    """Train ``model`` for ``epochs`` epochs under a one-cycle learning-rate schedule.

    Args:
        epochs: Number of passes over ``train_loader``.
        max_lr: Peak learning rate; also passed to the optimizer as its initial rate.
        model: Object providing ``training_step``, ``epoch_end`` and the
            validation hooks that ``evaluate`` calls.
        train_loader: Iterable of training batches; ``len()`` must give the
            number of batches per epoch.
        test_loader: Iterable of batches evaluated after every epoch.
        weight_decay: Passed to the optimizer constructor.
        grad_clip: If truthy, gradients are clamped element-wise to
            ``[-grad_clip, grad_clip]`` before each optimizer step.
        opt_func: Optimizer class, called as
            ``opt_func(params, max_lr, weight_decay=...)``.

    Returns:
        list of dict: one entry per epoch holding the validation metrics plus
        ``'train_loss'`` (mean batch loss) and ``'lrs'`` (rate used per step).
    """
    torch.cuda.empty_cache()
    history = []

    # Set up custom optimizer with weight decay
    optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay)
    # Set up one-cycle learning rate scheduler; it is stepped once per batch
    sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs=epochs, 
                                                steps_per_epoch=len(train_loader))

    for epoch in range(epochs):
        # Training Phase 
        model.train()
        train_losses = []
        lrs = []
        for batch in train_loader:
            loss = model.training_step(batch)
            # Keep only a detached value for logging: holding the live loss
            # tensor would keep autograd references alive for the whole epoch.
            train_losses.append(loss.detach())
            loss.backward()

            # Gradient clipping
            if grad_clip: 
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)

            optimizer.step()
            optimizer.zero_grad()

            # Record & update learning rate
            lrs.append(get_lr(optimizer))
            sched.step()

        # Validation phase
        result = evaluate(model, test_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs
        model.epoch_end(epoch, result)
        history.append(result)
    return history

In [12]:
# Baseline: metrics of the freshly initialised model, recorded before any training.
history = []
history.append(evaluate(model100, testloader))
history

[{'val_loss_parent': 2.9958324432373047,
  'val_acc_parent': 0.05038461834192276,
  'val_loss_child': 4.61177921295166,
  'val_acc_child': 0.010000000707805157}]

In [13]:
# Train for the first quarter of the epoch budget and append the per-epoch
# results to the running history.
current_time = time.time()
history += fit_one_cycle(
    int(epochs / 4), max_lr, model100, trainloader, testloader,
    weight_decay=weight_decay,
    grad_clip=grad_clip,
    opt_func=opt_func,
)

Epoch [0], last_lr: 0.00007, train_loss: 9.1768, val_loss_parent: 2.0767, val_acc_parent: 0.3675, val_loss_child: 4.5795, val_acc_child: 0.0148
Epoch [1], last_lr: 0.00015, train_loss: 8.7596, val_loss_parent: 1.8545, val_acc_parent: 0.4134, val_loss_child: 4.4895, val_acc_child: 0.0304
Epoch [2], last_lr: 0.00028, train_loss: 8.3348, val_loss_parent: 1.9024, val_acc_parent: 0.4151, val_loss_child: 4.1549, val_acc_child: 0.0742
Epoch [3], last_lr: 0.00044, train_loss: 7.3158, val_loss_parent: 2.0614, val_acc_parent: 0.3988, val_loss_child: 3.5674, val_acc_child: 0.1642
Epoch [4], last_lr: 0.00060, train_loss: 6.1643, val_loss_parent: 2.5394, val_acc_parent: 0.3726, val_loss_child: 3.3009, val_acc_child: 0.2140
Epoch [5], last_lr: 0.00076, train_loss: 5.4410, val_loss_parent: 2.8475, val_acc_parent: 0.3808, val_loss_child: 3.2799, val_acc_child: 0.2371
Epoch [6], last_lr: 0.00089, train_loss: 4.9254, val_loss_parent: 2.0365, val_acc_parent: 0.4755, val_loss_child: 2.7677, val_acc_child:

In [None]:
# Fitting the second 1/4 epochs
# history += fit_one_cycle(int(epochs/4), max_lr/10, model100, trainloader, testloader, 
#                              grad_clip=grad_clip, 
#                              weight_decay=weight_decay, 
#                              opt_func=opt_func)

In [None]:
# history += fit_one_cycle(int(epochs/8), max_lr/100, model100, trainloader, testloader, 
#                              grad_clip=grad_clip, 
#                              weight_decay=weight_decay, 
#                              opt_func=opt_func)



In [14]:
# Persist the trained weights. Despite the .h5 extension the file is written
# by torch.save, so it must be read back with torch.load, not an HDF5 reader.
torch.save(
    model100.state_dict(),
    'group_1028_to_parent_Loss_parent_and_child_pretrained_model.h5',
)