In [None]:
# from google.colab import drive

# drive.mount('/content/gdrive')

!mkdir /content/data
%cd /content/data
!wget http://cs231n.stanford.edu/tiny-imagenet-200.zip
!unzip -q tiny-imagenet-200.zip 

/content/data
--2021-09-29 08:23:14--  http://cs231n.stanford.edu/tiny-imagenet-200.zip
Resolving cs231n.stanford.edu (cs231n.stanford.edu)... 171.64.68.10
Connecting to cs231n.stanford.edu (cs231n.stanford.edu)|171.64.68.10|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 248100043 (237M) [application/zip]
Saving to: ‘tiny-imagenet-200.zip’


2021-09-29 08:23:23 (27.0 MB/s) - ‘tiny-imagenet-200.zip’ saved [248100043/248100043]



In [None]:
import time
import copy
import numpy as np
import matplotlib.pyplot as plt

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torchvision.models as models
!pip install torchinfo==1.5.3
from torchinfo import summary

Collecting torchinfo==1.5.3
  Downloading torchinfo-1.5.3-py3-none-any.whl (19 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.5.3


In [None]:
# Plain torchvision ResNet-18 built with default arguments.
# NOTE(review): `model` is rebound to an Fmodel in the training cell further down, and none of the
# CIFAR-10 objects below are referenced by the visible training code — this cell looks unused; confirm.
model = models.resnet18()
# Training-time preprocessing: random horizontal flip, tensor conversion, per-channel normalisation.
# NOTE(review): the mean/std constants are presumably CIFAR-10 channel statistics — confirm.
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# Evaluation-time preprocessing: same normalisation, no augmentation.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# CIFAR-10 training split, downloaded on demand into ./data (relative to the current working
# directory, which the first cell changes with %cd).
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
# Shuffled mini-batches of 128 images, loaded by 2 worker processes.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=128, shuffle=True, num_workers=2)

# CIFAR-10 test split with the evaluation transform.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)
# Fixed-order mini-batches of 100 images.
testloader = torch.utils.data.DataLoader(
    testset, batch_size=100, shuffle=False, num_workers=2)



Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Per-channel normalisation (these look like the commonly used ImageNet statistics).
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# An int size resizes the shorter image edge to 224; the centre crop then yields 224x224 inputs.
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

dataset = torchvision.datasets.ImageFolder(
    '/content/data/tiny-imagenet-200/train',
    transform=transform,
)

# Fixed-seed 80k / 10k / 10k split so the three partitions are identical on every run.
train_data, val_data, test_data = torch.utils.data.random_split(
    dataset,
    [80000, 10000, 10000],
    generator=torch.Generator().manual_seed(42),
)

batch_size = 32
# Only the training loader shuffles; validation and test keep a fixed order.
train_loader, val_loader, test_loader = (
    DataLoader(subset, batch_size=batch_size, shuffle=reshuffle, num_workers=2)
    for subset, reshuffle in ((train_data, True), (val_data, False), (test_data, False))
)

In [None]:
from torch.nn import Conv2d,AvgPool2d,Linear,Sequential,Dropout,BatchNorm2d,ModuleList,BatchNorm1d
import torch.nn.functional as F
import numpy as np
import math
from torch.autograd import Variable

## https://blog.paperspace.com/attention-mechanisms-in-computer-vision-cbam/
class BasicConv(nn.Module):
    """2-D convolution optionally followed by batch normalisation and ReLU.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels (also stored as ``out_channels``).
        kernel_size, stride, padding, dilation, groups, bias: forwarded to ``nn.Conv2d``.
        relu: append an ``nn.ReLU`` when true; otherwise ``self.relu`` is ``None``.
        bn: append an ``nn.BatchNorm2d`` (eps=1e-5, momentum=0.01, affine) when true;
            otherwise ``self.bn`` is ``None``.
    """

    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True, bias=False):
        super().__init__()
        self.out_channels = out_planes
        self.conv = nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
        )
        if bn:
            self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True)
        else:
            self.bn = None
        if relu:
            self.relu = nn.ReLU()
        else:
            self.relu = None

    def forward(self, x):
        """Apply the convolution, then whichever of bn / relu are enabled, in that order."""
        out = self.conv(x)
        for optional_layer in (self.bn, self.relu):
            if optional_layer is not None:
                out = optional_layer(out)
        return out

class Flatten(nn.Module):
    """Collapse every dimension after the first into a single one."""

    def forward(self, x):
        leading = x.size(0)
        return x.view(leading, -1)

class ChannelGate(nn.Module):
    """Channel-attention gate: rescales each channel of ``x`` by a learned weight in (0, 1).

    For every configured pooling type the input is pooled over its full spatial extent,
    the pooled vector is passed through a shared two-layer MLP, the MLP outputs are summed,
    and the sigmoid of that sum multiplies the input channel-wise.

    Args:
        gate_channels: number of channels of the tensors passed to ``forward``.
        reduction_ratio: the MLP hidden width is ``gate_channels // reduction_ratio``.
        pool_types: non-empty sequence drawn from ``SUPPORTED_POOL_TYPES``.

    Raises:
        ValueError: if ``pool_types`` is empty or contains an unsupported entry.
    """

    SUPPORTED_POOL_TYPES = ('avg', 'max')

    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max']):
        super(ChannelGate, self).__init__()
        # Copy so the (shared, mutable) default list is never aliased by an instance.
        pool_types = list(pool_types)
        self._check_pool_types(pool_types)
        self.gate_channels = gate_channels
        self.mlp = nn.Sequential(
            # nn.Flatten() flattens everything after dim 0, like the hand-written Flatten module;
            # it holds no parameters, so state_dict keys are unchanged.
            nn.Flatten(),
            nn.Linear(gate_channels, gate_channels // reduction_ratio),
            nn.ReLU(),
            nn.Linear(gate_channels // reduction_ratio, gate_channels)
            )
        self.pool_types = pool_types

    @classmethod
    def _check_pool_types(cls, pool_types):
        """Reject empty or unsupported pooling configurations with a clear error."""
        if len(pool_types) == 0:
            raise ValueError('pool_types must contain at least one of {}'.format(cls.SUPPORTED_POOL_TYPES))
        unsupported = [p for p in pool_types if p not in cls.SUPPORTED_POOL_TYPES]
        if unsupported:
            raise ValueError('unsupported pool type(s) {}; expected a subset of {}'.format(
                unsupported, cls.SUPPORTED_POOL_TYPES))

    def forward(self, x):
        """Return ``x`` scaled per channel; the output has the same shape as ``x``."""
        # Re-validate in case ``pool_types`` was reassigned after construction; otherwise an unknown
        # entry would silently reuse the previous pooling result (or hit an undefined variable).
        self._check_pool_types(self.pool_types)

        # Pool over the whole feature map so each channel collapses to a single value.
        window = (x.size(2), x.size(3))
        channel_att_sum = None
        for pool_type in self.pool_types:
            if pool_type == 'avg':
                pooled = F.avg_pool2d(x, window, stride=window)
            else:  # 'max' (guaranteed by the validation above)
                pooled = F.max_pool2d(x, window, stride=window)
            channel_att_raw = self.mlp(pooled)

            if channel_att_sum is None:
                channel_att_sum = channel_att_raw
            else:
                channel_att_sum = channel_att_sum + channel_att_raw

        # (N, C) -> (N, C, 1, 1) -> broadcast to the shape of x.
        scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
        return x * scale

def logsumexp_2d(tensor):
    """Log-sum-exp over all trailing dimensions of each (dim 0, dim 1) slice.

    The input is viewed as (size(0), size(1), -1); the per-slice maximum is subtracted
    before exponentiating and added back afterwards. The result has shape
    (size(0), size(1), 1).
    """
    flat = tensor.view(tensor.size(0), tensor.size(1), -1)
    peak = flat.max(dim=2, keepdim=True)[0]
    shifted_sum = torch.exp(flat - peak).sum(dim=2, keepdim=True)
    return peak + torch.log(shifted_sum)

class ChannelPool(nn.Module):
    """Reduce dim 1 to two planes: the maximum over dim 1 followed by the mean over dim 1."""

    def forward(self, x):
        strongest = x.max(dim=1, keepdim=True)[0]
        average = x.mean(dim=1, keepdim=True)
        return torch.cat((strongest, average), dim=1)

class SpatialGate(nn.Module):
    """Spatial-attention gate: rescales every position of ``x`` by a weight in (0, 1).

    The input is compressed to two planes by ``ChannelPool``, a 7x7 ``BasicConv``
    (batch norm, no ReLU) maps them to one plane, and the sigmoid of that plane
    multiplies the input.
    """

    def __init__(self):
        super().__init__()
        kernel_size = 7
        # Padding of (k - 1) // 2 with stride 1 keeps the spatial size unchanged.
        same_padding = (kernel_size - 1) // 2
        self.compress = ChannelPool()
        self.spatial = BasicConv(2, 1, kernel_size, stride=1, padding=same_padding, relu=False)

    def forward(self, x):
        attention_map = torch.sigmoid(self.spatial(self.compress(x)))
        # The single-plane map broadcasts across dim 1 of x.
        return x * attention_map

class CBAM(nn.Module):
    """Attention block: a ``ChannelGate`` optionally followed by a ``SpatialGate``.

    Args:
        gate_channels: number of channels of the tensors passed to ``forward``.
        reduction_ratio: forwarded to ``ChannelGate``.
        pool_types: forwarded to ``ChannelGate``.
        no_spatial: when true, no ``SpatialGate`` is created and only the channel gate is applied.
    """

    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False):
        super(CBAM, self).__init__()
        # Attribute names double as state_dict key prefixes, so they are kept as-is.
        self.ChannelGate = ChannelGate(gate_channels, reduction_ratio, pool_types)
        self.no_spatial = no_spatial
        if not no_spatial:
            self.SpatialGate = SpatialGate()

    def forward(self, x):
        """Return the gated tensor (same shape as ``x``)."""
        x_out = self.ChannelGate(x)
        if not self.no_spatial:
            x_out = self.SpatialGate(x_out)
        return x_out

class Base(nn.Module):
    """ResNet-18 backbone that exposes the outputs of ``layer1`` .. ``layer4``.

    Forward hooks on the four residual stages append each stage's output to
    ``self.features``; ``avgpool`` and ``fc`` are replaced by ``nn.Identity``.
    ``channel_size`` lists the channel count of each captured stage, measured with a
    single probe forward pass at construction time.

    Args:
        trainable: when false, all backbone parameters get ``requires_grad = False``.
        attention: accepted for interface compatibility; not used by this class.
    """

    FEATURE_TYPES = ('narrow', 'wide')

    def freeze(self):
        """Disable gradients for every backbone parameter."""
        for param in self.base_model.parameters():
            param.requires_grad = False

    def unfreeze(self):
        """Enable gradients for every backbone parameter."""
        for param in self.base_model.parameters():
            param.requires_grad = True

    def attach_fea_out(self, classname, input, output):
        """Forward hook: record the hooked module's output."""
        self.features.append(output)

    def attach_fea_in(self, classname, input, output):
        """Forward hook: record the hooked module's first positional input."""
        self.features.append(input[0])

    def __init__(self, trainable=True, attention=False):
        super(Base, self).__init__()
        self.features = []
        self.channel_size = []

        self.base_model = models.resnet18(pretrained=False)
        used_blocks = ['layer1', 'layer2', 'layer3', 'layer4']
        unused_blocks = ['avgpool', 'fc']

        for block in used_blocks:
            getattr(self.base_model, block).register_forward_hook(self.attach_fea_out)

        for block in unused_blocks:
            setattr(self.base_model, block, nn.Identity())

        if not trainable:
            self.freeze()

        # Probe with a random image to read off the channel count of each hooked ResNet stage.
        # Run it in eval mode without autograd so the random input neither updates BatchNorm
        # running statistics nor leaves a graph attached to the captured tensors.
        fake_img = torch.rand(1, 3, 256, 256)
        was_training = self.base_model.training
        self.base_model.eval()
        try:
            with torch.no_grad():
                self.base_model(fake_img)
        finally:
            self.base_model.train(was_training)
        self.channel_size = [block.size()[1] for block in self.features]
        self.features = []

    def _collect_features(self, img):
        """Run the backbone on ``img`` and return exactly the feature maps captured by that pass."""
        # Drop anything left over from an earlier pass so stale tensors are never mixed in.
        self.features = []
        self.base_model(img)
        collected, self.features = self.features, []
        return collected

    def forward(self, img):
        """Return the list of hooked feature maps for ``img`` (one per hooked stage)."""
        return self._collect_features(img)

    def get_MLSP(self, img, feature_type, resize=True):
        """Extract multi-level features for ``img``.

        Args:
            img: input batch passed to the backbone.
            feature_type: ``'narrow'`` pools every feature map to one value per channel;
                ``'wide'`` bilinearly resizes every feature map to 7x7. Only consulted
                when ``resize`` is true.
            resize: when true, return a single tensor concatenated along dim 1;
                when false, return the raw list of captured feature maps.

        Raises:
            ValueError: if ``resize`` is true and ``feature_type`` is not supported.
        """
        if resize and feature_type not in self.FEATURE_TYPES:
            raise ValueError('feature_type must be one of {}, got {!r}'.format(
                self.FEATURE_TYPES, feature_type))

        features = self._collect_features(img)
        if not resize:
            return features

        if feature_type == 'narrow':
            # (N, C, H, W) -> (N, C, 1, 1) -> (N, C)
            MLSP = [F.adaptive_avg_pool2d(block, (1, 1)).squeeze(2).squeeze(2) for block in features]
        else:  # 'wide'
            MLSP = [F.interpolate(block, mode='bilinear', size=7) for block in features]

        return torch.cat(MLSP, dim=1)



class head_block():
    """Factory for the layer stacks used by ``Head``."""

    def conv_block(self, inc, outc, ker, padding=1, avgpool=False):
        """Build Dropout(0.5) -> [AvgPool2d(3, 1, 1)] -> Conv2d -> BatchNorm2d -> ReLU.

        Args:
            inc: input channels of the convolution.
            outc: output channels of the convolution.
            ker: convolution kernel size.
            padding: convolution padding.
            avgpool: insert the 3x3, stride-1, padding-1 average pool before the convolution.
        """
        modules = [nn.Dropout(0.5)]
        if avgpool:
            modules.append(AvgPool2d(3, 1, 1))
        modules.extend([
            Conv2d(inc, outc, ker, padding=padding),
            nn.BatchNorm2d(outc),
            nn.ReLU(),
        ])
        return Sequential(*modules)

    def mlsp_cnn_gap_attn(self, num_channels, attention=True):
        """Return a ModuleList with one ``Sequential(CBAM(...))`` per entry of ``num_channels``.

        Args:
            num_channels: iterable of channel counts, one per feature level.
            attention: accepted for interface compatibility; currently not used.
        """
        # Channel divisor applied to every level; 1 keeps the channel counts unchanged.
        scale = 1
        # Each CBAM stays wrapped in a Sequential so parameter names (state_dict keys) keep their
        # layout. conv_block stages are not part of the stack at the moment.
        blocks = [
            Sequential(CBAM(num_channel // scale, reduction_ratio=1))
            for num_channel in num_channels
        ]
        return ModuleList(blocks)


class Head(nn.Module):
    """Classifier head over a list of multi-level feature maps.

    Every feature map is (optionally passed through its attention block with a residual
    connection and then) globally average-pooled; the pooled vectors are concatenated and
    fed to a single linear layer.

    Args:
        head_type: ``'mlsp_gap'`` (pool the features directly) or ``'mlsp_cnn_gap_attn'``
            (apply the per-level blocks built by ``head_block`` first).
        num_channel: channel count of each feature map, in the order they are passed to
            ``forward``.
        num_classes: width of the output layer (defaults to 1000).

    Raises:
        ValueError: if ``head_type`` is not supported.
    """

    SUPPORTED_HEAD_TYPES = ('mlsp_gap', 'mlsp_cnn_gap_attn')

    def __init__(self, head_type, num_channel, num_classes=1000):
        super(Head, self).__init__()
        # Fail at construction: any other value would only surface later, at forward time,
        # as a missing ``self.head`` attribute.
        if head_type not in self.SUPPORTED_HEAD_TYPES:
            raise ValueError('head_type must be one of {}, got {!r}'.format(
                self.SUPPORTED_HEAD_TYPES, head_type))
        if head_type == 'mlsp_cnn_gap_attn':
            self.head = getattr(head_block(), head_type)(num_channel)
        self.head_type = head_type
        self.num_ch = num_channel
        # The classifier input width is the sum of the per-level channel counts (a fixed 960
        # only fits channel lists that happen to sum to 960).
        in_features = int(sum(num_channel))
        self.dense = Sequential(Linear(in_features, num_classes))

    def forward(self, features):
        """Map a list of feature maps to logits of shape (batch, num_classes)."""
        if self.head_type == 'mlsp_gap':
            pooled = [F.adaptive_avg_pool2d(feature, (1, 1)) for feature in features]
        else:
            # Residual connection around each attention block before pooling.
            pooled = [
                F.adaptive_avg_pool2d(block(feature) + feature, (1, 1))
                for feature, block in zip(features, self.head)
            ]
        x = torch.cat(pooled, dim=1)
        x = torch.flatten(x, 1)
        x = self.dense(x)
        return x

class Fmodel(nn.Module):
    """Full model: a ``Base`` backbone feeding a ``Head`` built for the backbone's channel sizes.

    ``resize`` is False, so the backbone hands the head its raw list of per-stage
    feature maps; ``feature_type`` is passed along to ``Base.get_MLSP`` unchanged.
    """

    def __init__(self, head_type):
        super().__init__()
        # Plain configuration attributes consumed by forward().
        self.feature_type = 'narrow'
        self.resize = False
        # Backbone first: the head needs its measured channel sizes.
        self.bmodel = Base()
        self.head = Head(head_type, self.bmodel.channel_size)

    def forward(self, img):
        multi_level = self.bmodel.get_MLSP(img, self.feature_type, self.resize)
        return self.head(multi_level)

    def unfreeze(self):
        """Make the backbone parameters trainable."""
        self.bmodel.unfreeze()

    def freeze(self):
        """Stop gradient updates to the backbone parameters."""
        self.bmodel.freeze()




In [None]:
from tqdm.auto import tqdm, trange

def train_model(model, dataloaders, criterion, optimizer, num_epochs=3, use_auxiliary=False,
                scheduler=None, device=None, checkpoint_path='model'):
    """Train ``model`` and return it with the weights of its best validation epoch.

    Each epoch runs a 'train' phase followed by a 'val' phase. After every validation
    phase the scheduler (if any) is stepped with the validation loss, and whenever the
    validation accuracy improves the whole model is saved to ``checkpoint_path`` and its
    state dict is remembered.

    Args:
        model: network to optimise.
        dataloaders: mapping with 'train' and 'val' entries yielding (inputs, labels) batches.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimiser over the model parameters.
        num_epochs: number of train + val epochs.
        use_auxiliary: when true, the model is expected to return ``(outputs, aux)`` in the
            training phase; the auxiliary loss is added with weight 0.3.
        scheduler: object whose ``step(val_loss)`` is called after each validation phase.
            When omitted, a module-level ``lr_scheduler`` is used if one exists (legacy
            behaviour of existing callers); otherwise no scheduling happens.
        device: device the batches are moved to. When omitted, the device of the model's
            first parameter is used (CPU for a parameter-free model).
        checkpoint_path: destination of the ``torch.save(model, ...)`` checkpoint.

    Returns:
        ``(model, val_acc_history)`` where ``val_acc_history`` holds one accuracy per epoch.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')
    if scheduler is None:
        # Backward compatibility: earlier callers relied on a notebook-global scheduler.
        scheduler = globals().get('lr_scheduler')

    since = time.time()
    val_acc_history = []
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'val']:  # Each epoch has a training and a validation phase
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            # Kept as a tensor on the target device so accuracy values stay tensors.
            running_corrects = torch.zeros((), dtype=torch.long, device=device)

            for inputs, labels in tqdm(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                if is_train:
                    optimizer.zero_grad()

                # Track gradients only while training.
                with torch.set_grad_enabled(is_train):
                    if is_train and use_auxiliary:
                        outputs, aux1 = model(inputs)
                        loss = criterion(outputs, labels) + 0.3 * criterion(aux1, labels)
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Statistics (loss is a batch mean, so weight it by the batch size)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            dataset_size = len(dataloaders[phase].dataset)
            epoch_loss = running_loss / dataset_size
            epoch_acc = running_corrects.double() / dataset_size

            if not is_train and scheduler is not None:
                # Adjust the learning rate based on the validation loss.
                scheduler.step(epoch_loss)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if not is_train:
                if epoch_acc > best_acc:
                    # New best epoch: checkpoint the whole model and remember its weights.
                    best_acc = epoch_acc
                    torch.save(model, checkpoint_path)
                    best_model_wts = copy.deepcopy(model.state_dict())
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history


def test():
    """Evaluate the notebook-global ``model`` on the notebook-global ``test_loader``.

    Reads ``model``, ``test_loader`` and ``device`` from the module scope, prints the
    top-1 accuracy and returns it.

    Raises:
        ValueError: if ``test_loader`` yields no samples.
    """
    # Use the configured device rather than .cuda(), so a CPU-only runtime works too.
    model.eval().to(device)

    correct = 0
    test_samples = 0  # counted from the loader instead of assuming a fixed split size

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            # Make predictions.
            prediction = model(inputs)

            # Retrieve predictions indexes.
            _, predicted_class = torch.max(prediction, 1)

            # Compute number of correct predictions.
            correct += (predicted_class == labels).sum().item()
            test_samples += labels.size(0)

    if test_samples == 0:
        raise ValueError('test_loader produced no samples')

    test_accuracy = correct / test_samples
    print('Test accuracy: {}'.format(test_accuracy))
    return test_accuracy


In [None]:
# Number of training epochs for the run below.
epochs = 15 #50
# Single-iteration loop: the body runs exactly once.
for i in range(1):
    # Build the CBAM-attention variant of the model.
    model = Fmodel('mlsp_cnn_gap_attn')
    #model = models.resnet18(pretrained=False)
    # Forward pass on a random batch before moving the model to the device.
    # NOTE(review): the model has not been switched to eval mode here, so this presumably also
    # updates BatchNorm running statistics with random data — confirm it is intended.
    model(torch.rand((2,3,224,224)))
    # Prefer the first CUDA device, fall back to CPU. `device` is also read as a global by test().
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
    # `lr_scheduler` is not passed to train_model below; it is picked up from the notebook's
    # global scope, so this name must not be changed.
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,factor=0.1, patience=1, verbose=True, min_lr =1e-6)

    # Returns the model restored to its best-validation weights; the accuracy history is discarded.
    model, _ = train_model(model, {"train": train_loader, "val": val_loader}, criterion, optimizer, epochs)
    # Evaluates the global `model` on the global `test_loader`.
    test()

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch 0/14
----------


  0%|          | 0/2500 [00:00<?, ?it/s]

train Loss: 4.2788 Acc: 0.1058


  0%|          | 0/313 [00:00<?, ?it/s]

val Loss: 3.8553 Acc: 0.1512

Epoch 1/14
----------


  0%|          | 0/2500 [00:00<?, ?it/s]

train Loss: 3.4567 Acc: 0.2177


  0%|          | 0/313 [00:00<?, ?it/s]

val Loss: 3.3233 Acc: 0.2460

Epoch 2/14
----------


  0%|          | 0/2500 [00:00<?, ?it/s]

train Loss: 3.0254 Acc: 0.2941


  0%|          | 0/313 [00:00<?, ?it/s]

val Loss: 3.0083 Acc: 0.3102

Epoch 3/14
----------


  0%|          | 0/2500 [00:00<?, ?it/s]

train Loss: 2.7197 Acc: 0.3540


  0%|          | 0/313 [00:00<?, ?it/s]

val Loss: 2.8068 Acc: 0.3433

Epoch 4/14
----------


  0%|          | 0/2500 [00:00<?, ?it/s]

train Loss: 2.4628 Acc: 0.4039


  0%|          | 0/313 [00:00<?, ?it/s]

val Loss: 2.6945 Acc: 0.3727

Epoch 5/14
----------


  0%|          | 0/2500 [00:00<?, ?it/s]

train Loss: 2.2336 Acc: 0.4505


  0%|          | 0/313 [00:00<?, ?it/s]

val Loss: 2.6219 Acc: 0.3850

Epoch 6/14
----------


  0%|          | 0/2500 [00:00<?, ?it/s]

train Loss: 2.0133 Acc: 0.4958


  0%|          | 0/313 [00:00<?, ?it/s]

val Loss: 2.7060 Acc: 0.3867

Epoch 7/14
----------


  0%|          | 0/2500 [00:00<?, ?it/s]

train Loss: 1.7835 Acc: 0.5441


  0%|          | 0/313 [00:00<?, ?it/s]

val Loss: 2.5064 Acc: 0.4143

Epoch 8/14
----------


  0%|          | 0/2500 [00:00<?, ?it/s]

train Loss: 1.5525 Acc: 0.5972


  0%|          | 0/313 [00:00<?, ?it/s]

val Loss: 2.5379 Acc: 0.4141

Epoch 9/14
----------


  0%|          | 0/2500 [00:00<?, ?it/s]

train Loss: 1.3059 Acc: 0.6550


  0%|          | 0/313 [00:00<?, ?it/s]

Epoch    10: reducing learning rate of group 0 to 1.0000e-05.
val Loss: 2.6318 Acc: 0.4140

Epoch 10/14
----------


  0%|          | 0/2500 [00:00<?, ?it/s]

train Loss: 0.7023 Acc: 0.8361


  0%|          | 0/313 [00:00<?, ?it/s]

val Loss: 2.2497 Acc: 0.4798

Epoch 11/14
----------


  0%|          | 0/2500 [00:00<?, ?it/s]

train Loss: 0.5610 Acc: 0.8807


  0%|          | 0/313 [00:00<?, ?it/s]

val Loss: 2.2897 Acc: 0.4779

Epoch 12/14
----------


  0%|          | 0/2500 [00:00<?, ?it/s]

train Loss: 0.4765 Acc: 0.9050


  0%|          | 0/313 [00:00<?, ?it/s]

Epoch    13: reducing learning rate of group 0 to 1.0000e-06.
val Loss: 2.3306 Acc: 0.4676

Epoch 13/14
----------


  0%|          | 0/2500 [00:00<?, ?it/s]

train Loss: 0.3933 Acc: 0.9320


  0%|          | 0/313 [00:00<?, ?it/s]

val Loss: 2.3319 Acc: 0.4748

Epoch 14/14
----------


  0%|          | 0/2500 [00:00<?, ?it/s]

train Loss: 0.3819 Acc: 0.9364


  0%|          | 0/313 [00:00<?, ?it/s]

val Loss: 2.3323 Acc: 0.4726

Training complete in 88m 37s
Best val Acc: 0.479800
Test accuracy: 0.4763


In [None]:
# Reload the whole-model checkpoint written by torch.save(model, ...) during training.
# NOTE(review): torch.load unpickles arbitrary Python objects — only load files produced by this
# notebook. No map_location is given, so this presumably needs the device the model was saved from.
model=torch.load('model')

In [None]:
# Build a fresh Fmodel (its backbone is created with pretrained=False) and print a torchinfo
# layer summary for an input of shape (2, 3, 224, 224).
# NOTE(review): this rebinds `model`, replacing the checkpoint loaded in the cell above — confirm
# that is intended, or use a separate name for the summary instance.
model = Fmodel('mlsp_cnn_gap_attn')
summary(model,(2,3,224,224))

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Layer (type:depth-idx)                                  Output Shape              Param #
Fmodel                                                  --                        --
├─Head: 1                                               --                        --
│    └─ModuleList: 2-1                                  --                        --
├─Base: 1                                               --                        --
│    └─ResNet: 2-2                                      [2, 25088]                --
│    │    └─Conv2d: 3-1                                 [2, 64, 112, 112]         9,408
│    │    └─BatchNorm2d: 3-2                            [2, 64, 112, 112]         128
│    │    └─ReLU: 3-3                                   [2, 64, 112, 112]         --
│    │    └─MaxPool2d: 3-4                              [2, 64, 56, 56]           --
│    │    └─Sequential: 3-5                             [2, 64, 56, 56]           147,968
│    │    └─Sequential: 3-6                        