In [1]:
# !pip install torch-summary

Collecting torch-summary
  Downloading torch_summary-1.4.5-py3-none-any.whl.metadata (18 kB)
Downloading torch_summary-1.4.5-py3-none-any.whl (16 kB)
Installing collected packages: torch-summary
Successfully installed torch-summary-1.4.5


## IMPORT PACKAGES

In [2]:
import os
import torch
import torch.nn as nn
from torchsummary import summary
from tqdm import tqdm
import torch.nn.functional as F
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torchvision.transforms as tt
from torch.utils.data import ConcatDataset
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

matplotlib.rcParams['figure.facecolor'] = '#ffffff'

## PATH FOR TRAINING AND VALIDATION SET

In [7]:
# Dataset roots, built with os.path.join so the same cell works on Windows,
# Linux and macOS (a literal backslash is only a path separator on Windows).
train_dir = os.path.join('Seen Datasets', 'train')
val_dir = os.path.join('Seen Datasets', 'val')

# One sub-directory per class. Sorting and keeping only directories mirrors how
# torchvision's ImageFolder discovers classes, so classes[i] names label index i
# and stray files (e.g. .DS_Store) are not mistaken for classes.
classes = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)
print(classes)

['Asian-Green-Bee-Eater', 'Brown-Headed-Barbet', 'Cattle-Egret', 'Common-Kingfisher', 'Common-Myna', 'Common-Rosefinch', 'Common-Tailorbird', 'Coppersmith-Barbet', 'Forest-Wagtail', 'Gray-Wagtail', 'Hoopoe', 'House-Crow', 'Indian-Grey-Hornbill', 'Indian-Peacock', 'Indian-Pitta', 'Indian-Roller', 'Jungle-Babbler', 'Northern-Lapwing', 'Red-Wattled-Lapwing', 'Ruddy-Shelduck', 'Rufous-Treepie', 'Sarus-Crane', 'White-Breasted-Kingfisher', 'White-Breasted-Waterhen', 'White-Wagtail']


In [8]:
# Size every image is resized to (passed to tt.Resize when the datasets are built).
image_size=(416,416)
# Number of samples per batch (passed to the DataLoaders).
batch_size= 16

## FINDING MEAN AND STANDARD DEVIATION

In [9]:
# train_set = ImageFolder(train_dir, transform=tt.Compose([
#                                         # tt.Resize(image_size),
#                                         tt.ToTensor()]))
# val_set = ImageFolder(val_dir, transform=tt.Compose([
#                                         # tt.Resize(image_size),
#                                         tt.ToTensor()]))
# dataset = ConcatDataset([train_set,val_set])
# dataset_dl = DataLoader(dataset, batch_size, shuffle=True)

# def get_mean_and_std(dataloader):
#     channels_sum, channels_squared_sum, num_batches = 0, 0, 0
#     for data, _ in tqdm(dataloader):

#         channels_sum += torch.mean(data, dim=[0,2,3])
#         channels_squared_sum += torch.mean(data**2, dim=[0,2,3])
#         num_batches += 1

#     mean = channels_sum / num_batches

#     std = (channels_squared_sum / num_batches - mean ** 2) ** 0.5

#     return mean, std

# mean, std = get_mean_and_std(dataset_dl)
# print('Mean = ',mean)
# print('Std = ',std)

In [10]:
# Per-channel statistics handed to tt.Normalize when the datasets are built.
# NOTE(review): presumably produced by the commented-out get_mean_and_std helper
# above on train+val — confirm before reusing them on a different dataset.
mean=[0.4724, 0.4814, 0.4018]
std=[0.2450, 0.2429, 0.2691]

In [11]:
# Preprocessing shared by the training and validation sets:
# resize to `image_size`, convert to a tensor, then normalize with `mean`/`std`.
transformations_to_perform = tt.Compose([
    tt.Resize(image_size),
    tt.ToTensor(),
    tt.Normalize(mean, std),
])

# ImageFolder derives the label of each image from the sub-directory it sits in.
train_ds = ImageFolder(train_dir, transform=transformations_to_perform)
valid_ds = ImageFolder(val_dir, transform=transformations_to_perform)

In [12]:
# Worker processes used to load and transform images in parallel. Capped at the
# number of CPU cores reported by the OS so the cell also runs on smaller machines.
num_workers = min(16, os.cpu_count() or 1)
# Pinned host memory only helps when batches are copied to a CUDA device.
pin_memory = torch.cuda.is_available()

# Only the training loader shuffles; validation keeps a fixed order.
train_dl = DataLoader(train_ds, batch_size, shuffle=True, num_workers=num_workers, pin_memory=pin_memory)
valid_dl = DataLoader(valid_ds, batch_size, num_workers=num_workers, pin_memory=pin_memory)

In [13]:
def get_default_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    device_type = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_type)
    
def to_device(data, device):
    """Move a tensor, or a (nested) list/tuple of tensors, to `device`.

    Lists and tuples are walked recursively and always come back as lists;
    anything else is moved with a non-blocking `.to(device)` call.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)

    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceDataLoader:
    """Thin wrapper around a dataloader that places each batch on a given device."""

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __len__(self):
        """Return the number of batches in the wrapped dataloader."""
        return len(self.dl)

    def __iter__(self):
        """Iterate over the wrapped dataloader, moving every batch to the device first."""
        yield from (to_device(batch, self.device) for batch in self.dl)

In [14]:
# Pick the compute device once; the model and the wrapped dataloaders below use it.
device = get_default_device()
# Echo the choice so the notebook records which device this run used.
device

device(type='cuda')

## LOADING DATA TO THE DEVICE

In [15]:
# Wrap the loaders so every batch is moved to `device` as it is yielded.
# The isinstance guard keeps this cell idempotent: re-running it does not stack
# a second DeviceDataLoader on top of the first.
if not isinstance(train_dl, DeviceDataLoader):
    train_dl = DeviceDataLoader(train_dl, device)
if not isinstance(valid_dl, DeviceDataLoader):
    valid_dl = DeviceDataLoader(valid_dl, device)

## MODEL BUILDING

In [16]:
def accuracy(outputs, labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    `outputs` holds one row of class scores per sample and `labels` the matching
    class indices. The result is a new 0-dim tensor built from a Python float.
    """
    predicted = torch.max(outputs, dim=1).indices
    num_correct = (predicted == labels).sum().item()
    return torch.tensor(num_correct / len(predicted))

class ImageClassificationBase(nn.Module):
    """Base class adding per-batch train/validation steps and epoch reporting.

    Subclasses only need to implement `forward`; batches are `(images, labels)` pairs.
    """

    def training_step(self, batch):
        """Forward one training batch and return `(loss, accuracy)`."""
        inputs, targets = batch
        logits = self(inputs)
        return F.cross_entropy(logits, targets), accuracy(logits, targets)

    def validation_step(self, batch):
        """Forward one validation batch and return its detached loss and accuracy."""
        inputs, targets = batch
        logits = self(inputs)
        batch_loss = F.cross_entropy(logits, targets)
        return {'val_loss': batch_loss.detach(), 'val_acc': accuracy(logits, targets)}

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation results into plain Python floats."""
        mean_loss = torch.stack([step['val_loss'] for step in outputs]).mean()
        mean_acc = torch.stack([step['val_acc'] for step in outputs]).mean()
        return {'val_loss': mean_loss.item(), 'val_acc': mean_acc.item()}

    def epoch_end(self, epoch, result):
        """Print a one-line summary of the metrics collected for `epoch`."""
        template = "Epoch [{}], train_loss: {:.4f},train_acc: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}"
        print(template.format(
            epoch,
            result['train_loss'],
            result['train_acc'],
            result['val_loss'],
            result['val_acc'],
        ))

In [17]:
class SEBlock(nn.Module):
    """Squeeze-and-excitation style channel gate.

    Each channel is globally average-pooled to a single value, passed through a
    two-layer bottleneck (ReLU then sigmoid), and the resulting per-channel
    weight in (0, 1) rescales the input feature map.

    Args:
        in_channels: number of channels of the input feature map.
        reduction: divisor used to size the bottleneck layer.
    """

    def __init__(self, in_channels, reduction=16):
        super().__init__()
        # Keep at least one bottleneck feature: with in_channels < reduction the
        # integer division would give 0, and a zero-width layer turns the gate
        # into a constant instead of something learned from the input.
        hidden_channels = max(1, in_channels // reduction)
        self.fc1 = nn.Linear(in_channels, hidden_channels, bias=False)
        self.fc2 = nn.Linear(hidden_channels, in_channels, bias=False)

    def forward(self, x):
        """Return `x` rescaled channel-wise; the output has the same shape as `x`."""
        batch_size, num_channels, _, _ = x.size()
        # Squeeze: one average value per (sample, channel).
        y = F.adaptive_avg_pool2d(x, 1).view(batch_size, num_channels)
        # Excite: bottleneck followed by a sigmoid gate per channel.
        y = F.relu(self.fc1(y))
        y = torch.sigmoid(self.fc2(y)).view(batch_size, num_channels, 1, 1)
        return x * y.expand_as(x)

def conv(in_channels, out_channels, kernel_size=3, stride=1, padding=1, groups=1, use_se_block=False):
    """Build a Conv2d -> BatchNorm2d -> ReLU block, optionally followed by an SEBlock.

    Returns an `nn.Sequential` whose children are indexed 0..2 (0..3 with the SE block).
    """
    block = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, groups=groups),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
    )
    if use_se_block:
        # Name the extra child by its position so it gets the next numeric key.
        block.add_module(str(len(block)), SEBlock(out_channels))
    return block

def SeparableConv(in_channels, out_channels, use_se_block=False):
    """Build a depthwise-separable convolution block.

    A 3x3 convolution with one group per input channel is followed by a 1x1
    convolution that mixes channels, then BatchNorm2d and ReLU; an SEBlock is
    appended when `use_se_block` is true.
    """
    block = nn.Sequential(
        # Depthwise: each input channel is filtered on its own.
        nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1, groups=in_channels),
        # Pointwise: 1x1 convolution maps to the requested channel count.
        nn.Conv2d(in_channels, out_channels, kernel_size=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
    )
    if use_se_block:
        # Name the extra child by its position so it gets the next numeric key.
        block.add_module(str(len(block)), SEBlock(out_channels))
    return block

def linear(in_features, out_features, dropout_rate=0.3):
    """Build a Dropout -> Linear -> BatchNorm1d -> ReLU block as an `nn.Sequential`."""
    stages = [
        nn.Dropout(dropout_rate),
        nn.Linear(in_features, out_features),
        nn.BatchNorm1d(out_features),
        nn.ReLU(inplace=True),
    ]
    return nn.Sequential(*stages)

class ImgClassifier(ImageClassificationBase):
    """Convolutional classifier: strided stem, four separable stages, a final conv.

    Every convolutional stage uses an SE block. Features are globally
    average-pooled to 512 values and classified by a 512 -> 1024 -> `output_dim` head.
    """

    def __init__(self, output_dim):
        super().__init__()

        # Stem: the only strided convolution, followed by 2x2 max pooling.
        stages = [conv(3, 64, stride=2, use_se_block=True), nn.MaxPool2d(2)]

        # Depthwise-separable stages, each followed by 2x2 max pooling.
        for c_in, c_out in [(64, 128), (128, 256), (256, 512), (512, 512)]:
            stages.append(SeparableConv(c_in, c_out, use_se_block=True))
            stages.append(nn.MaxPool2d(2))

        # Final full convolution at constant width, no pooling afterwards.
        stages.append(conv(512, 512, use_se_block=True))

        self.features = nn.Sequential(*stages)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        self.classifier = nn.Sequential(
            linear(512, 1024),
            nn.Linear(1024, output_dim),
        )

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images."""
        pooled = self.avgpool(self.features(x))
        # Collapse (N, C, 1, 1) to (N, C) before the fully connected head.
        flat = pooled.view(pooled.shape[0], -1)
        return self.classifier(flat)


In [18]:
# Size the output layer from the dataset rather than a hard-coded count, so the
# model follows the data when classes are added or removed.
num_classes = len(train_ds.classes)
model = to_device(ImgClassifier(num_classes), device)
# summary() prints the table and also returns it; the trailing semicolon stops
# the notebook from echoing the returned object as a second, duplicate table.
summary(model);

Layer (type:depth-idx)                   Param #
├─Sequential: 1-1                        --
|    └─Sequential: 2-1                   --
|    |    └─Conv2d: 3-1                  1,792
|    |    └─BatchNorm2d: 3-2             128
|    |    └─ReLU: 3-3                    --
|    |    └─SEBlock: 3-4                 512
|    └─MaxPool2d: 2-2                    --
|    └─Sequential: 2-3                   --
|    |    └─Conv2d: 3-5                  640
|    |    └─Conv2d: 3-6                  8,320
|    |    └─BatchNorm2d: 3-7             256
|    |    └─ReLU: 3-8                    --
|    |    └─SEBlock: 3-9                 2,048
|    └─MaxPool2d: 2-4                    --
|    └─Sequential: 2-5                   --
|    |    └─Conv2d: 3-10                 1,280
|    |    └─Conv2d: 3-11                 33,024
|    |    └─BatchNorm2d: 3-12            512
|    |    └─ReLU: 3-13                   --
|    |    └─SEBlock: 3-14                8,192
|    └─MaxPool2d: 2-6                    --
|  

Layer (type:depth-idx)                   Param #
├─Sequential: 1-1                        --
|    └─Sequential: 2-1                   --
|    |    └─Conv2d: 3-1                  1,792
|    |    └─BatchNorm2d: 3-2             128
|    |    └─ReLU: 3-3                    --
|    |    └─SEBlock: 3-4                 512
|    └─MaxPool2d: 2-2                    --
|    └─Sequential: 2-3                   --
|    |    └─Conv2d: 3-5                  640
|    |    └─Conv2d: 3-6                  8,320
|    |    └─BatchNorm2d: 3-7             256
|    |    └─ReLU: 3-8                    --
|    |    └─SEBlock: 3-9                 2,048
|    └─MaxPool2d: 2-4                    --
|    └─Sequential: 2-5                   --
|    |    └─Conv2d: 3-10                 1,280
|    |    └─Conv2d: 3-11                 33,024
|    |    └─BatchNorm2d: 3-12            512
|    |    └─ReLU: 3-13                   --
|    |    └─SEBlock: 3-14                8,192
|    └─MaxPool2d: 2-6                    --
|  

## TRAINING PIPELINE 

In [19]:
class EarlyStopping:
    """Track validation loss, checkpoint the best model and flag when to stop.

    Call the instance once per epoch with the validation loss and the model.
    A loss counts as an improvement when it beats the best loss so far by at
    least `min_delta`; the model's state_dict is then saved to `path`.
    After `patience` consecutive epochs without improvement `early_stop`
    becomes True.

    Args:
        patience: number of consecutive non-improving epochs tolerated.
        min_delta: minimum decrease in loss that counts as an improvement.
        path: file the best state_dict is written to.
    """

    def __init__(self, patience=5, min_delta=0, path='checkpoint.pt'):
        self.patience = patience
        self.min_delta = min_delta
        self.path = path
        self.counter = 0
        self.best_score = None  # negated best loss seen so far (higher is better)
        self.early_stop = False

    def __call__(self, val_loss, model):
        """Record one epoch's validation loss and update the stopping state."""
        import math

        # NaN/inf compares False against everything, so without this guard a
        # diverged epoch would be taken as an improvement, overwrite the good
        # checkpoint and make every later comparison meaningless.
        if not math.isfinite(val_loss):
            self._register_no_improvement()
            return

        score = -val_loss
        improved = self.best_score is None or score >= self.best_score + self.min_delta
        if improved:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0
        else:
            self._register_no_improvement()

    def _register_no_improvement(self):
        """Count one non-improving epoch and raise the stop flag once patience runs out."""
        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

    def save_checkpoint(self, val_loss, model):
        """Write the model's state_dict to `self.path` (`val_loss` is accepted but unused)."""
        torch.save(model.state_dict(), self.path)

@torch.no_grad()
def evaluate(model, val_loader):
    """Run the model over `val_loader` in eval mode and return its epoch summary.

    Each batch goes through `model.validation_step`; the collected results are
    reduced by `model.validation_epoch_end`. Gradients are disabled throughout.
    """
    model.eval()
    step_outputs = []
    for batch in val_loader:
        step_outputs.append(model.validation_step(batch))
    return model.validation_epoch_end(step_outputs)

def modeltrain(epochs, lr, model, train_loader, val_loader, 
                  weight_decay=0, grad_clip=None, opt_func=torch.optim.Adam):
    """Train `model` with LR-on-plateau scheduling and early stopping.

    Args:
        epochs: maximum number of passes over `train_loader`.
        lr: initial learning rate given to the optimizer.
        model: module providing `training_step` and `epoch_end`.
        train_loader: iterable of training batches.
        val_loader: iterable of validation batches, passed to `evaluate`.
        weight_decay: weight decay given to the optimizer.
        grad_clip: if truthy, gradients are clamped element-wise to
            [-grad_clip, grad_clip] before each optimizer step.
        opt_func: optimizer class, called as `opt_func(params, lr, weight_decay=...)`.

    Returns:
        A list with one metrics dict per completed epoch (`val_loss`, `val_acc`,
        `train_loss`, `train_acc`). On return the model holds the weights of
        the best checkpoint saved by early stopping, if any was saved.
    """
    history = []
    optimizer = opt_func(model.parameters(), lr, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5)
    early_stopping = EarlyStopping(patience=7, min_delta=0.001)
    # Use the device the model actually lives on instead of a notebook-level
    # global, so the function does not depend on hidden state.
    model_device = next(model.parameters()).device
    torch.cuda.empty_cache()
    
    for epoch in range(epochs):
        model.train()
        train_losses = []
        train_accs = []

        for batch in tqdm(train_loader):
            batch = to_device(batch, model_device)
            loss, acc = model.training_step(batch)
            loss.backward()
            # Keep only a detached copy for logging; storing the live loss
            # would keep every batch's autograd graph referenced all epoch.
            train_losses.append(loss.detach())
            train_accs.append(acc)
            
            if grad_clip: 
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)
            
            optimizer.step()
            optimizer.zero_grad()
            
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['train_acc'] = torch.stack(train_accs).mean().item()
        scheduler.step(result['val_loss'])
        early_stopping(result['val_loss'], model)

        # Log and record the epoch before checking the stop flag, so the final
        # epoch of an early-stopped run is not missing from the history.
        model.epoch_end(epoch, result)
        history.append(result)
        
        if early_stopping.early_stop:
            print("Early stopping")
            break

    # Restore the best weights, but only if this run saved a checkpoint
    # (with epochs == 0 there is nothing to load, or only a stale file).
    if early_stopping.best_score is not None:
        model.load_state_dict(torch.load('checkpoint.pt', map_location=model_device))

    return history


## HYPER PARAMETERS AND TRAINING

In [20]:
# Baseline: validation metrics of the untrained model, stored as the first history entry.
history = [evaluate(model, valid_dl)]
history

[{'val_loss': 3.2190473079681396, 'val_acc': 0.04051172733306885}]

In [21]:
# Upper bound on epochs; modeltrain may stop earlier through its early stopping.
epochs = 100
# Initial learning rate given to the optimizer.
lr = 1e-3
# Weight decay given to the optimizer.
weight_decay = 1e-4
# Passed to clip_grad_value_ in modeltrain: gradients are clamped element-wise to +/- this value.
grad_clip = 0.1
# Optimizer class instantiated inside modeltrain.
opt_func = torch.optim.Adam

In [None]:
%%time
# Train and append the per-epoch metrics after the baseline entry.
# Note: `+=` means re-running this cell extends the existing history rather than replacing it.
history += modeltrain(epochs, lr, model, train_dl, valid_dl,  
                            weight_decay=weight_decay, 
                            grad_clip = grad_clip,
                            opt_func=opt_func)

## MODEL SAVING AND METRICS

In [23]:
# Saves the whole module object (not just its state_dict) to 'model.pth'.
# NOTE(review): loading a fully pickled module normally requires the model's class
# definitions to be importable at load time — confirm this suits downstream users.
torch.save(model,'model.pth')

In [24]:
# Switch to inference mode before exporting, so the scripted module is saved
# with Dropout disabled and BatchNorm using its running statistics regardless
# of which mode earlier cells left the model in.
model.eval()
model_scripted = torch.jit.script(model)
model_scripted.save('modelscripted.pt')

In [25]:
def plot_accuracies(history):
    """Plot training and validation accuracy per epoch and save the figure.

    `history` is a list of metric dicts. Entries without a 'train_acc' key
    (such as a pre-training evaluation) leave a gap in the training curve.
    The figure is written to 'AccuracyVsEpoch.png'.
    """
    missing = float('nan')
    train_accs = [x.get('train_acc', missing) for x in history]
    val_accs = [x['val_acc'] for x in history]

    # Draw on a dedicated figure instead of pyplot's implicit current one, so
    # repeated calls (or other plots in the same cell) never overlay each other.
    fig, ax = plt.subplots()
    ax.plot(train_accs, '-b')
    ax.plot(val_accs, '-r')
    ax.set_xlabel('epoch')
    ax.set_ylabel('accuracy')
    ax.legend(['Training', 'Validation'])
    ax.set_title('Accuracy vs. No. of epochs')
    fig.savefig('AccuracyVsEpoch.png')

In [None]:
# Render the accuracy curves for the recorded history (also writes AccuracyVsEpoch.png).
plot_accuracies(history)

In [27]:
def plot_losses(history):
    """Plot training and validation loss per epoch and save the figure.

    `history` is a list of metric dicts. Entries without a 'train_loss' key
    (such as a pre-training evaluation) leave a gap in the training curve.
    The figure is written to 'LossVsEpoch.png'.
    """
    missing = float('nan')
    train_losses = [x.get('train_loss', missing) for x in history]
    val_losses = [x['val_loss'] for x in history]

    # Draw on a dedicated figure instead of pyplot's implicit current one, so
    # repeated calls (or other plots in the same cell) never overlay each other.
    fig, ax = plt.subplots()
    ax.plot(train_losses, '-b')
    ax.plot(val_losses, '-r')
    ax.set_xlabel('epoch')
    ax.set_ylabel('loss')
    ax.legend(['Training', 'Validation'])
    ax.set_title('Loss vs. No. of epochs')
    fig.savefig('LossVsEpoch.png')

In [None]:
# Render the loss curves for the recorded history (also writes LossVsEpoch.png).
plot_losses(history)