# **Butterfly species classification using PyTorch**

![Image](https://www.thefactsite.com/wp-content/uploads/2016/12/butterfly-facts-702x347.jpg)

### Dataset

- Train, test and validation data sets for 50 butterfly species. All images are 224 x 224 x 3 in JPG format.
- Train set consists of 4955 images partitioned into 50 sub directories one for each species.
- Test set consists of 250 images partitioned into 50 sub directories with 5 test images per species.
- Valid set consists of 250 images partitioned into 50 sub directories with 5 validation images per species.

## Imports
- Importing the necessary PyTorch libraries for the model and data.
- Matplotlib and seaborn for visualizations.
- Scikit-Learn for some performance metrics.

In [None]:
# libraries
import os
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
matplotlib.rcParams['figure.facecolor'] = '#ffffff'

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torchvision.transforms as T
from torch.utils.data import random_split
from torchvision.utils import make_grid
from tqdm.notebook import tqdm

import time
import pandas as pd

from sklearn.metrics import classification_report,confusion_matrix

## Dataset Preparation

In [None]:
# Root folder of the butterfly image dataset; every split has its own sub-directory.
DATA_DIR = "../input/butterfly-images40-species/butterflies"

# Build the three split paths in one pass.
train_dir, valid_dir, test_dir = (
    os.path.join(DATA_DIR, split) for split in ("train", "valid", "test")
)

# Peek at the first few entries of the test split.
os.listdir(test_dir)[:4]

In [None]:
# First ten entries of the training split. The path is built with
# os.path.join (like the split paths above) rather than by string concatenation.
os.listdir(os.path.join(DATA_DIR, "train"))[:10]

In [None]:
# preprocessing pipelines

batch_size = 32
image_size = 224
# per-channel (mean, std) pair handed to T.Normalize below
imagenet_stats = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# training: resize, random flip / rotation / colour jitter, then tensor + normalise
train_transforms = T.Compose([
    T.Resize(size=image_size),
    T.RandomHorizontalFlip(),
    T.RandomRotation(degrees=20),
    T.ColorJitter(0.1, 0.1, 0.1, 0.1),  # brightness, contrast, saturation, hue
    T.ToTensor(),
    T.Normalize(mean=imagenet_stats[0], std=imagenet_stats[1]),
])

# validation / test: same resize and normalisation, no random augmentation
val_test_transforms = T.Compose([
    T.Resize(size=image_size),
    T.ToTensor(),
    T.Normalize(mean=imagenet_stats[0], std=imagenet_stats[1]),
])


In [None]:
# datasets: one ImageFolder per split; only the training split is augmented
train_ds = ImageFolder(root=train_dir, transform=train_transforms)
val_ds = ImageFolder(root=valid_dir, transform=val_test_transforms)
test_ds = ImageFolder(root=test_dir, transform=val_test_transforms)

In [None]:
# class list exposed by the training dataset; reused later to label plots
classes = train_ds.classes
# number of classes
len_classes = len(classes)

In [None]:
# display the training dataset's class_to_idx attribute
# (the classes and the indexes belonging to them)
train_ds.class_to_idx

In [None]:
# number of images in the train, validation and test sets
print(f"Train : {len(train_ds)} \nValidation : {len(val_ds)} \nTest : {len(test_ds)}")

In [None]:
# Data loaders: only the training loader shuffles; every other option is shared.
_loader_opts = dict(batch_size=batch_size, num_workers=3, pin_memory=True)
train_dl = DataLoader(train_ds, shuffle=True, **_loader_opts)
val_dl = DataLoader(val_ds, shuffle=False, **_loader_opts)
test_dl = DataLoader(test_ds, shuffle=False, **_loader_opts)

### Take a look at some of the images from train batch

In [None]:
# undo a channel-wise normalisation
def denormalize(images, means, stds):
    """Return ``images * stds + means`` with the statistics broadcast per channel.

    ``means`` and ``stds`` are turned into tensors and reshaped to
    ``(1, 3, 1, 1)`` so they line up with the channel axis of the image tensor.
    The input is not modified.
    """
    shift, scale = (
        torch.tensor(values).reshape(1, 3, 1, 1) for values in (means, stds)
    )
    return images * scale + shift

# to show the images
def show_images(img, label):
    """Plot up to 25 images of a normalised batch in a 5x5 grid, titled by class name.

    Args:
        img: batch of normalised image tensors, indexed as ``img[i]`` -> (C, H, W).
        label: per-image class indexes, used to look names up in ``classes``.

    The batch is denormalised into a fresh tensor, so the caller's ``img`` is
    left untouched and the function can safely be called more than once.
    """
    # Never ask for more images than the batch holds (e.g. a short last batch).
    count = min(25, len(img))
    # Denormalise a copy on the CPU and keep the values in [0, 1], the range
    # plt.imshow expects for float RGB data.
    restored = denormalize(img[:count].detach().cpu(), *imagenet_stats).clamp(0, 1)

    plt.figure(figsize = [20,14])
    for i in range(count):
        plt.subplot(5,5,i+1)
        # imshow wants channels last: (C, H, W) -> (H, W, C)
        plt.imshow(restored[i].permute(1,2,0))
        plt.title(classes[int(label[i])])
        plt.axis("off")
    plt.show()

In [None]:
# one batch
# Use the built-in next(): the Python-2 style ``.next()`` method is not part
# of the iterator protocol and is not available on recent DataLoader iterators.
images, labels = next(iter(train_dl))

print(images.shape)

In [None]:
# show the images of the batch drawn above, titled with their class names
show_images(images,labels)

# **MODEL**

In [None]:
# A class we can extend to use in our model.

class ImageClassificationBase(nn.Module):
    """Base module bundling the per-batch and per-epoch bookkeeping for a classifier.

    The class calls ``self(images)``, so subclasses must provide ``forward``.
    """

    def training_step(self, batch):
        """Return the cross-entropy loss for one ``(images, labels)`` batch."""
        inputs, targets = batch
        return F.cross_entropy(self(inputs), targets)

    def validation_step(self, batch):
        """Return the detached loss and the accuracy for one ``(images, labels)`` batch."""
        inputs, targets = batch
        logits = self(inputs)
        return {
            'val_loss': F.cross_entropy(logits, targets).detach(),
            'val_acc': accuracy(logits, targets),
        }

    def validation_epoch_end(self, outputs):
        """Average the per-batch losses and accuracies into plain Python floats."""
        def mean_of(key):
            # stack the scalar tensors of every batch and reduce them to one float
            return torch.stack([entry[key] for entry in outputs]).mean().item()

        return {'val_loss': mean_of('val_loss'), 'val_acc': mean_of('val_acc')}

    def epoch_end(self, epoch, result):
        """Print a one-line summary of the epoch (``epoch`` is zero-based)."""
        # the learning-rate part only appears when the result carries an 'lrs' list
        if 'lrs' in result:
            lr_note = "last_lr: {:.5f},".format(result['lrs'][-1])
        else:
            lr_note = ''
        template = "Epoch [{}],{} train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}"
        print(template.format(
            epoch + 1, lr_note,
            result['train_loss'], result['val_loss'], result['val_acc']))

        
        
def accuracy(outputs, labels):
    """Return, as a scalar tensor, the fraction of rows whose arg-max equals the label."""
    predicted = torch.max(outputs, dim=1).indices
    hits = (predicted == labels).sum().item()
    return torch.tensor(hits / len(predicted))

In [None]:
# build the model using a pre-trained model and changing the last layer

from torchvision import models

class ButterFlyModel(ImageClassificationBase):
    """GoogLeNet classifier whose final fully-connected layer is resized to ``num_classes``."""

    def __init__(self, num_classes, pretrained=True):
        """Build the network.

        Args:
            num_classes: size of the replacement output layer.
            pretrained: load the pretrained GoogLeNet weights (default) or
                start from a randomly initialised network. This flag was
                previously accepted but ignored.
        """
        super().__init__()
        if pretrained:
            # Use a pretrained model
            self.network = models.googlenet(pretrained=True)
        else:
            # Without pretrained weights torchvision keeps GoogLeNet's auxiliary
            # classifiers by default, which makes the training-mode forward pass
            # return a tuple of outputs. They are disabled so forward() yields
            # the plain logits tensor that F.cross_entropy in the base class uses.
            self.network = models.googlenet(pretrained=False, aux_logits=False)
        # Replace last layer
        self.network.fc = nn.Linear(self.network.fc.in_features, num_classes)

    def forward(self, xb):
        """Run the batch ``xb`` through the wrapped network."""
        return self.network(xb)

### To use a GPU

In [None]:
def get_default_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(name)
    
def to_device(data, device):
    """Move ``data`` to ``device``; lists and tuples are handled recursively.

    A list/tuple input always comes back as a list. Anything else must offer a
    ``.to(device, non_blocking=True)`` method, whose result is returned.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceDataLoader():
    """Thin wrapper that moves every batch of an underlying loader to one device."""

    def __init__(self, dl, device):
        # wrapped loader and the device its batches are sent to
        self.dl, self.device = dl, device

    def __len__(self):
        """Return the length of the wrapped loader (its number of batches)."""
        return len(self.dl)

    def __iter__(self):
        """Iterate the wrapped loader, yielding each batch after the device move."""
        for batch in self.dl:
            moved = to_device(batch, self.device)
            yield moved

In [None]:
# pick the device once; it is reused for the loaders, the model and predictions
device = get_default_device()
# bare name on the last line so the notebook displays the chosen device
device

In [None]:
# Re-bind the three loaders to wrappers that move each batch to `device`.
train_dl, val_dl, test_dl = [
    DeviceDataLoader(loader, device) for loader in (train_dl, val_dl, test_dl)
]

## **Training the Model**

In [None]:
import torch
from tqdm.notebook import tqdm

# evaluation pass, run with autograd switched off
@torch.no_grad()
def evaluate(model, val_loader):
    """Put ``model`` in eval mode, score every batch and return the combined result.

    Each batch of ``val_loader`` goes through ``model.validation_step``; the
    collected outputs are handed to ``model.validation_epoch_end``.
    """
    model.eval()
    step_results = []
    for batch in val_loader:
        step_results.append(model.validation_step(batch))
    return model.validation_epoch_end(step_results)

# training
def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` with a fixed learning rate, validating after every epoch.

    Args:
        epochs: number of passes over ``train_loader``.
        lr: learning rate handed to ``opt_func``.
        model: module offering ``training_step`` and ``epoch_end`` (and the
            validation hooks used by ``evaluate``).
        train_loader: iterable of training batches.
        val_loader: iterable of validation batches.
        opt_func: optimizer factory called as ``opt_func(model.parameters(), lr)``.

    Returns:
        A list with one result dict per epoch: the output of ``evaluate``
        plus the mean training loss under ``'train_loss'``.
    """
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        # Training Phase
        model.train()
        train_losses = []
        for batch in tqdm(train_loader):
            loss = model.training_step(batch)
            # Store a detached copy: only the value is needed for the epoch
            # average, and keeping the attached tensor would hold on to
            # autograd history for every batch of the epoch.
            train_losses.append(loss.detach())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        model.epoch_end(epoch, result)
        history.append(result)
    return history

# learning rate of the optimizer's first parameter group
def get_lr(optimizer):
    """Return ``'lr'`` of the first entry in ``optimizer.param_groups``.

    Returns ``None`` when there are no parameter groups.
    """
    groups = iter(optimizer.param_groups)
    try:
        first = next(groups)
    except StopIteration:
        return None
    return first['lr']

# training with a one-cycle learning-rate schedule
def fit_one_cycle(epochs, max_lr, model, train_loader, val_loader,
                  weight_decay=0, grad_clip=None, opt_func=torch.optim.SGD):
    """Train ``model`` under a OneCycleLR schedule, validating after every epoch.

    Args:
        epochs: number of passes over ``train_loader``.
        max_lr: passed both to the optimizer and to ``OneCycleLR`` as its
            ``max_lr``.
        model: module offering ``training_step`` and ``epoch_end`` (and the
            validation hooks used by ``evaluate``).
        train_loader: training batches; must support ``len()`` because the
            scheduler needs the number of steps per epoch.
        val_loader: iterable of validation batches.
        weight_decay: forwarded to the optimizer.
        grad_clip: when truthy, gradients are clipped element-wise to this
            value before each optimizer step.
        opt_func: optimizer factory called as
            ``opt_func(model.parameters(), max_lr, weight_decay=weight_decay)``.

    Returns:
        A list with one result dict per epoch: the output of ``evaluate``
        plus ``'train_loss'`` (mean training loss) and ``'lrs'`` (the learning
        rate recorded after every batch).
    """
    # Release cached GPU memory once before training starts.
    torch.cuda.empty_cache()
    history = []

    # Set up custom optimizer with weight decay
    optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay)
    # Set up one-cycle learning rate scheduler
    sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs=epochs,
                                                steps_per_epoch=len(train_loader))

    for epoch in range(epochs):
        # Training Phase
        model.train()
        train_losses = []
        lrs = []
        for batch in tqdm(train_loader):
            loss = model.training_step(batch)
            # Store a detached copy: only the value is needed for the epoch
            # average, and keeping the attached tensor would hold on to
            # autograd history for every batch of the epoch.
            train_losses.append(loss.detach())
            loss.backward()

            # Gradient clipping
            if grad_clip:
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)

            optimizer.step()
            optimizer.zero_grad()

            # Record & update learning rate
            lrs.append(get_lr(optimizer))
            sched.step()

        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs
        model.epoch_end(epoch, result)
        history.append(result)
    return history

In [None]:
# one output per class found in the training set
model = ButterFlyModel(len(train_ds.classes))
# NOTE(review): the return value is discarded; this relies on nn.Module.to()
# moving the module's parameters in place — confirm if the model type changes
to_device(model,device)
print("Model Built..")

In [None]:
# score of the model on the validation set before the training process;
# this entry has 'val_loss' and 'val_acc' only (no 'train_loss' or 'lrs')

history = [evaluate(model, val_dl)]
history

In [None]:
# defining parameters for the model (all forwarded to fit_one_cycle)
epochs = 15            # passes over the training loader
max_lr = 0.01          # optimizer learning rate and peak of the one-cycle schedule
grad_clip = 0.1        # element-wise gradient clipping value
weight_decay = 1e-4    # handed to the optimizer

# optimizer factory used instead of fit_one_cycle's default SGD
opt_func = torch.optim.Adam

**Training**

In [None]:
print("Starting Training .. ..")
# wall-clock timing of the whole training run
start = time.time()
# history already holds the pre-training evaluation, so the per-epoch
# results are appended to it
history += fit_one_cycle(epochs, max_lr, model, train_dl, val_dl, 
                         grad_clip=grad_clip, 
                         weight_decay=weight_decay, 
                         opt_func=opt_func)
end = time.time()
print(f"Finished training in {end-start} seconds..")

## **Testing**

In [None]:
# loss and accuracy on the test loader; the keys are still named
# 'val_loss' / 'val_acc' because evaluate() reuses the validation hooks
evaluate(model,test_dl)

In [None]:
# per-entry validation accuracy and loss (entry 0 is the pre-training evaluation)
accuracies = [x["val_acc"] for x in history]
val_loss = [x["val_loss"] for x in history]

In [None]:
# .get() because the pre-training entry has no 'train_loss' key (it yields None)
train_loss = [x.get("train_loss") for x in history]

In [None]:
# validation accuracy per history entry (index 0 = before training)

plt.plot(accuracies,marker = "*",c = "green")
plt.xlabel('epoch')
plt.ylabel('accuracy')
# trailing semicolon keeps the notebook from echoing the return value
plt.title('Accuracy vs. No. of epochs');

In [None]:
# training loss (blue) against validation loss (red) per history entry

plt.plot(train_loss, '-bx')
plt.plot(val_loss, '-rx')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['Training', 'Validation'])
# trailing semicolon keeps the notebook from echoing the return value
plt.title('Loss vs. No. of epochs');

In [None]:
# learning rate recorded after every training batch, across all epochs;
# entries without an 'lrs' key (the pre-training evaluation) contribute nothing

lrs = np.concatenate([x.get('lrs', []) for x in history])
plt.plot(lrs)
plt.xlabel('Batch no.')
plt.ylabel('Learning rate')
# trailing semicolon keeps the notebook from echoing the return value
plt.title('Learning Rate vs. Batch no.');

## Predictions v/s labels

In [None]:
# function to get prediction and labels
def makePrediction(dataset, num_images = 25):
    """Predict the class of the first ``num_images`` samples of ``dataset``.

    Uses the module-level ``model`` and ``device``.

    Args:
        dataset: indexable collection of ``(image_tensor, label)`` pairs.
        num_images: how many samples to score; capped at ``len(dataset)``.

    Returns:
        ``(predicted, actual)``: two lists of class indexes of equal length.
    """
    predicted = []
    actual = []
    # Never index past the end of the dataset.
    num_images = min(num_images, len(dataset))

    # Score in eval mode with autograd off, then restore the previous mode so
    # the call has no lasting effect on the model.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for i in range(num_images):
                # getting label and image
                img, label = dataset[i]
                actual.append(label)

                # making prediction: add a batch axis and move to the device
                img_batched = to_device(img.unsqueeze(0),device)

                _,pred = torch.max(model(img_batched), dim = 1)
                predicted.append(pred[0].item())
    finally:
        model.train(was_training)

    return predicted,actual

# denormalise one image and move its channel axis last
def denPermute_test(images, means, stds):
    """Return ``images * stds + means`` permuted with ``permute(1, 2, 0)``.

    ``means`` and ``stds`` are reshaped to ``(3, 1, 1)`` so they broadcast over
    the leading (channel) axis of a single three-axis image tensor.
    """
    shift, scale = (
        torch.tensor(values).reshape(3, 1, 1) for values in (means, stds)
    )
    restored = images * scale + shift
    return restored.permute(1, 2, 0)

In [None]:
# predictions and true labels for the whole test set; both lists are reused
# by the plotting, confusion-matrix and report cells
pred,actual = makePrediction(test_ds,len(test_ds))

In [None]:
def plotPredictions(testData, num=25):
    """Plot the first ``num`` test images labelled with actual and predicted class.

    Uses the module-level ``pred`` and ``actual`` lists, so ``testData`` must
    be the dataset those lists were computed from, in the same order.

    Args:
        testData: indexable collection of ``(image_tensor, label)`` pairs.
        num: number of images to draw; capped at the data that is available.
    """
    # Do not run past the dataset or the stored predictions.
    num = min(num, len(testData), len(pred), len(actual))
    cols = 5
    # Keep the 5x5 layout for up to 25 images and add rows beyond that, so a
    # larger ``num`` no longer asks for a subplot index outside the grid.
    rows = max(5, -(-num // cols))
    plt.figure(figsize=[22, 14 * rows / 5])
    for i in range(num):
        plt.subplot(rows,cols,i+1)
        image,_ = testData[i]
        # keep values in [0, 1], the range plt.imshow expects for float RGB data
        image = denPermute_test(image, *imagenet_stats).clamp(0, 1)
        plt.imshow(image)
        plt.xlabel(f"Actual : {classes[actual[i]]}")
        plt.ylabel(f"Pred : {classes[pred[i]]}")
        plt.xticks([])
        plt.yticks([])
    plt.show()

In [None]:
# first 25 test images with their actual and predicted class names
plotPredictions(test_ds)

## Confusion matrix

In [None]:
plt.figure(figsize=[25,14])
# scikit-learn's signature is confusion_matrix(y_true, y_pred): with the true
# labels first, rows are the actual classes and columns the predicted ones.
sns.heatmap(confusion_matrix(actual,pred),annot=True, fmt = "d" ,cmap = "Blues");

### **Classification Report**

In [None]:
# scikit-learn's signature is classification_report(y_true, y_pred); passing
# them the other way round swaps the reported precision and recall.
print(classification_report(actual,pred))

## **SUMMARY**
- A classification task on a dataset containing images of different species of butterflies.
- Prepare data into train, test and validation parts using PyTorch's built-in modules.
- Performing data augmentation to potentially improve the performance of our model.
- Defining classes and functions for ease of visualizing the model improvements.
- Using a pre-trained model for better scores after trying CNNs and simple neural nets.
- Testing and visualizing the results.
- Dataset at : https://www.kaggle.com/gpiosenka/butterfly-images40-species
- Helper functions from : https://jovian.ai/learn/deep-learning-with-pytorch-zero-to-gans