# Bird classifier

Trains a classifier to predict the species of birds from images.

Based on [tutorial 3](https://colab.research.google.com/drive/1EBz4feoaUvz-o_yeMI27LEQBkvrXNc_4?usp=sharing) and [tutorial 4](https://colab.research.google.com/drive/1kHo8VT-onDxbtS3FM77VImG35h_K_Lav?usp=sharing) from class.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Run on the first CUDA GPU when one is available; otherwise fall back to the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)


In [None]:
import os

# Target size passed to transforms.Resize / RandomCrop in get_bird_data. I tried out 128, 256
image_size = 256

# a string to append to the names of generated files, to track some of the transformations I used for image augmentation and changes in hyperparameters
transform = "crop_hflip_lr_5_01_8_001_10_0001_12_41_m_09_5_05_10_01_12_0_0d"

# Checkpoint *filename prefix* (note the trailing "-"): later cells append names such as
# 'checkpoint-5.pkl' or 'train_losses.png' directly to it, so every generated file lands
# in the prefix's parent directory.
checkpoints = '/kaggle/working/checkpoints/size_' + str(image_size) + "-" + transform + "-"

# Create the directory that actually receives the files. Calling makedirs on the prefix
# itself would leave an unused, empty directory named after the prefix next to them.
os.makedirs(os.path.dirname(checkpoints), exist_ok=True)

# Getting and processing the data

`get_bird_data` loads the train and test splits of the birds dataset and wraps them in data loaders. This is where we resize the images and apply augmentations.

In [None]:
def get_bird_data(augmentation=0):
    """Build the train/test data loaders and label lookup tables for the birds dataset.

    Reads the module-level ``image_size`` for resizing and cropping.

    Args:
        augmentation: Currently unused; kept so existing callers keep working.
            Augmentations are selected by (un)commenting entries of
            ``transform_train`` below.

    Returns:
        dict with keys:
            'train': shuffled DataLoader over the augmented training images
                (batch size 128).
            'test': unshuffled DataLoader over the test images (batch size 1).
            'to_class': maps an ImageFolder label index to the integer parsed
                from the corresponding class folder name.
            'to_name': maps an ImageFolder label index to the matching line of
                names.txt.
    """
    # comment out augmentations we don't want to currently use
    transform_train = transforms.Compose([
        # always performed
        # With an int size, Resize scales the shorter edge to image_size and keeps the aspect ratio.
        transforms.Resize(image_size),
        # Take square image_size x image_size crops from the image padded by 8 px of replicated edge pixels.
        transforms.RandomCrop(image_size, padding=8, padding_mode='edge'),

        # tried adding different additional augmentations!
        transforms.RandomHorizontalFlip(),    # 50% of time mirror the image left-right
#         transforms.TrivialAugmentWide(),
#         transforms.RandAugment(),
#         transforms.AugMix(),
        transforms.RandomVerticalFlip(),    # 50% of time flip the image upside down
        transforms.RandomPerspective(distortion_scale=0.2, p=0.15),
#         transforms.RandomAffine(degrees=(0, 180), translate=(0.01, 0.3), scale=(0.5, 0.75)),
        transforms.RandomRotation(degrees=(0, 10)),
        transforms.RandomAutocontrast(),
        transforms.ColorJitter(brightness=.5, hue=.3, contrast=0, saturation=.05),
        transforms.ToTensor(),
    ])

    # NOTE(review): the test images are only resized (shorter edge -> image_size), not
    # cropped, so they can be non-square and differ in shape from one another. That is
    # compatible with the batch_size=1 test loader below; confirm before raising it.
    transform_test = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
    ])
    trainset = torchvision.datasets.ImageFolder(root='/kaggle/input/birds23sp/birds/train', transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

    testset = torchvision.datasets.ImageFolder(root='/kaggle/input/birds23sp/birds/test', transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False, num_workers=2)

    # Read the class names with a context manager so the file handle is closed promptly.
    with open("/kaggle/input/birds23sp/birds/names.txt") as names_file:
        classes = names_file.read().strip().split("\n")

    # ImageFolder assigns label indices per class folder; the folder names are parsed as
    # integers here, so the mapping index -> class id has to be inverted explicitly.
    class_to_idx = trainset.class_to_idx
    idx_to_class = {int(v): int(k) for k, v in class_to_idx.items()}
    idx_to_name = {k: classes[v] for k, v in idx_to_class.items()}
    return {'train': trainloader, 'test': testloader, 'to_class': idx_to_class, 'to_name': idx_to_name}

# Build the loaders and lookup tables once; later cells read this module-level dict
# (for example, predict() looks up data['to_class']).
data = get_bird_data()

In [None]:
def accuracy(net, dataloader):
    """Return the fraction of samples in ``dataloader`` that ``net`` classifies correctly.

    The network is moved to the module-level ``device`` and switched to eval
    mode; it is left in eval mode when the function returns.

    Args:
        net: Model whose output has one score per class along dimension 1.
        dataloader: Iterable of batches whose first two items are (images, labels).

    Returns:
        Number of correct top-1 predictions divided by the number of samples,
        or 0.0 when the loader yields no samples.
    """
    net.to(device)
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in dataloader:
            images, labels = batch[0].to(device), batch[1].to(device)
            outputs = net(images)
            # Index of the highest score along the class dimension is the predicted label.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # An empty loader would otherwise raise ZeroDivisionError.
    if total == 0:
        return 0.0
    return correct / total

In [None]:
# view some of our images after processing. Useful for seeing the effects of augmentations

# Pull a single batch from the training loader and keep its first 8 images for display.
images, labels = next(iter(data['train']))
images = images[:8]
print(images.size())

def imshow(img):
    """Display an image tensor with matplotlib.

    Assumes ``img`` is laid out channels-first (C, H, W), as the axis
    reordering below implies.
    """
    # matplotlib wants the channel axis last, so reorder (0, 1, 2) -> (1, 2, 0).
    channels_last = img.numpy().transpose((1, 2, 0))
    plt.imshow(channels_last)
    plt.show()

# show images
imshow(torchvision.utils.make_grid(images))
# print one label per displayed image; using len(images) instead of a second hard-coded 8
# keeps this in sync with the slice above and avoids an IndexError on a smaller batch
print("Labels:" + ', '.join('%9s' % data['to_name'][labels[j].item()] for j in range(len(images))))

In [None]:
# train function
def train(net, dataloader, epochs=1, start_epoch=0, lr=0.01, momentum=0.9, decay=0.0005,
          verbose=1, print_every=10, state=None, schedule=None, checkpoint_path=None):
    """Train ``net`` with SGD and cross-entropy loss, optionally resuming from a checkpoint.

    Args:
        net: Model to train; it is moved to the module-level ``device``.
        dataloader: Iterable of batches whose first two items are (inputs, labels).
        epochs: Epoch index to stop at (exclusive); training covers
            ``range(start_epoch, epochs)``.
        start_epoch: First epoch index to run. Replaced by ``state['epoch']``
            when ``state`` is given.
        lr: SGD learning rate.
        momentum: SGD momentum.
        decay: SGD weight decay.
        verbose: When truthy, print the mean loss every ``print_every``
            mini-batches. Learning-rate changes are printed regardless.
        print_every: Number of mini-batches between loss printouts.
        state: Optional checkpoint dict with keys 'net', 'optimizer', 'epoch'
            and 'losses' (the format this function saves). The dict itself is
            not modified.
        schedule: Optional dict mapping epoch index -> learning rate to switch
            to at the start of that epoch. Entries for epochs before the
            starting epoch are replayed so a resumed run ends up with the
            scheduled rate.
        checkpoint_path: Optional filename prefix; after every epoch a
            checkpoint is saved to ``checkpoint_path + 'checkpoint-<epoch>.pkl'``.

    Returns:
        List of per-batch loss values, including those restored from ``state``.

    Note:
        When resuming, ``lr``, ``momentum`` and ``decay`` passed to this call
        take effect. ``optimizer.load_state_dict`` would otherwise silently
        replace them with the values stored in the checkpoint; only the
        per-parameter optimizer state (e.g. momentum buffers) is kept from it.
    """
    # Avoid a shared mutable default argument.
    if schedule is None:
        schedule = {}

    net.to(device)
    net.train()
    losses = []
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum, weight_decay=decay)

    def set_learning_rate(new_lr):
        # Announce and apply a scheduled learning rate to every parameter group.
        print ("Learning rate: %f"% new_lr)
        for group in optimizer.param_groups:
            group['lr'] = new_lr

    # Load previous training state
    if state:
        net.load_state_dict(state['net'])
        optimizer.load_state_dict(state['optimizer'])
        # load_state_dict() also restores the checkpoint's param-group hyperparameters,
        # which would override the arguments of this call; re-apply them explicitly.
        for group in optimizer.param_groups:
            group['lr'] = lr
            group['momentum'] = momentum
            group['weight_decay'] = decay
        start_epoch = state['epoch']
        # Copy so appending new losses does not mutate the caller's checkpoint dict.
        losses = list(state['losses'])

    # Fast forward lr schedule through already trained epochs
    for epoch in range(start_epoch):
        if epoch in schedule:
            set_learning_rate(schedule[epoch])

    for epoch in range(start_epoch, epochs):
        sum_loss = 0.0

        # Update learning rate when scheduled
        if epoch in schedule:
            set_learning_rate(schedule[epoch])

        for i, batch in enumerate(dataloader, 0):
            inputs, labels = batch[0].to(device), batch[1].to(device)

            optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()  # autograd computes all the partial derivatives
            optimizer.step() # takes a step in the negative gradient direction

            batch_loss = loss.item()
            losses.append(batch_loss)
            sum_loss += batch_loss

            if i % print_every == print_every-1:    # report every print_every mini-batches
                if verbose:
                    print('[%d, %5d] loss: %.3f' % (epoch, i + 1, sum_loss / print_every))
                sum_loss = 0.0

        if checkpoint_path:
            # 'epoch' stores the next epoch to run, so resuming continues after this one.
            checkpoint = {'epoch': epoch+1, 'net': net.state_dict(), 'optimizer': optimizer.state_dict(), 'losses': losses}
            torch.save(checkpoint, checkpoint_path + 'checkpoint-%d.pkl'%(epoch+1))
    return losses

In [None]:
def smooth(x, size):
    """Return the moving average of ``x`` over windows of ``size`` consecutive values.

    Uses a 'valid' convolution, so only windows that fully overlap ``x``
    contribute to the result.
    """
    # Uniform averaging kernel: ``size`` weights of 1/size each.
    box_kernel = np.full(size, 1.0 / size)
    return np.convolve(x, box_kernel, mode='valid')

In [None]:
# prediction function
def predict(net, dataloader, ofname):
    """Write the network's top-1 prediction for every sample of ``dataloader`` to a CSV file.

    The file starts with the header ``path,class`` followed by one row
    ``test/<file name>,<class id>`` per sample. Class ids are looked up in the
    module-level ``data['to_class']``; the network is moved to the module-level
    ``device`` and left in eval mode.

    Args:
        net: Model whose output has one score per class along dimension 1.
        dataloader: Loader over a dataset exposing ``samples`` (a list of
            (path, label) pairs). It must iterate the dataset in order
            (no shuffling) so predictions line up with ``samples``. Any batch
            size works.
        ofname: Path of the CSV file to write.
    """
    samples = dataloader.dataset.samples
    # The context manager closes the file even if inference raises part-way through.
    with open(ofname, 'w') as out:
        out.write("path,class\n")
        net.to(device)
        net.eval()
        sample_idx = 0  # position in dataset.samples of the next prediction
        with torch.no_grad():
            for i, (images, _) in enumerate(dataloader, 0):
                if i%100 == 0:
                    print(i)
                outputs = net(images.to(device))
                _, predicted = torch.max(outputs, 1)
                # One row per sample so batches larger than 1 are handled as well.
                for label_idx in predicted.tolist():
                    fname, _ = samples[sample_idx]
                    out.write("test/{},{}\n".format(fname.split('/')[-1], data['to_class'][label_idx]))
                    sample_idx += 1

# Training

Start from a pretrained ResNet-18 and fine-tune it on the bird training data. We can add more epochs and adjust hyperparameters here.

In [None]:
# Load a pretrained ResNet-18 and replace its final layer with a fresh 555-way output layer.
resnet = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True)
resnet.fc = nn.Linear(512, 555) # assigning a new layer also reinitializes its weights

# First training stage. To switch configuration, comment out the active call and
# uncomment the alternative.

# losses = train(resnet, data['train'], epochs=5, lr=.01, print_every=10, checkpoint_path=checkpoints)
stage_1_args = dict(decay=0, epochs=5, lr=.01, print_every=10, checkpoint_path=checkpoints)
losses = train(resnet, data['train'], **stage_1_args)

In [None]:
# Resume from the epoch-5 checkpoint and train up to epoch 10.
resnet = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True)
resnet.fc = nn.Linear(512, 555) # This will reinitialize the layer as well
# map_location lets a checkpoint that was saved on the GPU load on a CPU-only runtime too.
state = torch.load(checkpoints + 'checkpoint-5.pkl', map_location=device)
# losses = train(resnet, data['train'], epochs=12, decay=0, schedule={0:.01, 8:.001, 10:0.0001}, lr=.01, print_every=10, checkpoint_path=checkpoints, state=state)
# losses = train(resnet, data['train'], epochs=10, momentum=0.5, schedule={0:.01, 8:.001}, lr=.01, print_every=10, checkpoint_path=checkpoints, state=state)
losses = train(resnet, data['train'], epochs=10, decay=0, momentum=0.5, schedule={0:.01, 8:.001}, lr=.01, print_every=10, checkpoint_path=checkpoints, state=state)

# losses = train(resnet, data['train'], epochs=7, schedule={0:.01, 4:.001}, lr=.01, print_every=10, checkpoint_path=checkpoints, state=state)
# losses = train(resnet, data['train'], epochs=20, schedule={0:.01, 8:.001}, lr=.01, print_every=10, checkpoint_path=checkpoints, state=state)

In [None]:
# Resume from the epoch-10 checkpoint and train up to epoch 12.
resnet = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True)
resnet.fc = nn.Linear(512, 555) # This will reinitialize the layer as well
# map_location lets a checkpoint that was saved on the GPU load on a CPU-only runtime too.
state = torch.load(checkpoints + 'checkpoint-10.pkl', map_location=device)
# losses = train(resnet, data['train'], epochs=12, momentum=0.1, lr=.0001, print_every=10, checkpoint_path=checkpoints, state=state)
# losses = train(resnet, data['train'], epochs=12, decay=0, momentum=0.1, lr=.0001, print_every=10, checkpoint_path=checkpoints, state=state)
losses = train(resnet, data['train'], epochs=12, decay=0, momentum=0.1, lr=.0001, print_every=10, checkpoint_path=checkpoints, state=state)

In [None]:
# Resume from the epoch-12 checkpoint and train up to epoch 16.
resnet = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True)
resnet.fc = nn.Linear(512, 555) # This will reinitialize the layer as well
# map_location lets a checkpoint that was saved on the GPU load on a CPU-only runtime too.
state = torch.load(checkpoints + 'checkpoint-12.pkl', map_location=device) # change number for how many epochs we used
losses = train(resnet, data['train'], epochs=16, decay=0, momentum=0, lr=.00001, print_every=10, checkpoint_path=checkpoints, state=state)

In [None]:
# Plot the losses returned by the last train() call, averaged over 50-batch windows,
# and save the figure using the checkpoint prefix.
smoothed_losses = smooth(losses, 50)
plot_title = "Losses for size " + str(image_size) + transform
plt.plot(smoothed_losses)
plt.title(label=plot_title)
plt.savefig(checkpoints + 'train_losses.png')

In [None]:
# Load the final checkpoint once; it provides both the loss history and the weights.
# map_location lets a checkpoint that was saved on the GPU load on a CPU-only runtime too.
state = torch.load(checkpoints + 'checkpoint-16.pkl', map_location=device) # change num for epochs used

# save a plot of the losses stored in the checkpoint
plt.plot(smooth(state['losses'], 50))
plt.title(label="State losses for " + str(image_size) + transform)
plt.savefig(checkpoints + 'train_state.png')

# Load model from checkpoint
resnet = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True)
resnet.fc = nn.Linear(512, 555) # This will reinitialize the layer as well
resnet.load_state_dict(state['net'])

# make predictions
predict(resnet, data['test'], checkpoints + "preds_augmented.csv")

# Note: data['train'] applies the random training augmentations, so this is the
# accuracy on augmented training images.
print("Training  accuracy: %f" % accuracy(resnet, data['train']))