In [1]:
import torch
import numpy as np

# check if CUDA is available
train_on_gpu = torch.cuda.is_available()

if not train_on_gpu:
    print('CUDA is not available.  Training on CPU ...')
else:
    print('CUDA is available!  Training on GPU ...')

CUDA is available!  Training on GPU ...


In [2]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import pandas as pd

from sklearn.preprocessing import LabelEncoder

%matplotlib inline

In [3]:
df = pd.read_csv("../input/100-bird-species/birds/birds.csv")
df["filepaths"] = df["filepaths"].str.replace("\\", "/", regex=False)
prefix = "../input/100-bird-species/birds/"
df["filepaths"] = prefix + df["filepaths"]
le = LabelEncoder()
df["y"] = le.fit_transform(df["labels"])
df.head()

Unnamed: 0.1,Unnamed: 0,filepaths,labels,data set,y
0,0,../input/100-bird-species/birds/train/AFRICAN ...,AFRICAN CROWNED CRANE,train,0
1,1,../input/100-bird-species/birds/train/AFRICAN ...,AFRICAN CROWNED CRANE,train,0
2,2,../input/100-bird-species/birds/train/AFRICAN ...,AFRICAN CROWNED CRANE,train,0
3,3,../input/100-bird-species/birds/train/AFRICAN ...,AFRICAN CROWNED CRANE,train,0
4,4,../input/100-bird-species/birds/train/AFRICAN ...,AFRICAN CROWNED CRANE,train,0


In [4]:
df["data set"].value_counts(), df["labels"].value_counts()

(train    39364
 valid     1375
 test      1375
 Name: data set, dtype: int64,
 HOUSE FINCH          259
 OVENBIRD             243
 D-ARNAUDS BARBET     243
 SWINHOES PHEASANT    227
 WOOD DUCK            224
                     ... 
 SCARLET MACAW        115
 FRIGATE              115
 GRAY PARTRIDGE       113
 RED HEADED DUCK      113
 BULWERS PHEASANT     113
 Name: labels, Length: 275, dtype: int64)

In [53]:
BATCH_SIZE = 20
from torch.utils.data import Dataset, DataLoader

class Data(Dataset):
    def __init__(self, df: pd.DataFrame) -> None:
        self.files = df["filepaths"].values
        self.y = df["y"].values
        
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, i):
        return torchvision.io.read_image(self.files[i]) / 255.0, self.y[i]
    
train_ds = Data(df[df["data set"]=="train"])
valid_ds = Data(df[df["data set"]=="valid"])
test_ds = Data(df[df["data set"]=="test"])

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE)
valid_loader = DataLoader(valid_ds, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE)

In [67]:
import os

classes = os.listdir("../input/100-bird-species/birds/train")
classes.sort()
classes
len(classes)

275

In [54]:
import torch.nn as nn
import torch.nn.functional as F

class VGG11(nn.Module):
    def __init__(self, in_channels = 3, num_classes=275):
        super(VGG11, self).__init__()
        self.in_channels = in_channels
        self.num_classes = num_classes
        # convolutional layers 
        self.conv_layers = nn.Sequential(
            nn.Conv2d(self.in_channels, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # fully connected linear layers
        self.linear_layers = nn.Sequential(
            nn.Linear(in_features=512*7*7, out_features=4096),
            nn.ReLU(),
            nn.Dropout2d(0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Dropout2d(0.5),
            nn.Linear(in_features=4096, out_features=self.num_classes)
        )
    def forward(self, x):
        x = self.conv_layers(x)
        # flatten to prepare for the fully connected layers
        x = x.view(x.size(0), -1)
        x = self.linear_layers(x)
        return x

**Weight Initialisation with xavier_uniform**

In [55]:
def weights_init(m):
    if isinstance(m, nn.Conv2d):
        torch.nn.init.xavier_uniform_(m.weight.data)
        torch.nn.init.zeros_(m.bias.data)


In [56]:
model = VGG11()

model.apply(weights_init)
print(model)

# move tensors to GPU if CUDA is available
if train_on_gpu:
    model.cuda()

VGG11(
  (conv_layers): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU()
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU()
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (16): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), paddin

**Usin Adam gradient descent in place of SGD**

In [57]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(model.parameters(), lr=0.01,betas=(0.9, 0.999), eps=1e-08)

In [59]:
import torchvision
n_epochs = 10

valid_loss_min = np.Inf

for epoch in range(1,n_epochs+1):
    train_loss = 0
    valid_loss = 0
    
    model.train()
    for data, target in train_loader:
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
            
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()*data.size(0)
        
    model.eval()
    for data, target in valid_loader:
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
            
            
        output = model(data)
        loss = criterion(output,target)
        valid_loss += loss.item()*data.size(0)
    
    train_loss = train_loss/len(train_loader.dataset)
    valid_loss = valid_loss/len(valid_loader.dataset)
        
    
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch, train_loss, valid_loss))
    
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        valid_loss_min,
        valid_loss))
        torch.save(model.state_dict(), 'model_cifar.pt')
        valid_loss_min = valid_loss

Epoch: 1 	Training Loss: 5.913771 	Validation Loss: 5.637714
Validation loss decreased (inf --> 5.637714).  Saving model ...
Epoch: 2 	Training Loss: 4307.893608 	Validation Loss: 5.653804
Epoch: 3 	Training Loss: 37.023719 	Validation Loss: 5.661044
Epoch: 4 	Training Loss: 6.488999 	Validation Loss: 5.663310
Epoch: 5 	Training Loss: 15.408896 	Validation Loss: 5.663910
Epoch: 6 	Training Loss: 20505993.778022 	Validation Loss: 5.664078
Epoch: 7 	Training Loss: 1635849.075546 	Validation Loss: 5.664132
Epoch: 8 	Training Loss: 5.957076 	Validation Loss: 5.664150
Epoch: 9 	Training Loss: 5.957089 	Validation Loss: 5.664155
Epoch: 10 	Training Loss: 5.957093 	Validation Loss: 5.664156


In [60]:
from tqdm import tqdm
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.eval().to(device)

y_preds = []
ys = []
for x, y in tqdm(test_loader):
    y_preds.append(model(x.to(device)).argmax(dim=-1))
    ys.append(y.to(device))
    
(torch.cat(y_preds) == torch.cat(ys)).float().mean()

100%|██████████| 69/69 [00:06<00:00, 10.44it/s]


tensor(0.0036, device='cuda:0')

In [68]:
model.eval()
class_correct = list(0. for i in range(275))
class_total = list(0. for i in range(275))

test_loss = 0.0
for images,labels in test_loader:
    if device:
        images,labels = images.cuda(),labels.cuda()
    out = model(images)
    loss = criterion(out,labels)

    test_loss += loss.item() * images.size(0)


    _,pred = torch.max(out,1)

    correct_tensor = pred.eq(labels.data.view_as(pred))
    correct = np.squeeze(correct_tensor.numpy()) if not device else np.squeeze(correct_tensor.cpu().numpy())

    for i in range(labels.shape[0]):
        tar = labels[i].data

        class_correct[tar] += correct[i].item()
        class_total[tar] += 1

test_loss /= len(test_loader.dataset)
print("Test Loss: {:0.6f}".format(test_loss))

for i in range(275):
    if class_total[i] > 0:
        print("Test Accuracy of {}: {}% \t ({}/{})".format(
            classes[i], 100* class_correct[i]/class_total[i],np.sum(class_correct[i]),np.sum(class_total[i])
        ))

    else:
        print("Test Accuracy of {}: N/A (Not Available)".format(classes[i]))
    

print("\n Average Test Accuracy: {}% \t ({}/{})".format(
    100*np.sum(class_correct)/np.sum(class_total),np.sum(class_correct),np.sum(class_total)))

Test Loss: 5.664156
Test Accuracy of AFRICAN CROWNED CRANE: 0.0% 	 (0.0/5.0)
Test Accuracy of AFRICAN FIREFINCH: 0.0% 	 (0.0/5.0)
Test Accuracy of ALBATROSS: 0.0% 	 (0.0/5.0)
Test Accuracy of ALEXANDRINE PARAKEET: 0.0% 	 (0.0/5.0)
Test Accuracy of AMERICAN AVOCET: 0.0% 	 (0.0/5.0)
Test Accuracy of AMERICAN BITTERN: 0.0% 	 (0.0/5.0)
Test Accuracy of AMERICAN COOT: 0.0% 	 (0.0/5.0)
Test Accuracy of AMERICAN GOLDFINCH: 0.0% 	 (0.0/5.0)
Test Accuracy of AMERICAN KESTREL: 0.0% 	 (0.0/5.0)
Test Accuracy of AMERICAN PIPIT: 0.0% 	 (0.0/5.0)
Test Accuracy of AMERICAN REDSTART: 0.0% 	 (0.0/5.0)
Test Accuracy of ANHINGA: 0.0% 	 (0.0/5.0)
Test Accuracy of ANNAS HUMMINGBIRD: 0.0% 	 (0.0/5.0)
Test Accuracy of ANTBIRD: 0.0% 	 (0.0/5.0)
Test Accuracy of ARARIPE MANAKIN: 0.0% 	 (0.0/5.0)
Test Accuracy of ASIAN CRESTED IBIS: 0.0% 	 (0.0/5.0)
Test Accuracy of BALD EAGLE: 0.0% 	 (0.0/5.0)
Test Accuracy of BALI STARLING: 0.0% 	 (0.0/5.0)
Test Accuracy of BALTIMORE ORIOLE: 0.0% 	 (0.0/5.0)
Test Accuracy of 