In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from PIL import Image
from random import randint
import pandas as pd
import time
import os
# import utils

# Map each species name to its integer class id; the id is the name's
# position in the list below.
classes = {
    species: class_id
    for class_id, species in enumerate([
        "Black-naped Oriole",
        "Brown-throated Sunbird",
        "Collared Kingfisher",
        "Javan Myna",
        "Olive-backed Sunbird",
        "Pink-necked Green Pigeon",
        "Spotted Dove",
        "Striated Heron",
        "White-breasted Waterhen",
        "Yellow-vented Bulbul",
    ])
}

# Folder prefix for the label CSV files read by the loader.
label_folder_name = 'labels/bboxes_after_split/'
# Folder prefix for the image files named in those CSV files.
dataset_folder_name = 'Cropped/'

In [64]:
def load_data_and_label(csv):
    """Load every image listed in a label CSV together with its class id.

    The CSV is read from ``label_folder_name + csv`` and must provide the
    columns ``image_name`` and ``label``. Rows that repeat an ``image_name``
    are dropped (the first occurrence is kept), so each image is loaded once.

    Args:
        csv: File name of the label CSV, relative to ``label_folder_name``.

    Returns:
        A tuple ``(all_data, all_label)``:
        - ``all_data``: the per-image tensors produced by
          ``transforms.ToTensor()`` stacked along a new first dimension, or
          ``None`` when the CSV lists no images.
        - ``all_label``: 1-D ``torch.long`` tensor holding the class id of
          each image, in the same order as ``all_data``.

    Raises:
        KeyError: if a ``label`` value is not a key of ``classes``.
    """
    to_tensor = transforms.ToTensor()

    df = pd.read_csv(label_folder_name + csv)
    df = df.drop_duplicates(subset=['image_name'])

    images = []
    labels = []

    for filename, label_name in zip(df['image_name'], df['label']):
        # Remove the transparency layer: composite onto a white background,
        # then drop the alpha channel. The context manager closes the file
        # handle as soon as the pixel data has been read by convert().
        with Image.open(dataset_folder_name + filename) as raw:
            rgba = raw.convert('RGBA')
        background = Image.new('RGBA', rgba.size, (255, 255, 255))
        rgb = Image.alpha_composite(background, rgba).convert('RGB')

        images.append(to_tensor(rgb))
        labels.append(classes[label_name])

    # Stack once at the end instead of re-concatenating inside the loop,
    # which copied the whole growing tensor on every iteration.
    all_data = torch.stack(images) if images else None
    all_label = torch.tensor(labels, dtype=torch.long)
    return all_data, all_label
    

In [65]:
# Load the training split and report tensor sizes and the label tensor type.
train_data, train_label = load_data_and_label('train_bbs.csv')
print(train_data.size(), train_label.size(), train_label.type(), sep='\n')

# Same for the validation split.
val_data, val_label = load_data_and_label('val_bbs.csv')
print(val_data.size(), val_label.size(), val_label.type(), sep='\n')

torch.Size([700, 3, 224, 224])
torch.Size([700])
torch.LongTensor
torch.Size([150, 3, 224, 224])
torch.Size([150])
torch.LongTensor


In [66]:
def get_error(scores, labels):
    """Return the fraction of rows in ``scores`` whose argmax differs from ``labels``.

    Args:
        scores: 2-D tensor with one row of class scores per sample.
        labels: tensor of target class ids, one per row of ``scores``.

    Returns:
        A 0-dim float tensor equal to ``1 - (#correct / #rows)``.
    """
    n_samples = scores.size(0)
    n_correct = scores.argmax(dim=1).eq(labels).sum()
    return 1 - n_correct.float() / n_samples

In [68]:
class one_layer_net(nn.Module):
    """A single bias-free linear layer mapping inputs to raw class scores."""

    def __init__(self, input_size, output_size):
        """Create the layer.

        Args:
            input_size: number of input features per sample.
            output_size: number of output scores per sample.
        """
        super().__init__()
        self.linear_layer = nn.Linear(input_size, output_size, bias=False)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the resulting scores."""
        return self.linear_layer(x)
    
# One linear layer: flattened 3 x 224 x 224 input -> 10 output scores.
net = one_layer_net(input_size=3 * 224 * 224, output_size=10)

print(net)
# utils.display_num_param(net)

# Loss function and mini-batch size shared by training and evaluation.
criterion = nn.CrossEntropyLoss()
batch_size = 10

def eval_on_test_set():
    """Print and return the error rate of ``net`` on the validation tensors.

    Reads the module-level ``val_data``, ``val_label``, ``net`` and
    ``batch_size``. Walks ``val_data`` in consecutive mini-batches, flattens
    each image to a vector, and accumulates ``get_error`` weighted by the
    number of samples in the batch, so a shorter final batch is neither
    rejected nor over-weighted.

    Returns:
        The error rate as a float in [0, 1] (``nan`` if ``val_data`` is
        empty). The same value, times 100, is printed.
    """
    num_samples = val_data.size(0)
    weighted_error = 0.0

    # Evaluation never calls backward(), so skip building the autograd graph.
    with torch.no_grad():
        for first in range(0, num_samples, batch_size):

            minibatch_data = val_data[first : first + batch_size]
            minibatch_label = val_label[first : first + batch_size]

            # The last batch may hold fewer than batch_size samples.
            current_bs = minibatch_data.size(0)
            inputs = minibatch_data.view(current_bs, -1)

            scores = net(inputs)

            error = get_error(scores, minibatch_label)

            weighted_error += error.item() * current_bs

    total_error = weighted_error / num_samples if num_samples else float('nan')
    print( 'test error  = ', total_error*100 ,'percent')
    return total_error
    
# Train `net` with mini-batch SGD on the training tensors and periodically
# report the training loss/error and the validation error.
start = time.time()

lr = 0.05 # initial learning rate

# Take the dataset size from the data instead of hard-coding it.
num_train = train_data.size(0)

for epoch in range(200):
    
    # learning rate strategy : divide the learning rate by 1.5 every 5 epochs,
    # starting at epoch 10 (epochs 10, 15, 20, ...)
    if epoch % 5 == 0 and epoch > 5: 
        lr = lr / 1.5
    
    # create a new optimizer at the beginning of each epoch: give the current learning rate.   
    optimizer=torch.optim.SGD(net.parameters() , lr = lr)
        
    # sums of per-sample loss / error over the epoch
    running_loss, running_error = 0.0, 0.0
    
    shuffled_indices = torch.randperm(num_train)
 
    for count in range(0, num_train, batch_size):
        
        # forward and backward pass
    
        optimizer.zero_grad()
        
        indices = shuffled_indices[count : count + batch_size]
        minibatch_data = train_data[indices]
        minibatch_label = train_label[indices]

        # The last batch may hold fewer than batch_size samples.
        current_bs = indices.size(0)
        inputs = minibatch_data.view(current_bs, -1)

        # Only the parameters of `net` are optimized, so no gradient is
        # requested with respect to the inputs.
        scores=net( inputs ) 

        loss =  criterion( scores , minibatch_label) 
        
        loss.backward()

        optimizer.step()
        
        
        # compute some stats (weighted by batch size so that a shorter
        # final batch does not skew the epoch averages)
        
        running_loss += loss.item() * current_bs
               
        error = get_error( scores.detach() , minibatch_label)
        running_error += error.item() * current_bs
    
    
    # once the epoch is finished we divide the "running quantities"
    # by the number of training samples
    
    total_loss = running_loss / num_train
    total_error = running_error / num_train
    elapsed_time = time.time() - start
    
    # every 10 epochs we display the stats 
    # and compute the error rate on the validation tensors
    
    if epoch % 10 == 0 : 
    
        print(' ')
        
        print('epoch=',epoch, ' time=', elapsed_time,
              ' loss=', total_loss , ' error=', total_error*100 ,'percent lr=', lr)
        
        eval_on_test_set()
        



one_layer_net(
  (linear_layer): Linear(in_features=150528, out_features=10, bias=False)
)
 
epoch= 0  time= 0.45108795166015625  loss= 727.3730938434601  error= 88.28571370669773 percent lr= 0.05
test error  =  85.99999944368997 percent
 
epoch= 10  time= 4.8259477615356445  loss= 114.09992032732282  error= 49.999999914850505 percent lr= 0.03333333333333333
test error  =  88.66666595141093 percent
 
epoch= 20  time= 9.18831205368042  loss= 9.049389348564103  error= 14.5714294058936 percent lr= 0.014814814814814815
test error  =  82.66666571299235 percent
 
epoch= 30  time= 13.54047679901123  loss= 1.184341823510139  error= 3.8571432658604214 percent lr= 0.006584362139917695
test error  =  75.99999984105428 percent
 
epoch= 40  time= 17.900568962097168  loss= 0.40374792305843543  error= 1.7142859527042933 percent lr= 0.0029263831732967535
test error  =  79.3333331743876 percent
 
epoch= 50  time= 22.27194094657898  loss= 0.1689044158476295  error= 0.7142858845846993 percent lr= 0.00130

In [None]:
# Pick one validation picture at random, print the net's confidence for every
# class, and display the picture.
# NOTE(review): this cell previously referenced `test_data`, a 784-feature
# input and `utils` helpers, none of which exist in this notebook; it now uses
# the validation tensors and names already imported above.

# choose a picture at random (randint is inclusive on both ends)
idx = randint(0, val_data.size(0) - 1)
im = val_data[idx]

# feed it to the net and turn the scores into class probabilities
with torch.no_grad():
    scores = net(im.reshape(1, -1))
probs = torch.softmax(scores, dim=1).squeeze(0)

# display the confidence scores, most likely class first
class_names = {class_id: name for name, class_id in classes.items()}
print('true label:', class_names[val_label[idx].item()])
for class_id in probs.argsort(descending=True).tolist():
    print(f'{class_names[class_id]:<26s} {probs[class_id].item():.3f}')

# display the picture: as the last expression of the cell, the PIL image is
# rendered by the notebook
transforms.ToPILImage()(im)