# Proof of concept in torch

## What we know
* Images have different size 
    -fully convolutional network or resize
    - what to do with batch size
* Extra images has no labels such as right, left and top
* Image position asinput or axuiliary output 
* Possibly use CapsNet

In [109]:
import torch 
import torch.utils.data as torch_data
import torch.nn as nn
import torch.optim as optim

import torchvision.io as io
import torchvision.transforms as T

import pandas as pd
import os

import numpy as np
from tqdm import tqdm

In [58]:
# Loading data
TRAIN_CSV_PATH = "../data/train.csv"
TEST_CSV_PATH = "../data/test.csv"
EXTRA_IMAGES_PATH = "../data/extra_images.csv"
SAMPLE_SUBMISSION_PATH = "../data/sample_submission.csv"
IMAGES_PATH = "../data/images"

In [5]:
# Label map
train_csv = pd.read_csv(TRAIN_CSV_PATH)
# test_csv  = pd.read_csv(TEST_CSV_PATH)
extra_images = pd.read_csv(EXTRA_IMAGES_PATH)

all_imgs = pd.concat([train_csv, extra_images])

In [6]:
unique_ids = set(all_imgs.turtle_id)
len(unique_ids)
# filtered_set = [x for x in unique_ids if not np.isnan(x)]
all_labels = list(unique_ids)
all_labels.append("new_turtle")
len(all_labels)

2266

In [5]:
# 't_id_WLAIJIZ0' in unique_ids
# str('nan') in unique_ids

In [40]:
# str('nan')
def get_target_by_turtle_id(turtle_id):
    idx = all_labels.index(turtle_id)
    return get_target_vector(idx)


def get_target_vector(idx):
    vec = torch.tensor(np.zeros(2266))
    vec[idx] = 1
    return vec

In [92]:
class TurtleDataset(torch_data.Dataset):
    
    def __init__(self, img_labels, img_dir, 
                 img_transform = None, transform_target = None):
        self.img_labels = pd.read_csv(img_labels)
        self.img_dir = img_dir
        self.img_transform = img_transform
        self.transform_target = transform_target
   
    def __len__(self):
        return len(self.img_labels)
    
    def __getitem__(self, idx):
        # Image path
        img_id = self.img_labels.iloc[idx, 0] # First col
        img_filename = img_id + ".JPG"
        img_path = os.path.join(self.img_dir, img_filename)
        
        # Reading an image
        image = io.read_image(img_path)
        image_location = self.img_labels.iloc[idx, 1]
        turtle_id = self.img_labels.iloc[idx, 2]
        
        if self.img_transform:
            image = self.img_transform(image)
        if self.transform_target:
            label = self.transform_target(label)
        
        return (image.to(torch.float) / 255).cuda(), image_location, get_target_by_turtle_id(turtle_id).cuda()        

In [93]:
transform = T.Compose([
    T.Resize((250, 250))
])

turtle_dataset = TurtleDataset(
    img_labels = TRAIN_CSV_PATH,
    img_dir = IMAGES_PATH,
    img_transform = transform
)

In [94]:
img, pos, lbl = turtle_dataset[0]

In [95]:
img.dtype

torch.float32

In [121]:
train_turtle_dl = torch_data.DataLoader(turtle_dataset, batch_size=32, shuffle=True, drop_last=True)

## Sample architecture

In [122]:
turtle_net_1 = nn.Sequential(
        nn.Conv2d(3, 10, (3, 3)),
        nn.ReLU(),
        nn.MaxPool2d(3, stride = 2),  
        nn.Conv2d(10, 100, (3, 3), stride=2),
        nn.ReLU(),
        nn.MaxPool2d(3, stride = 2),
        nn.Conv2d(100, 1000, (3, 3), stride=2),
        nn.ReLU(),
        nn.MaxPool2d(3, stride = 2),
        nn.Conv2d(1000, 1500, (3, 3), stride=2),
        nn.ReLU(),
        nn.Conv2d(1500, 2266, (2, 2), stride=2),
        nn.Softmax(1)
    ).cuda()

In [123]:
img, pos, turtle_id = next(iter(train_turtle_dl))

In [62]:
# sample_tensor = sample_batch[0]

In [124]:
# torch.cuda.empty_cache()
out = turtle_net_1(sample_img)

In [125]:
out.squeeze().shape
# get_target_by_turtle_id('t_id_F5eaYH2N')
# get_target_by_turtle_id('new_turtle')

torch.Size([32, 2266])

In [31]:
# torch.cuda.empty_cache()
# del turtle_net_1

In [126]:
# Training loop

optimizer = optim.Adam(turtle_net_1.parameters())
loss_fun = nn.CrossEntropyLoss()

epochs = 5

In [128]:
for e in range(epochs):
    train_loss = 0.0
    for img, pos, turtle_id in tqdm(train_turtle_dl):
        
        # Clear the gradients
        optimizer.zero_grad()
        # Forward Pass
        target = turtle_net_1(img).squeeze()
        # Find the Loss
        # print(target.shape, turtle_id.shape)
        loss = loss_fun(target, turtle_id)
        # Calculate gradients 
        loss.backward()
        # Update Weights
        optimizer.step()
        # Calculate Loss
        train_loss += loss.item()
    
    print(f'Epoch {e+1} \t\t Training Loss: {train_loss / len(train_turtle_dl)}')

100%|██████████| 67/67 [00:31<00:00,  2.13it/s]


Epoch 1 		 Training Loss: 7.690615419131606


100%|██████████| 67/67 [00:31<00:00,  2.12it/s]


Epoch 2 		 Training Loss: 7.690615419131606


100%|██████████| 67/67 [00:31<00:00,  2.16it/s]


Epoch 3 		 Training Loss: 7.690615419131606


100%|██████████| 67/67 [00:32<00:00,  2.08it/s]


Epoch 4 		 Training Loss: 7.690615419131606


100%|██████████| 67/67 [00:33<00:00,  2.02it/s]

Epoch 5 		 Training Loss: 7.690615419131606





In [114]:
nn.ReLU()

ReLU()

In [102]:
optimizer.zero_grad()
# Forward Pass
target = turtle_net_1(img).squeeze()
# Find the Loss
loss = loss_fun(target, turtle_id)
# Calculate gradients 
loss.backward()
# Update Weights
optimizer.step()
# Calculate Loss
# train_loss += loss.item()