<a href="https://colab.research.google.com/github/kmachina/cv_course/blob/master/leaf_classify.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Kaggle Setup

In [0]:
import os

os.environ['KAGGLE_USERNAME'] = 
os.environ['KAGGLE_KEY'] = 
CONTEST_NAME = 'leaf-classification'

!pip install --quiet --upgrade --force-reinstall --no-deps kaggle
!kaggle competitions download -q -c {CONTEST_NAME}
!unzip -q {CONTEST_NAME}.zip && rm -f {CONTEST_NAME}.zip
!unzip -q '*.zip' && rm -f *.zip

[?25l[K     |█████▋                          | 10kB 21.3MB/s eta 0:00:01[K     |███████████▎                    | 20kB 1.8MB/s eta 0:00:01[K     |█████████████████               | 30kB 2.4MB/s eta 0:00:01[K     |██████████████████████▌         | 40kB 1.7MB/s eta 0:00:01[K     |████████████████████████████▏   | 51kB 2.0MB/s eta 0:00:01[K     |████████████████████████████████| 61kB 1.9MB/s 
[?25h  Building wheel for kaggle (setup.py) ... [?25l[?25hdone

4 archives were successfully processed.


## Import Libraries

In [0]:
from __future__ import print_function, division

import torch
import torch.nn.functional as F
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms

from IPython.core.debugger import set_trace
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Dataset

In [0]:
class LeafDataset(Dataset):
    """Leaf images paired with the numeric attributes stored in a dataframe.

    Every item has the form ``((image, attr), label)``:

    * ``image`` is loaded from ``<img_dir>/<row index>.jpg``, converted to RGB
      and passed through ``img_tsfrm``.
    * ``attr`` is a float32 tensor holding the row's non-label columns.
    * ``label`` is a scalar long tensor with the value of ``label_col``, or
      ``NO_LABEL`` when the dataframe has no such column.

    Args:
        dataframe: Frame whose index values name the image files.
        img_tsfrm: Callable applied to the RGB ``PIL.Image`` of each item.
        img_dir: Directory that contains the ``.jpg`` files.
        label_col: Name of the label column. When the frame does not contain
            it, all columns are treated as attributes.
    """

    # Placeholder label returned for frames that carry no label column.
    NO_LABEL = -1

    def __init__(self, dataframe, img_tsfrm, img_dir='/content/images',
                 label_col='species'):
        self.df = dataframe
        self.img_dir = Path(img_dir)
        self.img_transforms = img_tsfrm

        # Resolve column positions once so that item access stays positional.
        columns = list(dataframe.columns)
        self._label_pos = columns.index(label_col) if label_col in columns else None
        self._attr_pos = [
            pos for pos in range(len(columns)) if pos != self._label_pos
        ]

    def __len__(self):
        return len(self.df)

    def _attr_and_label(self, idx):
        """Return the ``(attr, label)`` tensors of the row at position ``idx``."""
        # Cast explicitly: pandas yields float64, while the layers of a
        # default-constructed torch module hold float32 weights.
        values = self.df.iloc[idx, self._attr_pos].to_numpy(dtype=np.float32)
        attr = torch.tensor(values)

        if self._label_pos is None:
            raw_label = self.NO_LABEL
        else:
            raw_label = int(self.df.iloc[idx, self._label_pos])
        label = torch.tensor(raw_label, dtype=torch.long)

        return attr, label

    def __getitem__(self, idx):
        img_path = self.img_dir / f'{self.df.index[idx]}.jpg'
        # The context manager closes the file once the RGB copy has been made.
        with Image.open(img_path) as img_file:
            image = self.img_transforms(img_file.convert('RGB'))
        attr, label = self._attr_and_label(idx)

        return (image, attr), label

In [0]:
def show_tensor_image(img_tensor):
    """Display an image tensor with matplotlib, axes hidden.

    Singleton dimensions are squeezed away. A three-dimensional result whose
    first axis has 3 or 4 entries (and whose last axis does not) is treated as
    channel-first and moved to the channel-last layout that ``imshow``
    requires. Two-dimensional images are drawn with the ``binary_r`` colormap.

    Args:
        img_tensor: Image tensor; it may require grad or live on the GPU.
    """
    img = img_tensor.detach().cpu().numpy().squeeze()

    # imshow only accepts (H, W), (H, W, 3) or (H, W, 4) arrays, so a
    # channel-first colour image has to be transposed before plotting.
    channel_first = (
        img.ndim == 3 and img.shape[0] in (3, 4) and img.shape[-1] not in (3, 4)
    )
    if channel_first:
        img = img.transpose(1, 2, 0)

    fig, ax = plt.subplots()
    ax.set_axis_off()
    ax.imshow(img, cmap='binary_r')
    plt.show()

## Neural Network

In [0]:
class ConvNet(nn.Module):
    """CNN over the image whose features are joined with the numeric attributes.

    Two ``5x5 conv -> ReLU -> 2x2 max-pool`` stages are applied to the image
    and flattened; the attribute vector is concatenated to the result and the
    combined vector goes through a two-layer classifier with dropout.

    Args:
        img_size: Side length of the square input images.
        attr_size: Number of numeric attributes per sample.
        num_classes: Number of output logits.
        hidden_size: Width of the hidden fully connected layer.

    With the defaults the first linear layer has 90080 inputs
    (32 * 53 * 53 image features + 192 attributes).
    """

    def __init__(self, img_size=224, attr_size=192, num_classes=99,
                 hidden_size=1000):
        super().__init__()

        self.conv = nn.Sequential(
            nn.Conv2d(3, 8, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(8, 32, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Flatten()
        )

        fc_inputs = self._conv_out_features(img_size) + attr_size
        self.fc = nn.Sequential(
            nn.Linear(fc_inputs, hidden_size),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden_size, num_classes)
        )

    @staticmethod
    def _conv_out_features(img_size):
        """Return the flattened size produced by ``self.conv`` for a square image."""
        side = img_size
        for _ in range(2):
            # An unpadded 5x5 convolution trims 4 pixels; the pool then halves.
            side = (side - 4) // 2
        return 32 * side * side

    def forward(self, xb):
        """Return class logits for a batch ``xb = (images, attributes)``."""
        x, attr = xb
        conv_x = self.conv(x)
        # Match dtypes so that e.g. float64 attributes do not promote the
        # concatenated tensor away from the dtype of the linear weights.
        attr = attr.to(dtype=conv_x.dtype)
        x = torch.cat((conv_x, attr), 1)
        output = self.fc(x)

        return output

In [0]:
class FCNet(nn.Module):
    """Single linear layer that classifies from the numeric attributes only.

    Args:
        input_size: Number of attributes per sample.
        output_size: Number of output logits. It has to cover every class
            code that appears in the labels, because the cross-entropy loss
            rejects targets outside ``[0, output_size)``.
    """

    def __init__(self, input_size=192, output_size=99):
        super().__init__()
        self.testlayer = nn.Linear(input_size, output_size)

    def forward(self, xb):
        """Return class logits for a batch ``xb = (images, attributes)``.

        The image part of the batch is ignored.
        """
        _, x = xb
        # Align the input dtype with the layer weights (e.g. float64 -> float32).
        x = x.to(dtype=self.testlayer.weight.dtype)
        output = self.testlayer(x)

        return output

In [0]:
def init_weights(m):
    """Initialise a single module in place; meant for ``Module.apply``.

    Modules that are exactly ``nn.Conv2d`` get Xavier-normal weights. Modules
    that are exactly ``nn.Linear`` get Xavier-normal weights and a bias filled
    with ones. Every other module type is left untouched.
    """
    layer_type = type(m)

    if layer_type is nn.Conv2d:
        nn.init.xavier_normal_(m.weight.data)
        return

    if layer_type is not nn.Linear:
        return

    nn.init.xavier_normal_(m.weight)
    m.bias.data.fill_(1)

## Model Configurations

### Data Loader

In [0]:
def get_data():
    """Build the data loaders for the train, validation and test splits.

    ``/content/train.csv`` is split 80/20 (fixed seed) into training and
    validation rows after the species names are replaced by integer category
    codes; ``/content/test.csv`` forms the test split. Training images get a
    random resized crop and a random horizontal flip; validation and test
    images are resized and centre-cropped.

    Only the training loader shuffles. Validation and test batches keep the
    dataframe row order, so outputs can be matched back to the row ids.

    Returns:
        dict: ``DataLoader`` objects keyed by ``'train'``, ``'valid'`` and
        ``'test'``.
    """
    splits = ['train', 'valid', 'test']

    # Get raw dataframe
    df = pd.read_csv('/content/train.csv', index_col='id')
    df['species'] = df['species'].astype('category').cat.codes

    # Get dataframes
    train_df = df.sample(frac=0.8, random_state=0)
    valid_df = df.drop(train_df.index)
    test_df = pd.read_csv('/content/test.csv', index_col='id')
    dataframes = {
        'train': train_df, 'valid': valid_df, 'test': test_df
    }

    # Batch sizes
    batch_sizes = {'train': 32, 'valid': 64, 'test': 64}

    # Data transformations
    img_transforms = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor()
        ]),
        'valid': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor()
        ])
    }
    img_transforms['test'] = img_transforms['valid']

    # Datasets
    datasets = {
        x: LeafDataset(dataframes[x], img_transforms[x])
        for x in splits
    }

    # Data loaders: shuffling is only useful (and only safe) for training.
    dataloaders = {
        x: DataLoader(datasets[x], batch_sizes[x], shuffle=(x == 'train'))
        for x in splits
    }

    return dataloaders

### Model

In [0]:
def get_model():
    """Create the network together with its loss, optimizer and LR scheduler.

    Returns:
        tuple: ``(model, criterion, optimizer, scheduler)`` where the model is
        an ``FCNet`` moved to ``device``, the criterion is
        ``F.cross_entropy``, the optimizer is Adam (lr 1e-2, betas
        (0.5, 0.999)) and the scheduler is ``StepLR`` with step size 7 and
        gamma 0.1.
    """
    net = FCNet()
    net = net.to(device)

    adam = optim.Adam(net.parameters(), betas=(0.5, 0.999), lr=1e-2)
    step_lr = optim.lr_scheduler.StepLR(adam, gamma=0.1, step_size=7)

    return net, F.cross_entropy, adam, step_lr

## Training and Testing

### Training

In [0]:
def fit(model, criterion, opt, scheduler, train_dl, valid_dl, epoch_num):
    """Train ``model`` and report the validation loss after every epoch.

    Args:
        model: Network to optimise.
        criterion: Loss callable taking ``(prediction, target)``.
        opt: Optimizer that updates the parameters of ``model``.
        scheduler: Learning-rate scheduler, stepped once at the end of the
            training part of each epoch; ``None`` disables scheduling.
        train_dl: Iterable of ``(x_batch, y_batch)`` training batches.
        valid_dl: Iterable of ``(x_batch, y_batch)`` validation batches.
        epoch_num: Number of epochs to run.

    Returns:
        list: One ``[epoch, valid_loss]`` entry per epoch, where
        ``valid_loss`` is the sample-weighted mean validation loss (``nan``
        when the validation loader yields no samples).
    """
    history = []
    for epoch in range(epoch_num):
        model.train()
        for x_batch, y_batch in train_dl:
            loss_batch(model, criterion, x_batch, y_batch, opt)

        # Advance the schedule per epoch. Stepping it per batch would shrink
        # the learning rate every few batches instead of every few epochs.
        if scheduler is not None:
            scheduler.step()

        model.eval()
        with torch.no_grad():
            results = [loss_batch(model, criterion, xb, yb) for xb, yb in valid_dl]

        sample_count = sum(num for _, num in results)
        if sample_count:
            valid_loss = sum(loss * num for loss, num in results) / sample_count
        else:
            valid_loss = float('nan')
        print(f'Epoch {epoch}/{epoch_num} --- Loss: {valid_loss:.5f}')
        history.append([epoch, valid_loss])

    return history

### Testing

In [0]:
def evaluate(model, criterion, test_dl):
    """Placeholder for test-set evaluation.

    None of the arguments is used yet; a fixed value is returned.
    TODO: implement the actual evaluation.
    """
    placeholder_result = 39
    return placeholder_result

### Loss Calculation

In [0]:
def loss_batch(model, criterion, xb, yb, optimizer=None, scheduler=None):
    """Compute the loss of one batch and optionally take an optimisation step.

    Args:
        model: Network called with the ``(xb[0], xb[1])`` pair.
        criterion: Loss callable taking ``(prediction, target)``.
        xb: Pair of input tensors; both are moved to ``device``.
        yb: Target tensor; moved to ``device``.
        optimizer: When given, the loss is back-propagated, the optimizer
            takes a step and the gradients are reset afterwards.
        scheduler: When given, it is stepped once per call, i.e. once per
            batch.

    Returns:
        tuple: ``(loss value as a Python float, number of samples in yb)``.
    """
    x_batch, y_batch = (xb[0].to(device), xb[1].to(device)), yb.to(device)
    pred = model(x_batch)
    loss = criterion(pred, y_batch)

    if optimizer is not None:
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    if scheduler is not None:
        scheduler.step()

    return loss.item(), yb.size(0)

## Execution

In [0]:
# Build the loaders and the (model, criterion, optimizer, scheduler) tuple,
# then train for 400 epochs, validating after each one.
dataloader = get_data()
model = get_model()
history = fit(
    *model,
    train_dl=dataloader['train'],
    valid_dl=dataloader['valid'],
    epoch_num=400,
)