# Before you begin

This notebook was written in Google Colab.

To see the interactive plots, please open the Colab link below.

<a href="https://colab.research.google.com/drive/1iNMV8kik9ue6sy8DPgG73zT7wO_pQKtB?usp=sharing" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open in Colab"/></a>

There are many similar notebooks for other competitions; see the GitHub repository below.

<img src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" width=50 align='left' alt="GitHub logo" />
&nbsp; <font size="5">[Github: Kaggle-Notebook](https://github.com/JayAhn0104/Kaggle-Notebook)</font>

# Overview

<br>

## Competition description

<img src="https://drive.google.com/uc?export=view&id=1YzoUGRB3lgAZtEt-ImoMBDt-i6RzHLVX" width=40 align='left' alt="Dog Breed Identification"/>
&nbsp; 
<font size="5">[Dog Breed Identification](https://www.kaggle.com/c/dog-breed-identification/overview)</font>

- Problem type: Multi-class classification for image data (with 3 channels)
  - Predicting the breed (120 classes) of dogs in the images
- Evaluation metric: [Multi Class Log Loss](https://www.kaggle.com/c/dog-breed-identification/overview/evaluation)



# 0. Preliminaries

### > Install Libraries

In [None]:
%%bash

# Upgrade and force-reinstall only the `kaggle` package itself (--no-deps
# leaves its dependencies untouched).
pip install --upgrade --force-reinstall --no-deps kaggle

In [None]:
import collections
import math 
import shutil
import os
import torch
import torchvision
from torch import nn
import numpy as np
%load_ext tensorboard

# 1. Data Preprocessing

## 1-1. Load Data-set

### > Re-organize data files

In [None]:
def copyfile(filename, target_dir):
    """Copy ``filename`` into the directory ``target_dir``.

    The directory (including missing parents) is created first, so callers
    can copy into a location that does not exist yet.
    """
    # exist_ok keeps repeated copies into the same directory from failing.
    os.makedirs(name=target_dir, exist_ok=True)
    shutil.copy(src=filename, dst=target_dir)

def read_csv_labels(fname):
    """Read a two-column CSV and return a ``{name: label}`` dictionary.

    The first line is treated as the column header and skipped. Blank lines
    (for example a trailing empty line at the end of the file) are ignored
    instead of breaking the ``name, label`` split.

    Args:
        fname: Path to the CSV file; every data row is ``name,label``.

    Returns:
        dict mapping the first column to the second column.

    Raises:
        ValueError: if a non-blank row does not have exactly two fields.
    """
    with open(fname, 'r') as f:
        # Skip the header line; the default keeps an empty file from raising.
        next(f, None)
        # Stream the remaining lines rather than materialising the file.
        return dict(line.rstrip().split(',') for line in f if line.strip())

def reorg_train_valid(data_dir, labels, valid_ratio):
    """Copy the training images into per-class train/valid/train_valid folders.

    Every file in ``<data_dir>/train`` is copied to
    ``train_valid_test/train_valid/<label>``. The first files seen for each
    label, up to the per-class validation quota, are additionally copied to
    ``train_valid_test/valid/<label>``; all later ones go to
    ``train_valid_test/train/<label>``.

    Returns the number of validation examples taken per class.
    """
    # The quota is derived from the rarest class so that every class can
    # contribute the same number of validation images.
    class_sizes = collections.Counter(labels.values()).most_common()
    _, rarest_class_size = class_sizes[-1]
    n_valid_per_label = max(1, math.floor(rarest_class_size * valid_ratio))

    source_dir = os.path.join(data_dir, 'train')
    output_root = os.path.join(data_dir, 'train_valid_test')
    valid_taken = {}
    for train_file in os.listdir(source_dir):
        # Labels are keyed by the file name up to its first dot.
        label = labels[train_file.split('.')[0]]
        source_path = os.path.join(source_dir, train_file)
        copyfile(source_path, os.path.join(output_root, 'train_valid', label))
        if valid_taken.get(label, 0) < n_valid_per_label:
            copyfile(source_path, os.path.join(output_root, 'valid', label))
            valid_taken[label] = valid_taken.get(label, 0) + 1
        else:
            copyfile(source_path, os.path.join(output_root, 'train', label))
    return n_valid_per_label

def reorg_test(data_dir):
    """Copy every test image into a single ``unknown`` class folder.

    Files from ``<data_dir>/test`` end up in
    ``<data_dir>/train_valid_test/test/unknown``.
    """
    source_dir = os.path.join(data_dir, 'test')
    target_dir = os.path.join(data_dir, 'train_valid_test', 'test', 'unknown')
    for test_file in os.listdir(source_dir):
        copyfile(os.path.join(source_dir, test_file), target_dir)

In [None]:
def reorg_dog_data(data_dir, valid_ratio):
    """Reorganize the dataset under ``data_dir`` into class folders.

    Reads ``labels.csv``, splits the training images into
    train/valid/train_valid folders, then lays out the test images.
    """
    labels_path = os.path.join(data_dir, 'labels.csv')
    filename_to_label = read_csv_labels(labels_path)
    reorg_train_valid(data_dir, filename_to_label, valid_ratio)
    reorg_test(data_dir)

# Root folder of the competition data; the reorganized copy is written below
# it, in `<data_dir>/train_valid_test`.
# NOTE(review): `/kaggle/input` is presumably read-only on Kaggle kernels —
# confirm this location is writable in the environment where this runs.
data_dir = os.path.join('/kaggle/input/dog-breed-identification')
# Fraction of the smallest class size used as the per-class validation count.
valid_ratio = 0.1
reorg_dog_data(data_dir, valid_ratio)

### > Check sample images

In [None]:
from PIL import Image
from matplotlib import pyplot as plt

# Preview the first ten raw training images in a grid.
folder_path = os.path.join(data_dir, 'train')
image_list = os.listdir(folder_path)

images = []
for file in image_list[:10]:
    # The context manager closes the file handle once the pixels have been
    # copied into the array.
    with Image.open(os.path.join(folder_path, file)) as img:
        images.append(np.array(img))

plt.figure(figsize=(20,10))
columns = 5
# subplot() requires an integer row count; true division would pass a float.
rows = len(images) // columns + 1
for i, image in enumerate(images):
    plt.subplot(rows, columns, i + 1)
    plt.imshow(image)

## 1-2. Data Transformation

### > Define a transformation function for Train-set

In [None]:
# Augmentation pipeline applied to training images.
transform_train = torchvision.transforms.Compose([
    # Take a random crop covering 8%-100% of the source area with an aspect
    # ratio between 3:4 and 4:3, then resize it to 224 x 224.
    torchvision.transforms.RandomResizedCrop(
        size=224, scale=(0.08, 1.0), ratio=(3.0/4.0, 4.0/3.0)),
    torchvision.transforms.RandomHorizontalFlip(),
    # Random jitter of brightness, contrast, and saturation.
    torchvision.transforms.ColorJitter(
        brightness=0.4, contrast=0.4, saturation=0.4),
    torchvision.transforms.ToTensor(),
    # Per-channel standardization with fixed mean / std values.
    torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

### > Define a transformation function for Test-set

In [None]:
# Deterministic pipeline for evaluation: no random augmentation.
transform_test = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=256),
    # Keep the central 224 x 224 square.
    torchvision.transforms.CenterCrop(size=224),
    torchvision.transforms.ToTensor(),
    # Same per-channel standardization as the training pipeline.
    torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

### > Transform the Data-set

In [None]:
# Folder that holds the reorganized train / train_valid / valid / test splits.
split_root = os.path.join(data_dir, 'train_valid_test')

# Splits used for fitting get the augmenting transform.
train_ds = torchvision.datasets.ImageFolder(
    os.path.join(split_root, 'train'), transform=transform_train)
train_valid_ds = torchvision.datasets.ImageFolder(
    os.path.join(split_root, 'train_valid'), transform=transform_train)

# Splits used for evaluation / prediction get the deterministic transform.
valid_ds = torchvision.datasets.ImageFolder(
    os.path.join(split_root, 'valid'), transform=transform_test)
test_ds = torchvision.datasets.ImageFolder(
    os.path.join(split_root, 'test'), transform=transform_test)

In [None]:
# Report how many images each split contains.
split_sizes = {
    'train': len(train_ds),
    'valid': len(valid_ds),
    'train_valid': len(train_valid_ds),
    'test': len(test_ds),
}
print('Number of observations \n'
      + '\n'.join(f'{split}: {size}' for split, size in split_sizes.items()))

## 1-3. Wrap the datasets in DataLoaders

In [None]:
batch_size = 128

# Loaders used for fitting: reshuffled every epoch, and the last incomplete
# batch is dropped so every optimisation step sees a full batch.
train_iter = torch.utils.data.DataLoader(
    train_ds, batch_size, shuffle=True, drop_last=True)
train_valid_iter = torch.utils.data.DataLoader(
    train_valid_ds, batch_size, shuffle=True, drop_last=True)

# Evaluation must see every validation image: with drop_last=True the final
# partial batch was silently left out of all validation metrics.
valid_iter = torch.utils.data.DataLoader(valid_ds, batch_size, shuffle=False,
                                         drop_last=False)

# Keep file order and every sample so predictions can be matched to test ids.
test_iter = torch.utils.data.DataLoader(test_ds, batch_size, shuffle=False,
                                        drop_last=False)

In [None]:
# Class-name -> integer-label mapping taken from the training dataset, plus
# its inverse for turning predicted indices back into class names.
class_to_idx = train_iter.dataset.class_to_idx
idx_to_class = {index: name for name, index in class_to_idx.items()}

### > Check the transformed data-set

In [None]:
from matplotlib import pyplot as plt
images, labels = next(iter(train_iter))

# Preview the first ten augmented training images together with their labels.
images = images[:10]
labels = labels[:10]
labels_name = [idx_to_class[idx.item()] for idx in labels]
plt.figure(figsize=(20,10))
columns = 5
# subplot() requires an integer row count; true division would pass a float.
rows = len(images) // columns + 1
for i, image in enumerate(images):
    plt.subplot(rows, columns, i + 1)
    plt.text(5,5, labels_name[i], bbox={'facecolor': 'white', 'pad':10})
    # Reorder the tensor axes (0, 1, 2) -> (1, 2, 0) for imshow and convert it
    # to a NumPy array explicitly instead of relying on np.transpose to
    # coerce a torch tensor.
    image = image.permute(1, 2, 0).numpy()
    plt.imshow(image)

# 2. Model Training (Hyper-parameter Tuning)

## 2-1. Define some functions needed in Training & Tuning process

### > Define a Trainable function that is compatible with ray.tune

In [None]:
from ray import tune

def Trainable(config, train_loader=None, valid_loader=None, device=None, checkpoint_dir=None):
  """Train and validate one hyper-parameter configuration for Ray Tune.

  Builds a fresh model with ``get_net``, trains it for ``config['n_epochs']``
  epochs on ``train_loader``, evaluates it on ``valid_loader`` after every
  epoch, writes a checkpoint and reports the epoch metrics through
  ``tune.report``.

  Args:
    config: dict with at least ``optimizer`` ("Adam" selects Adam, anything
      else SGD), ``wd`` (weight decay), ``lr`` and ``n_epochs``.
    train_loader: iterable of ``(X, y)`` training batches.
    valid_loader: iterable of ``(X, y)`` validation batches.
    device: device the model and the batches are moved to.
    checkpoint_dir: directory containing a ``checkpoint`` file to restore
      model and optimizer state from; nothing is restored when falsy.

  NOTE(review): ``lr_period`` / ``lr_decay`` are not read here, so no
  learning-rate schedule is applied. After a restore the epoch loop still
  starts at 0 — confirm whether that is intended.
  """
  model = get_net(device)
  # model.apply(init_weights) is intentionally not used: the net is pretrained.

  loss_fn = nn.CrossEntropyLoss(reduction='mean')

  # Only parameters left trainable by get_net are handed to the optimizer.
  trainable_params = [param for param in model.parameters()
                      if param.requires_grad]
  if config["optimizer"] == "Adam":
    optimizer_cls = torch.optim.Adam
  else:
    optimizer_cls = torch.optim.SGD
  optimizer = optimizer_cls(trainable_params,
                            weight_decay=config["wd"], lr=config["lr"])

  # Restore a previous state when Tune hands us a checkpoint to resume from.
  if checkpoint_dir:
    model_state, optimizer_state = torch.load(
        os.path.join(checkpoint_dir, "checkpoint"))
    model.load_state_dict(model_state)
    optimizer.load_state_dict(optimizer_state)

  for epoch in range(config['n_epochs']):
    # ---- Train for one epoch ----
    tr_loss, tr_acc = 0.0, 0.0
    model.train()
    for X, y in train_loader:
      X, y = X.to(device), y.to(device)
      optimizer.zero_grad()
      output = model(X)
      l = loss_fn(output, y)
      l.backward()
      optimizer.step()
      with torch.no_grad():
        tr_acc += torch_accuracy(output, y)
        # .item() yields a plain Python float instead of a NumPy scalar.
        tr_loss += l.item()

    # ---- Validate with the shared evaluation helper ----
    val_loss, val_acc = eval_fn(valid_loader, model, loss_fn, device)

    # ---- Save a checkpoint ----
    # A distinct name avoids shadowing the `checkpoint_dir` argument.
    with tune.checkpoint_dir(epoch) as epoch_checkpoint_dir:
      path = os.path.join(epoch_checkpoint_dir, "checkpoint")
      torch.save((model.state_dict(), optimizer.state_dict()), path)

    # Metrics are averages of the per-batch values.
    tune.report(
        tr_loss=(tr_loss / len(train_loader)),
        tr_accuracy=(tr_acc / len(train_loader)),
        val_loss=float(val_loss),
        val_accuracy=float(val_acc)
        )
        

### > Define some miscellaneous functions

In [None]:
def torch_accuracy(pred, y):
  """Return the fraction of rows of ``pred`` whose arg-max equals ``y``."""
  predicted_classes = pred.argmax(1)
  n_correct = torch.eq(predicted_classes, y).type(torch.float).sum().item()
  return n_correct / len(y)

def try_gpu(i=0):
    """Return ``'cuda:<i>'`` when at least ``i + 1`` GPUs exist, else ``'cpu'``."""
    if i + 1 <= torch.cuda.device_count():
        return f'cuda:{i}'
    return 'cpu'

def init_weights(m):
  """Xavier-initialise the weight of a Linear / Conv2d layer and zero its bias.

  Intended for ``module.apply(init_weights)``; any other module type is left
  untouched. Layers created with ``bias=False`` have ``m.bias is None`` and
  previously made the bias initialisation raise, so the bias is now only
  filled when it exists.
  """
  # Exact type match (not isinstance) keeps the set of touched layers
  # identical to before, i.e. subclasses are still skipped.
  if type(m) in (nn.Linear, nn.Conv2d):
    nn.init.xavier_uniform_(m.weight)
    if m.bias is not None:
      nn.init.constant_(m.bias, 0)

def eval_fn(data_iter, net, loss_fn, device=None):
  """Evaluate ``net`` on ``data_iter`` and return ``(loss, accuracy)``.

  Both values are the per-batch results averaged over the number of batches.
  The network is switched to eval mode and no gradients are tracked.
  """
  net.eval()
  total_loss, total_acc = 0.0, 0
  with torch.no_grad():
    for X, y in data_iter:
      X, y = X.to(device), y.to(device)
      logits = net(X)
      batch_loss = loss_fn(logits, y)
      total_acc += torch_accuracy(logits, y)
      total_loss += batch_loss.cpu().numpy()
  n_batches = len(data_iter)
  return total_loss/n_batches, total_acc/n_batches

def trial_str_creator(trial):
    """Build a trial directory name from the trainable name and trial id."""
    return f"{trial.trainable_name}_{trial.trial_id}_123"

def get_best_model(result, metric="val_loss", mode="min", device=None):
    """Rebuild the model of the best trial from its checkpoint.

    The best trial is chosen from ``result`` by the last reported value of
    ``metric`` under ``mode``. Its config and final validation loss / accuracy
    are printed.

    Returns:
        ``(model, config, checkpoint_dir)`` for the best trial.
    """
    best_trial = result.get_best_trial(metric, mode, "last")
    final_metrics = best_trial.last_result
    print(f"Best trial config: {best_trial.config}")
    print(f"Best trial final validation loss: {final_metrics['val_loss']}")
    print(f"Best trial final validation accuracy: {final_metrics['val_accuracy']}")

    best_checkpoint_dir = best_trial.checkpoint.value
    checkpoint_path = os.path.join(best_checkpoint_dir, "checkpoint")

    best_trained_model = get_net(device)
    # The checkpoint stores (model state, optimizer state); only the model
    # part is needed here.
    model_state, _optimizer_state = torch.load(checkpoint_path)
    best_trained_model.load_state_dict(model_state)

    return best_trained_model, best_trial.config, best_checkpoint_dir

### > Define a model structure

- Model: Pretrained ResNet34 

In [None]:
def get_net(device):
    """Build the fine-tuning network and move it to ``device``.

    The network is a pretrained ResNet-34 (``features``) followed by a new
    head (``output_new``) mapping its 1000 outputs to 120 classes. All
    ``features`` parameters are frozen, so only the head is trainable.
    """
    backbone = torchvision.models.resnet34(pretrained=True)
    # Freeze the backbone so only the new head receives gradient updates.
    for frozen_param in backbone.parameters():
        frozen_param.requires_grad = False

    # New output head (there are 120 output categories).
    head = nn.Sequential(nn.Linear(1000, 256),
                         nn.ReLU(),
                         nn.Linear(256, 120))

    finetune_net = nn.Sequential()
    finetune_net.add_module('features', backbone)
    finetune_net.add_module('output_new', head)
    return finetune_net.to(device)

## 2-2. Run Hyper-parameter Tuning (with Training)

### > Define some essential components for Tuning

In [None]:
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

# Settings for the tuning run itself (output location, trial count, resources).
args = dict(
    local_dir=os.path.abspath("Tune_Result"),
    num_samples=6,
    cpus_per_trial=1,
    gpus_per_trial=0.5,
    name="ResNet34_Pretrained",
)

# Search space: weight decay and learning rate are sampled log-uniformly,
# everything else is held fixed.
config = dict(
    model="ResNet34_Pretrained",
    wd=tune.loguniform(1e-3, 1e-1),
    lr=tune.loguniform(1e-3, 1e-1),
    lr_period=2,
    lr_decay=0.9,
    batch_size=128,
    optimizer="Adam",
    n_epochs=20,
)

# Scheduler driven by the reported validation loss (lower is better).
scheduler = ASHAScheduler(
    metric="val_loss",
    mode="min",
    max_t=config["n_epochs"],
    grace_period=1,
    reduction_factor=2,
)

# Progress table shown in the notebook while the trials run.
reported_metrics = ["tr_loss", "tr_accuracy", "val_loss", "val_accuracy",
                    "training_iteration"]
reporter = tune.JupyterNotebookReporter(True, metric_columns=reported_metrics)


### > Run Tune & Get the best trained model

In [None]:
from functools import partial
# Bind the data loaders and the device so Tune only has to pass `config`.
trainable_with_data = partial(
    Trainable, train_loader=train_iter, valid_loader=valid_iter,
    device=try_gpu())
trial_resources = {"cpu": args["cpus_per_trial"],
                   "gpu": args["gpus_per_trial"]}

# Launch the hyper-parameter search.
result = tune.run(
    trainable_with_data,
    scheduler=scheduler,
    progress_reporter=reporter,
    config=config,
    resources_per_trial=trial_resources,
    num_samples=args["num_samples"],
    local_dir=args["local_dir"],
    name=args["name"],
    trial_dirname_creator=trial_str_creator,
)

# Reload the model of the trial with the lowest final validation loss.
best_trained_model, best_config, best_checkpoint_dir = get_best_model(
    result, metric="val_loss", mode="min", device=try_gpu())

### > Visualize the Tuning & Training results with TensorBoard

In [None]:
# Go three directory levels up from the best trial's checkpoint directory and
# point TensorBoard at that folder.
# NOTE(review): presumably this is the Tune results root, so that all trials
# are shown — confirm against the actual checkpoint directory layout.
log_dir = os.path.dirname(os.path.dirname(os.path.dirname(best_checkpoint_dir )))

%tensorboard --logdir {log_dir}

# 3. Make a prediction with the best model

## 3-1. Train model on whole Train-set with best hyper-params

In [None]:
from tqdm import notebook

# Retrain from scratch on train + valid with the best hyper-parameters found.
device = try_gpu()
model = get_net(device)
config = best_config

loss_fn = nn.CrossEntropyLoss(reduction='mean')

# Only parameters left trainable by get_net are handed to the optimizer.
trainable_params = [param for param in model.parameters()
                    if param.requires_grad]
if config["optimizer"] == "Adam":
  optimizer = torch.optim.Adam(trainable_params,
                               weight_decay=config["wd"], lr=config["lr"])
else:
  optimizer = torch.optim.SGD(trainable_params,
                              weight_decay=config["wd"], lr=config["lr"])

n_batches = len(train_valid_iter)
for epoch in notebook.tqdm(range(config['n_epochs'])):
  tr_loss, tr_acc = 0.0, 0.0
  model.train()
  for X, y in train_valid_iter:
    X, y = X.to(device), y.to(device)
    optimizer.zero_grad()
    output = model(X)
    l = loss_fn(output, y)
    l.backward()
    optimizer.step()
    with torch.no_grad():
      tr_acc += torch_accuracy(output, y)
      # .item() yields a plain Python float instead of a NumPy scalar.
      tr_loss += l.item()
  # The running totals used to be computed and then discarded; report the
  # per-batch averages so the progress of the final fit is visible.
  print(f'epoch {epoch + 1}: '
        f'tr_loss={tr_loss / n_batches:.4f}, '
        f'tr_accuracy={tr_acc / n_batches:.4f}')

### > Check the prediction results

In [None]:
from matplotlib import pyplot as plt
images, _ = next(iter(test_iter))

device = try_gpu()
# The model was last left in train mode by the training loop; switch to eval
# mode and skip gradient tracking for inference.
model.eval()
with torch.no_grad():
    # dim=1 normalises over the classes of each image. dim=0 normalised each
    # class over the batch, which is not a per-image distribution.
    outputs = torch.nn.functional.softmax(model(images.to(device)), dim=1)
preds = list(outputs.cpu().numpy())
pred = np.array(preds).argmax(1)
pred_name = [idx_to_class[idx.item()] for idx in pred]

# Only the first ten images are displayed. The test set has no ground-truth
# labels, so the stale `labels` slice from the training preview is dropped.
images = images[:10]

plt.figure(figsize=(20,10))
columns = 5
# subplot() requires an integer row count; true division would pass a float.
rows = len(images) // columns + 1
for i, image in enumerate(images):
    plt.subplot(rows, columns, i + 1)
    plt.text(5, 200, f'pred: {pred_name[i]}', bbox={'facecolor': 'white', 'edgecolor': 'blue', 'pad':10})
    # Reorder the tensor axes (0, 1, 2) -> (1, 2, 0) for imshow.
    image = image.permute(1, 2, 0).numpy()
    plt.imshow(image)

## 3-2. Save predictions as submission.csv

In [None]:
# Predict class probabilities for every test image and write submission.csv.
preds = []
# Inference: eval mode and no gradient tracking.
model.eval()
with torch.no_grad():
    for data, _ in test_iter:
        # dim=1 makes each row a probability distribution over the classes;
        # dim=0 normalised across the batch and produced rows that do not
        # sum to one.
        output = torch.nn.functional.softmax(model(data.to(device)), dim=1)
        preds.extend(output.cpu().numpy())
# NOTE(review): assumes the sorted file listing matches the order in which
# test_iter (shuffle=False) yields the images — confirm against ImageFolder.
ids = sorted(os.listdir(
    os.path.join(data_dir, 'train_valid_test', 'test', 'unknown')))
with open('submission.csv', 'w') as f:
    # Header: `id` followed by one column per class name.
    f.write('id,' + ','.join(train_valid_ds.classes) + '\n')
    for i, output in zip(ids, preds):
        f.write(i.split('.')[0] + ',' + ','.join(
            [str(num) for num in output]) + '\n')