In [1]:
import torch 
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from google.colab import drive
from zipfile import ZipFile 
from sklearn.model_selection import train_test_split
import cv2
from sklearn.model_selection import StratifiedKFold

import os
import sys
from datetime import datetime
import random

# Vendored packages stored on Google Drive that should be importable from this notebook.
package_paths = [
    '/content/gdrive/MyDrive/kaggle-models/efficientnet_pytorch-0.7.0',
    '/content/gdrive/MyDrive/kaggle-models/FMix-master',
]

# A: every directory added to sys.path is searched by `import`, so modules that
# live in the folders above can be imported like installed packages.
sys.path.extend(package_paths)

print(sys.path)

# Run on the GPU when one is visible to PyTorch, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

['', '/env/python', '/usr/lib/python36.zip', '/usr/lib/python3.6', '/usr/lib/python3.6/lib-dynload', '/usr/local/lib/python3.6/dist-packages', '/usr/lib/python3/dist-packages', '/usr/local/lib/python3.6/dist-packages/IPython/extensions', '/root/.ipython', '/content/gdrive/MyDrive/kaggle-models/efficientnet_pytorch-0.7.0', '/content/gdrive/MyDrive/kaggle-models/FMix-master']
cuda


In [2]:
# Clear PyTorch's CUDA cache, then print the current GPU status with nvidia-smi
# so the memory usage can be checked before training.
torch.cuda.empty_cache()
!nvidia-smi

Wed Feb  3 21:02:38 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.39       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   39C    P0    48W / 300W |     10MiB / 16130MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
# Manually kill a process that is holding GPU memory, then list the running
# python processes to confirm what is left.
# NOTE(review): the PID below is hard-coded from an earlier session; in the
# recorded run it no longer existed ("No such process"). Look the PID up in the
# nvidia-smi / ps output before re-running this cell.
!kill 587
!ps -elf | grep python

/bin/bash: line 0: kill: (587) - No such process
4 S root          61       1  0  80   0 - 49629 epoll_ 20:16 ?        00:00:22 /usr/bin/python2 /usr/local/bin/jupyter-notebook --ip="172.28.0.2" --port=9000 --FileContentsManager.root_dir="/" --LargeFileManager.delete_to_trash=False --MappingKernelManager.root_dir="/content"
4 S root         288       1  0  80   0 -  4596 wait   20:18 ?        00:00:00 bash -c tail -n +0 -F "/root/.config/Google/DriveFS/Logs/drive_fs.txt" | python3 /opt/google/drive/drive-filter.py > "/root/.config/Google/DriveFS/Logs/timeouts.txt" 
4 S root         290     288  0  80   0 -  7304 pipe_r 20:18 ?        00:00:00 python3 /opt/google/drive/drive-filter.py
4 S root         699      61 25  80   0 - 1804721 select 21:02 ?      00:00:02 /usr/bin/python3 -m ipykernel_launcher -f /root/.local/share/jupyter/runtime/kernel-065a91d4-0f95-4082-a7a7-ff50cbffce39.json
0 S root         724     699  0  80   0 -  9800 wait   21:02 ?        00:00:00 /bin/bash -c ps -elf | 

In [4]:
# Install / upgrade the extra packages this notebook needs on the Colab runtime.
# NOTE(review): the installs are unpinned, so a re-run may pull different
# versions than the ones shown in the recorded output (tqdm 4.56.0,
# albumentations 0.5.2) — consider pinning them.
!pip install tqdm --upgrade
!pip install -U albumentations
!pip install timm
import timm # A: provides create_model(), used below to build the EfficientNet backbone
from tqdm.notebook import tqdm

Requirement already up-to-date: tqdm in /usr/local/lib/python3.6/dist-packages (4.56.0)
Requirement already up-to-date: albumentations in /usr/local/lib/python3.6/dist-packages (0.5.2)


In [5]:
# CFG stands for configuration: every tunable value and path used by the notebook.
CFG = {
    # --- data / schedule ---
    'img_size' : 512,            # side length (pixels) of the square crop fed to the network
    'epoch_num' : 5,             # number of passes of the outer training loop
    'fold_num' : 5,              # n_splits for StratifiedKFold
    # --- optimisation ---
    'lr' : 1e-3,
    'min_lr' : 1e-6,             # not referenced by the cells visible in this notebook
    'momentum' : 0.9,            # not referenced by the cells visible in this notebook
    # A: torch.optim.Adam applies weight_decay as an L2 penalty added to the
    # gradient (it is not the decoupled decay of AdamW).
    'weight_decay':1e-6,
    'train_batch_size' : 8,
    'val_batch_size' : 16,
    'models' : ['tf_efficientnet_b4_ns'],  # timm architecture names

    # --- paths ---
    'img_path_prefix' : '/content/train_images/',
    'train_path_prefix' : '/content/gdrive/MyDrive/kaggle-competition-datasets/cassava-leaf-disease-classification/',
    'train_dir_path' : '/content/train_images/',
    'model_save_path_prefix' : '/content/gdrive/MyDrive/kaggle-models/kaggle-leaf-classification-models/',
    'log_path_prefix' : '/content/gdrive/MyDrive/kaggle-models/kaggle-leaf-classification-models/',

    # --- bounds of the random suffix appended to run names ---
    # Both bounds must be ints: they are passed to random.randint(), which
    # rejects non-integer arguments on recent Python versions (1e6 is a float).
    'range_low' : 0,
    'range_high' : 1000000,
}

In [6]:
# Mount Google Drive, then unpack the zipped training images onto the local disk.
drive.mount('/content/gdrive')

train_path_prefix = CFG['train_path_prefix']
train_dir_path = CFG['train_dir_path']
train_zip_path = f"{train_path_prefix}train_images.zip"

# Extraction is skipped when the target directory already exists.
images_already_extracted = os.path.isdir(train_dir_path)
if not images_already_extracted:
  with ZipFile(train_zip_path, mode='r') as archive:
    archive.extractall('/content')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [7]:
# from fmix import sample_mask, make_low_freq_image, binarise_mask

In [8]:
# Load the leaf image ids and their labels.
# The CSV is parsed once and both columns are taken from the same frame, so the
# two arrays are guaranteed to be row-aligned.
train_df = pd.read_csv(train_path_prefix + 'train.csv')
train_images_id = train_df['image_id'].to_numpy()
train_labels = train_df['label'].to_numpy()

In [9]:
# define the image-read function
def read_img_from_path(path):
    """Read the image at ``path`` and return it as an RGB array.

    Args:
        path: Filesystem path of the image to load.

    Returns:
        A new contiguous array holding the image with its channel axis
        reversed from OpenCV's BGR order to RGB.

    Raises:
        FileNotFoundError: If OpenCV could not load the file. ``cv2.imread``
            signals a missing or unreadable file by returning ``None`` instead
            of raising, which would otherwise surface as a confusing
            ``TypeError`` on the slice below.
    """
    im_bgr = cv2.imread(path)
    if im_bgr is None:
        raise FileNotFoundError('Could not read image: ' + str(path))
    # A: `::-1` walks the last (channel) axis backwards, turning BGR into RGB.
    # `.copy()` materialises the reversed view as a contiguous array.
    im_rgb = im_bgr[:, :, ::-1].copy()
    return im_rgb

# Sanity check: load one known training image and show its array shape.
sample_img_path = CFG['train_dir_path'] + '1000015157.jpg'
test_img = read_img_from_path(sample_img_path)
print(test_img.shape)

(600, 800, 3)


In [10]:
# define image transformation functions
from albumentations.pytorch import ToTensorV2
from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout, ShiftScaleRotate, CenterCrop, Resize
) # Q: More information about this library

# Augmentation + preprocessing pipeline; the steps run in the order listed.
# It is called as transform(image=<array>) and the result is read from the
# 'image' key. The validation and test datasets defined later in this notebook
# also use this same (random) pipeline.
transform = Compose(
    [
     RandomResizedCrop(CFG['img_size'], CFG['img_size']),
     HorizontalFlip(p=0.5), # A: p is the probability that the step is applied to a given image
     VerticalFlip(p=0.5),
     ShiftScaleRotate(p=0.1),
     RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
     Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
     ToTensorV2(p=1.0),
    ])

In [11]:
# split images as validation set and training set by the ratio of 8 : 2
# n = len(all_images_id)
# train_image_id, val_image_id, train_image_label, \
#   val_image_label = train_test_split(all_images_id, all_labels, test_size = 0.2)

In [12]:
# define the training and validation dataset
class TrainDataset(Dataset):
  """Leaves training dataset yielding ``(image, label)`` pairs.

  Args:
    train_img_id: Indexable collection of image file names; each one is
      appended to ``CFG['img_path_prefix']`` to build the file path.
    train_img_label: Indexable collection of labels aligned with
      ``train_img_id``.
    transform: Optional callable invoked as ``transform(image=img)`` that
      returns a mapping with an ``'image'`` entry. When ``None`` the image is
      returned exactly as ``read_img_from_path`` produced it.
  """
  def __init__(self, train_img_id, train_img_label, transform = None):
    self.train_img_id = train_img_id
    self.train_img_label = train_img_label
    self.transform = transform

  def __len__(self):
    """Number of samples in the dataset."""
    return len(self.train_img_id)

  def __getitem__(self, idx):
    """Load sample ``idx`` and return it as ``(image, label)``."""
    img_path = CFG['img_path_prefix'] + str(self.train_img_id[idx])
    img = read_img_from_path(img_path)
    label = self.train_img_label[idx]
    if self.transform is not None:
      # Apply the pipeline given to THIS instance. The previous code called the
      # notebook-level `transform` here, silently ignoring the constructor
      # argument.
      img = self.transform(image=img)['image']
    return (img, label)

def _default_val_transform():
  """Return the notebook-level ``transform`` pipeline (the historical default)."""
  return transform


class ValDataset(Dataset):
  """Leaves validation dataset yielding ``(image, label)`` pairs.

  Args:
    val_img_id: Indexable collection of image file names; each one is appended
      to ``CFG['img_path_prefix']`` to build the file path.
    val_img_label: Indexable collection of labels aligned with ``val_img_id``.
    transform: Optional callable invoked as ``transform(image=img)`` that
      returns a mapping with an ``'image'`` entry. When omitted, the
      notebook-level ``transform`` is used, which is what this class always
      did before the parameter existed.

  NOTE(review): the notebook-level ``transform`` contains random augmentations
  (random crop, flips, brightness/contrast), so with the default the validation
  metrics vary from run to run. Pass a deterministic pipeline for stable numbers.
  """
  def __init__(self, val_img_id, val_img_label, transform=None):
    self.val_img_id = val_img_id
    self.val_img_label = val_img_label
    if transform is None:
      # Backward-compatible default: fall back to the module-level pipeline.
      transform = _default_val_transform()
    self.transform = transform

  def __len__(self):
    """Number of samples in the dataset."""
    return len(self.val_img_id)

  def __getitem__(self, idx):
    """Load sample ``idx`` and return it as ``(image, label)``."""
    img_path = CFG['img_path_prefix'] + str(self.val_img_id[idx])
    img = read_img_from_path(img_path)
    if self.transform is not None:
      # Use the instance's pipeline rather than reaching for the global one.
      img = self.transform(image=img)['image']
    label = self.val_img_label[idx]
    return (img, label)

In [13]:
# define the training and validation data loader
def get_dataloaders(train_idx, val_idx, train_image_ids, train_labels):
  """Build the training and validation loaders for one split.

  Args:
    train_idx: Index array selecting the training rows.
    val_idx: Index array selecting the validation rows.
    train_image_ids: Array of all image file names (indexed with the arrays above).
    train_labels: Array of all labels, aligned with ``train_image_ids``.

  Returns:
    ``(train_dataloader, val_dataloader)``. The training loader reshuffles on
    every pass and uses the notebook-level ``transform``; the validation loader
    iterates in a fixed order.
  """
  train_id, train_label = train_image_ids[train_idx], train_labels[train_idx]
  val_id, val_label = train_image_ids[val_idx], train_labels[val_idx]

  train_dataset = TrainDataset(train_id, train_label, transform)
  val_dataset = ValDataset(val_id, val_label)

  # A: num_workers is the number of worker subprocesses used to load batches;
  # 0 means the data is loaded in the main process.
  train_dataloader = DataLoader(train_dataset, batch_size=CFG['train_batch_size'], shuffle=True, num_workers=0)
  # Shuffling buys nothing for evaluation and only makes the batch order
  # irreproducible, so the validation loader keeps the dataset order.
  val_dataloader = DataLoader(val_dataset, batch_size=CFG['val_batch_size'], shuffle=False, num_workers=0)
  return train_dataloader, val_dataloader

In [14]:
# Load a pretrained ResNet-50 and swap its final fully-connected layer for one
# whose output size matches the number of classes in this problem.
num_category = 5
resnet50 = torchvision.models.resnet50(pretrained=True)
resnet50.fc = nn.Linear(in_features=2048, out_features=num_category)
resnet50.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [15]:
# CustomClassifier
class CustomClassifier(nn.Module):
    """A timm model whose ``classifier`` head is replaced by a new linear layer.

    Args:
        model_arch: Architecture name passed to ``timm.create_model``.
        num_class: Output size of the replacement linear head.
        pretrained: Forwarded to ``timm.create_model``.
    """
    def __init__(self, model_arch, num_class, pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_arch, pretrained=pretrained)
        # Size the new head from the input width of the head it replaces.
        head_in_features = backbone.classifier.in_features
        backbone.classifier = nn.Linear(head_in_features, num_class)
        self.model = backbone
        # Alternative head kept for reference (disabled):
        # self.model.classifier = nn.Sequential(
        #     nn.Dropout(0.3),
        #     nn.Linear(n_features, n_class, bias=True)
        # )

    def forward(self, x):
        """Run ``x`` through the wrapped model and return its output."""
        return self.model(x)

In [16]:
# The training function
def train(train_images_id, train_labels, num_epochs, model_name, model, criterion, optimizer, save_model=False, pretrained=False):
  """Train ``model`` and report validation metrics for every fold of every epoch.

  For each epoch a fresh shuffled ``StratifiedKFold`` split is drawn; for each
  fold the model is trained on the training part and evaluated on the held-out
  part. Metrics are printed and appended to a log file under
  ``CFG['log_path_prefix']``.

  Args:
    train_images_id: Array of image file names.
    train_labels: Array of labels aligned with ``train_images_id``.
    num_epochs: Number of passes of the outer loop.
    model_name: Tag used in the log and checkpoint file names.
    model: The network to train; it is moved to ``device``.
    criterion: Loss called as ``criterion(outputs, labels)``.
    optimizer: Optimizer already bound to ``model``'s parameters.
    save_model: When true, write a ``state_dict`` checkpoint under
      ``CFG['model_save_path_prefix']`` after every epoch.
    pretrained: Only controls whether the "Using pretrained model" message is
      printed.

  NOTE(review): the same model is trained across all folds and the folds are
  re-drawn every epoch, so over time the model is trained on every image,
  including ones it is later validated on. Treat the validation accuracy as
  optimistic rather than as a clean cross-validation estimate.
  """
  cur_time = datetime.now().strftime('%Y-%m-%d-%H-%M')
  log_path = CFG['log_path_prefix'] + 'log_' + cur_time + '_' + model_name + '.txt'
  if pretrained:
    print('Using pretrained model ' + model_name + '...')
  # move the training model to the selected device
  model = model.to(device)
  print('Start training...')
  for epoch in range(num_epochs):
    folds = StratifiedKFold(n_splits=CFG['fold_num'], shuffle=True).split(np.arange(train_images_id.shape[0]), train_labels)
    for fold_idx, (train_idx, val_idx) in enumerate(folds):
      train_dataloader, val_dataloader = get_dataloaders(train_idx, val_idx, train_images_id, train_labels)

      # ---- training process ----
      # TODO: Try autocast
      # Training mode: dropout active, batch-norm uses and updates batch statistics.
      model.train()
      train_loss_sum = 0.0
      train_batch_cnt = 0
      for inputs, labels in tqdm(train_dataloader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss_sum += loss.item()
        train_batch_cnt += 1
      # Keep the training loss in its own variable: the validation pass below
      # must not overwrite the value that is written to the log.
      train_loss = train_loss_sum / train_batch_cnt
      print('Training Epoch: [%d] | loss: %.3f' % (epoch + 1, train_loss))

      # ---- validation process ----
      # Evaluation mode: dropout disabled, batch-norm uses its running statistics.
      model.eval()
      val_loss_sum = 0.0
      correct_prediction_cnt = 0
      val_batch_cnt = 0
      with torch.no_grad():
        for inputs, labels in tqdm(val_dataloader):
          inputs, labels = inputs.to(device), labels.to(device)

          outputs = model(inputs)
          loss = criterion(outputs, labels)

          # .item() accumulates a Python float instead of a device tensor
          val_loss_sum += loss.item()
          correct_prediction_cnt += torch.eq(torch.argmax(outputs, dim=1), labels).sum().item()
          val_batch_cnt += 1
      val_loss = val_loss_sum / val_batch_cnt
      val_accuracy = correct_prediction_cnt * 1.0 / len(val_dataloader.dataset)
      print('Validation Epoch [%d] | loss: %.3f | accuracy: %.3f' % (epoch + 1, val_loss, val_accuracy))

      # Log the current metrics (the log keeps its original 0-based fold/epoch numbering)
      with open(log_path, 'a') as f:
        f.write('Training Fold %d | Epoch %d\n' % (fold_idx, epoch))
        f.write('Training Loss: %.3f\n' % train_loss)
        f.write('Validation Loss: %.3f | Accuracy: %.3f \n\n' % (val_loss, val_accuracy))
    # Save the model once per epoch. The file name carries the number of
    # completed epochs so that successive checkpoints do not overwrite each
    # other; fold_idx is the index of the last fold processed in this epoch.
    if save_model:
      model_state_save_path = CFG['model_save_path_prefix'] + 'fold_' + str(fold_idx) + '_epoch_' + str(epoch + 1) + '_' + model_name + '.pt'
      torch.save(model.state_dict(), model_state_save_path)

In [17]:
# Train resnet50

# criterion = nn.CrossEntropyLoss()
# resnet50_optimizer = optim.Adam(resnet50.parameters(), lr=CFG['lr'], weight_decay=CFG['weight_decay']) # Q: Characteristics of Adam Optimizer?
# train(train_images_id, train_labels, CFG['epoch_num'], \
#       'resnet50' + str(random.randint(CFG['range_low'], CFG['range_high'])), resnet50, criterion, resnet50_optimizer, True)

In [None]:
# Train efficientnet
num_output_category = len(np.unique(train_labels))
criterion = nn.CrossEntropyLoss()
efficientnet = CustomClassifier(CFG['models'][0], num_output_category, pretrained=True)
efficientnet_optimizer = optim.Adam(efficientnet.parameters(), lr=CFG['lr'], weight_decay=CFG['weight_decay'])
# A random numeric suffix keeps the log / checkpoint names of separate runs apart.
# random.randint() requires integer bounds (non-integer arguments are rejected
# on recent Python versions), so the configured bounds are coerced explicitly.
efficientnet_run_name = 'efficientnet' + str(random.randint(int(CFG['range_low']), int(CFG['range_high'])))
train(train_images_id, train_labels, CFG['epoch_num'], \
      efficientnet_run_name, efficientnet, criterion, efficientnet_optimizer, save_model=True)

Start training...


  0%|          | 0/2140 [00:00<?, ?it/s]

In [None]:
# TODO: load the trained efficientnet model

In [None]:
# load the trained resnet50 model
pretrained_model_path = '/content/trained_epoch_3_dict_model.pt'
resnet50_copy = torchvision.models.resnet50(pretrained=False)
resnet50_copy.fc = nn.Linear(2048, num_category)
# This copy is only used for inference: switch to evaluation mode so dropout is
# disabled and batch-norm uses its stored running statistics.
resnet50_copy.eval()
# map_location='cpu' lets a checkpoint saved from a GPU run be loaded on a
# machine without CUDA; the inference cell feeds CPU tensors to this model.
resnet50_copy.load_state_dict(torch.load(pretrained_model_path, map_location='cpu'))

<All keys matched successfully>

In [None]:
# Collect the file names of every test image.
test_img_path = '/content/test_images'
test_img_id = os.listdir(test_img_path)
print(test_img_id)

# create the test dataset
class TestDataset(Dataset):
  """Leaves test dataset yielding ``(image, file_name)`` pairs.

  Args:
    test_img_id: Indexable collection of image file names.
    transform: Optional callable invoked as ``transform(image=img)`` that
      returns a mapping with an ``'image'`` entry. When ``None`` the image is
      returned exactly as ``read_img_from_path`` produced it.
    img_dir: Directory that contains the files named in ``test_img_id``.
  """
  def __init__(self, test_img_id, transform = None, img_dir = '/content/test_images/'):
    self.test_img_id = test_img_id
    self.transform = transform
    self.img_dir = img_dir

  def __len__(self):
    """Number of samples in the dataset."""
    return len(self.test_img_id)

  def __getitem__(self, idx):
    """Load sample ``idx`` and return it as ``(image, file_name)``."""
    img_path = os.path.join(self.img_dir, str(self.test_img_id[idx]))
    img = read_img_from_path(img_path)
    if self.transform is not None:
      # The pipeline takes its input as the keyword `image` and returns a
      # mapping; the previous positional call `self.transform(img)` did neither.
      img = self.transform(image=img)['image']
    return img, self.test_img_id[idx]

# define the test dataloader
# Inference uses its own deterministic pipeline (resize + normalise + tensor).
# The training `transform` contains random crops, flips and brightness changes,
# which would make the predicted label for the same image vary between runs.
test_transform = Compose(
    [
     Resize(CFG['img_size'], CFG['img_size']),
     Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
     ToTensorV2(p=1.0),
    ])
test_dataset = TestDataset(test_img_id, test_transform)
test_dataloader = DataLoader(test_dataset, batch_size = 1, shuffle = False, num_workers = 0)

['2216849948.jpg']


In [None]:
# infer the labels of the test dataset
res_test_pred_label = []
res_test_img_id = []
# Evaluation mode: dropout off, batch-norm uses running statistics (essential
# here because the loader feeds one image at a time).
resnet50_copy.eval()
with torch.no_grad():
  for tst_img, tst_img_filename in test_dataloader:
    resnet50_pred = resnet50_copy(tst_img)
    # TODO: Add predictions from efficientnet and avoid the hard-coded ratio
    # Until the ensemble exists the ResNet-50 output is the final prediction
    # (`pred` was previously referenced without ever being assigned).
    pred = resnet50_pred

    # One label per image in the batch; works for any batch size.
    pred_labels = torch.argmax(pred, dim=1).tolist()
    for filename, pred_label in zip(tst_img_filename, pred_labels):
      tst_img_id = filename[:-4]  # drop the 4-character extension, e.g. '.jpg'
      res_test_img_id.append(tst_img_id)
      res_test_pred_label.append(pred_label)

# convert the submission to csv
output_path = '/content/submission.csv'
column_header = ['image_id', 'label']
submission = pd.DataFrame(zip(res_test_img_id, res_test_pred_label), columns=column_header)
submission.to_csv(path_or_buf = output_path, index = False)