#Competition Name

In [25]:
# Global configuration dictionary shared by the cells below.
FLAGS = {}
# Kaggle competition slug; later cells interpolate it into the input and output paths.
FLAGS['COMP_NAME'] = 'plant-pathology-2021-fgvc8'

#Program Type ("TRIN" reads train_images; any other value, e.g. "INF", reads test_images)

In [26]:
# Run mode. The parameters cell reads train_images when this is "TRIN" and test_images otherwise.
FLAGS['PGM_TYPE'] = "INF"

#Dependencies

In [27]:
# timm supplies timm.create_model, which CustomModel uses to build the network.
# NOTE(review): the version is unpinned -- consider pinning it for reproducible runs.
!pip install timm



In [28]:
!pip install --upgrade --force-reinstall --no-deps albumentations

Collecting albumentations
  Using cached https://files.pythonhosted.org/packages/03/58/63fb1d742dc42d9ba2800ea741de1f2bc6bb05548d8724aa84794042eaf2/albumentations-0.5.2-py3-none-any.whl
Installing collected packages: albumentations
  Found existing installation: albumentations 0.5.2
    Uninstalling albumentations-0.5.2:
      Successfully uninstalled albumentations-0.5.2
Successfully installed albumentations-0.5.2


In [29]:
import numpy as np
import os
import time
import random

import torch
import torch.nn as nn
'''
import torch_xla
import torch_xla.core.xla_model as xm
import torch_xla.distributed.parallel_loader as pl
import torch_xla.distributed.xla_multiprocessing as xmp
'''
from torchvision import datasets, transforms

import pandas as pd
import cv2
from matplotlib import pyplot as plt

import timm

from albumentations import (
    Compose, OneOf, Normalize, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip, 
    RandomBrightness, RandomContrast, RandomBrightnessContrast, Rotate, ShiftScaleRotate, Cutout, 
    IAAAdditiveGaussianNoise, Transpose
    )
from albumentations.pytorch import ToTensorV2
from torch.utils.data import DataLoader, Dataset

from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
import math
import datetime

#for INF
from tqdm.auto import tqdm

#Hardware settings

In [30]:
# Run on a CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

##GPU settings

##Colab Only

In [31]:
#Check GPU status
# `!nvidia-smi` is IPython shell capture: the command's output lines are returned as a list.
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
# The substring 'failed' in the command output is treated as "no usable GPU attached".
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Wed Mar 24 13:57:43 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.56       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    32W / 250W |   1093MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [32]:
#Check Memory size
from psutil import virtual_memory

# Total memory reported by psutil, expressed in decimal gigabytes (bytes / 1e9).
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB is the threshold this notebook uses to tell a high-RAM runtime from a standard one.
if ram_gb >= 20:
  print('You are using a high-RAM runtime!')
else:
  print(
      'To enable a high-RAM runtime, select the Runtime > "Change runtime type"',
      'menu, and then select High-RAM in the Runtime shape dropdown. Then, ',
      're-execute this cell.',
      sep='\n',
  )

Your runtime has 27.4 gigabytes of available RAM

You are using a high-RAM runtime!


In [33]:
#Connect Google drive
from google.colab import drive
drive.mount('/content/drive')
! mkdir -p ~/.kaggle
! cp "drive/My Drive/kaggle/kaggle.json" ~/.kaggle/
!pip install --upgrade --force-reinstall --no-deps kaggle
! kaggle config view
ROOT_DIR="/content/drive/MyDrive"
INPUT_DIR="/kaggle/input"

%cd $ROOT_DIR$INPUT_DIR

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
cp: cannot stat 'drive/My Drive/kaggle/kaggle.json': No such file or directory
Processing /root/.cache/pip/wheels/a1/6a/26/d30b7499ff85a4a4593377a87ecf55f7d08af42f0de9b60303/kaggle-1.5.12-cp37-none-any.whl
Installing collected packages: kaggle
  Found existing installation: kaggle 1.5.12
    Uninstalling kaggle-1.5.12:
      Successfully uninstalled kaggle-1.5.12
Successfully installed kaggle-1.5.12
Configuration values from /root/.kaggle
- username: roganzu
- path: None
- proxy: None
- competition: None
/content/drive/MyDrive/kaggle/input


In [34]:
def seed_torch(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, the ``PYTHONHASHSEED`` environment variable, NumPy and
    PyTorch (CPU and all CUDA devices), and configures cuDNN for repeatable results.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Cover every visible GPU, not just the current one; a no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run; keep it off explicitly.
    torch.backends.cudnn.benchmark = False

seed_torch()

#Parameters (fixed, not tuned)

In [35]:
#data
# Competition folder, addressed relative to the current working directory.
FLAGS['OS_LIST_DIR'] = f"../input/{FLAGS['COMP_NAME']}"
# Image folder depends on the run mode: "TRIN" selects train_images, anything else test_images.
if FLAGS['PGM_TYPE'] == "TRIN":
    FLAGS['DATA_PATH'] = FLAGS['OS_LIST_DIR']+"/train_images"
else:
    FLAGS['DATA_PATH'] = FLAGS['OS_LIST_DIR']+"/test_images"
FLAGS['image_size'] = 384

#model
FLAGS['model_name'] = 'resnext50_32x4d'
FLAGS['target_size'] = 12
# The KAGGLE_DATA_PROXY_TOKEN environment variable is used as the "running on Kaggle" signal.
# NOTE(review): ROOT_DIR is only assigned in the Colab drive cell, yet the Kaggle branch of
# MODEL_PATH reads it -- confirm the intended Kaggle checkpoint location.
if "KAGGLE_DATA_PROXY_TOKEN" in os.environ:
    FLAGS['OUTPUT_DIR'] = "./"
    FLAGS['MODEL_PATH'] = f"{ROOT_DIR}/kaggle/input/{FLAGS['COMP_NAME']}/pth/"
else:
    FLAGS['OUTPUT_DIR'] = f"{ROOT_DIR}/kaggle/output/{FLAGS['COMP_NAME']}"
    FLAGS['MODEL_PATH'] = f"{FLAGS['OUTPUT_DIR']}/pth/"

#LoadData

In [36]:
# Sanity check: list the competition folder that FLAGS['OS_LIST_DIR'] points at.
os.listdir(FLAGS['OS_LIST_DIR'] )

['sample_submission.csv',
 'train.csv',
 'plant-pathology-2021-fgvc8.zip',
 'test_images',
 'train_images']

In [37]:
# Derive the dataset root from FLAGS['COMP_NAME'] instead of repeating the competition
# slug, so this cell stays in step with the other path settings in the notebook.
database_base_path = f"../input/{FLAGS['COMP_NAME']}/"
train = pd.read_csv(f'{database_base_path}train.csv')
print(f'Train samples: {len(train)}')

Train samples: 18632


In [38]:
# Preview the first ten training rows.
train.head(10)

Unnamed: 0,image,labels
0,800113bb65efe69e.jpg,healthy
1,8002cb321f8bfcdf.jpg,scab frog_eye_leaf_spot complex
2,80070f7fb5e2ccaa.jpg,scab
3,80077517781fb94f.jpg,scab
4,800cbf0ff87721f8.jpg,complex
5,800edef467d27c15.jpg,healthy
6,800f85dc5f407aef.jpg,cider_apple_rust
7,801d6dcd96e48ebc.jpg,healthy
8,801f78399a44e7af.jpg,complex
9,8021b94d437eb7d3.jpg,healthy


In [39]:
# sample_submission.csv supplies the test image file names; its `labels` column is
# overwritten later in the notebook.
test = pd.read_csv(f"../input/{FLAGS['COMP_NAME']}/sample_submission.csv")
test.head()

Unnamed: 0,image,labels
0,85f8cb619c66b863.jpg,healthy
1,ad8770db05586b59.jpg,healthy
2,c7b03e718489f3ca.jpg,healthy


In [40]:
# Encode the label strings present in `test` as integer ids, numbered in value_counts() order.
labels = test['labels'].value_counts().index.tolist()
labels_dict = {name: idx for idx, name in zip(range(12), labels)}
test['labels'] = test['labels'].map(labels_dict)
test.head()

Unnamed: 0,image,labels
0,85f8cb619c66b863.jpg,0
1,ad8770db05586b59.jpg,0
2,c7b03e718489f3ca.jpg,0


#Model

In [41]:
class CustomModel(nn.Module):
    """timm network whose `fc` layer is replaced by a fresh linear classification head.

    Args:
        model_name: Architecture name handed to ``timm.create_model``. The default is the
            value ``FLAGS['model_name']`` held when this class was defined.
        pretrained: Forwarded to ``timm.create_model`` as its second positional argument.
    """

    def __init__(self, model_name=FLAGS['model_name'], pretrained=True):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained)
        # Replace the existing head with one that emits FLAGS['target_size'] outputs.
        backbone.fc = nn.Linear(backbone.fc.in_features, FLAGS['target_size'])
        self.model = backbone

    def forward(self, x):
        """Return the wrapped network's output for `x`."""
        return self.model(x)

##Transforming

In [42]:
def get_transforms(*, data):
    """Build the albumentations pipeline for the requested data split.

    Previously the `data` argument was ignored and the random crop/flip augmentations
    were applied to every split, so inference results depended on random state.

    Args:
        data: 'train' for the randomly augmented pipeline, or 'valid' / 'test' for a
            deterministic resize-only pipeline.

    Returns:
        A ``Compose`` that yields a normalized tensor of side FLAGS['image_size'].

    Raises:
        ValueError: If `data` is not one of the supported split names.
    """
    size = FLAGS['image_size']
    # Shared tail of both pipelines: normalize with the mean/std constants, then to tensor.
    tail = [
        Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        ToTensorV2(),
    ]
    if data == 'train':
        return Compose([
            RandomResizedCrop(size, size, scale=(0.85, 1.0)),
            HorizontalFlip(p=0.5),
            *tail,
        ])
    if data in ('valid', 'test'):
        # No randomness at evaluation time: every image maps to exactly one input tensor.
        return Compose([
            Resize(size, size),
            *tail,
        ])
    raise ValueError(f"Unknown data split: {data!r} (expected 'train', 'valid' or 'test')")

In [43]:
class TestDataset(Dataset):
  """Dataset that yields images (no targets) for inference.

  Args:
    df: DataFrame with an 'image' column of file names. A 'labels' column is kept on
      the instance when present but is not required and is never returned.
    transform: Optional callable invoked as ``transform(image=...)``; its 'image'
      entry becomes the sample.
  """

  def __init__(self, df, transform=None):
    self.df = df
    self.file_names = df['image'].values
    # Labels are not used by __getitem__, so do not insist on the column being there.
    self.labels = df['labels'].values if 'labels' in df else None
    self.transform = transform

  def __len__(self):
    return len(self.df)

  def __getitem__(self, idx):
    """Load image `idx` from FLAGS['DATA_PATH'], convert BGR to RGB and transform it.

    Raises:
      FileNotFoundError: If the image cannot be read from disk.
    """
    file_name = self.file_names[idx]
    file_path = f"{FLAGS['DATA_PATH']}/{file_name}"
    image = cv2.imread(file_path)
    # cv2.imread signals failure by returning None rather than raising; surface a
    # clear error here instead of an opaque failure inside cvtColor.
    if image is None:
      raise FileNotFoundError(f"Could not read image: {file_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    if self.transform:
      image = self.transform(image=image)['image']
    return image

In [44]:
# Wrap the submission frame in a Dataset, using the pipeline from get_transforms(data='valid').
test_dataset = TestDataset(test, transform=get_transforms(data='valid'))

#Inference

In [45]:
def inference(model, state, test_loader, device):
    """Predict one class index per sample yielded by `test_loader`.

    Args:
        model: Module to run; moved to `device` and left in eval mode.
        state: State dict loaded into `model` before prediction.
        test_loader: Sized iterable of image batches (tensors).
        device: Device on which the forward passes are executed.

    Returns:
        1-D NumPy integer array of argmax class indices in loader order; an empty
        array when the loader yields no batches. (Earlier versions returned the same
        indices as floats because of a no-op mean over a single prediction.)
    """
    model.to(device)
    # Loading the weights and switching to eval mode do not depend on the batch,
    # so do both once instead of on every iteration.
    model.load_state_dict(state)
    model.eval()

    batch_preds = []
    with torch.no_grad():
        for images in tqdm(test_loader, total=len(test_loader)):
            logits = model(images.to(device))
            batch_preds.append(logits.argmax(1).to('cpu').numpy())

    # np.concatenate rejects an empty list; return an empty result instead of raising.
    if not batch_preds:
        return np.empty(0, dtype=np.int64)
    return np.concatenate(batch_preds)

#Parameters (tunable)

In [46]:
#training
# NOTE(review): of the values below, only batch_size and num_workers are read by the cells
# visible in this notebook (the DataLoader in the main cell). The others look like training
# hyper-parameters carried over from a training notebook -- confirm before pruning.
FLAGS['batch_size'] = 16
FLAGS['num_workers'] = 2
FLAGS['learning_rate'] = 2e-4
FLAGS['min_lr'] = 2e-5
FLAGS['T_max'] = 6e5
FLAGS['weight_decay'] = 1e-4
FLAGS['num_cores'] = 8
FLAGS['num_epochs'] = 2
FLAGS['print_freq'] = 100
FLAGS['max_grad_norm']=1e3

#Execute Main

In [47]:
# Build the network without downloading pretrained weights; the checkpoint supplies them.
model = CustomModel(pretrained=False)
# map_location='cpu' lets a checkpoint saved on an accelerator load on any runtime;
# inference() moves the model to `device` and copies these weights into it.
state = torch.load(f"{FLAGS['MODEL_PATH']}{FLAGS['model_name']}.pth", map_location='cpu')
test_dataset = TestDataset(test, transform=get_transforms(data='valid'))
# shuffle=False keeps predictions aligned with the row order of `test`.
test_loader = DataLoader(test_dataset, batch_size=FLAGS['batch_size'], shuffle=False, 
                         num_workers=FLAGS['num_workers'], pin_memory=True)
predictions = inference(model, state, test_loader, device)
# submission
# NOTE: at this point `labels` holds integer class indices, not label strings.
test['labels'] = predictions.astype(int)
# Make sure the destination exists so the write cannot fail on a fresh output folder.
os.makedirs(FLAGS['OUTPUT_DIR'], exist_ok=True)
test.to_csv(FLAGS['OUTPUT_DIR']+'/submission.csv', index=False)
test.head()

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Unnamed: 0,image,labels
0,85f8cb619c66b863.jpg,0
1,ad8770db05586b59.jpg,6
2,c7b03e718489f3ca.jpg,2


In [48]:
# Decode the predicted class indices into label strings and save the decoded submission.
# NOTE(review): this assumes the training run numbered the classes in
# train['labels'].value_counts() order -- confirm against the training notebook.
labels = list(train['labels'].value_counts().keys())
labels_dict = dict(enumerate(labels))
# Only map while the column still holds numeric ids, so re-running the cell does not
# turn already-decoded strings into NaN.
if pd.api.types.is_numeric_dtype(test['labels']):
    test['labels'] = test['labels'].map(labels_dict)
# The file written by the main cell contains integer ids; overwrite it with label strings.
test.to_csv(FLAGS['OUTPUT_DIR']+'/submission.csv', index=False)
test.head()

Unnamed: 0,image,labels
0,85f8cb619c66b863.jpg,scab
1,ad8770db05586b59.jpg,scab frog_eye_leaf_spot
2,c7b03e718489f3ca.jpg,frog_eye_leaf_spot
