# Competition Name

In [1]:
# Global settings dictionary shared by the cells of this notebook.
FLAGS = dict(COMP_NAME='plant-pathology-2021-fgvc8')

# Program (PGM) Type

In [2]:
# Run mode of this notebook. The data-path cell selects train_images only when
# this equals "TRIN"; any other value (such as "INF") selects test_images.
FLAGS['PGM_TYPE'] = "INF"

# Environment Settings

In [3]:
import os

# The presence of KAGGLE_DATA_PROXY_TOKEN marks a Kaggle runtime;
# every other runtime is treated as Colab.
FLAGS['ENV'] = "kaggle" if "KAGGLE_DATA_PROXY_TOKEN" in os.environ else "colab"

# Hardware Settings

In [4]:
# Colab-only bootstrap: optional TPU probe, Google Drive mount, Kaggle CLI
# credentials, and a change of working directory into the Drive-hosted input tree.
if FLAGS['ENV'] == "colab":
  # Check TPU status (only when Colab exposes a TPU address)
  if 'COLAB_TPU_ADDR' in os.environ.keys():
    from tensorflow.python.profiler import profiler_client
    # same address string with '8470' swapped for '8466' before it is handed to the profiler
    tpu_profile_service_address = os.environ['COLAB_TPU_ADDR'].replace('8470', '8466')
    print(profiler_client.monitor(tpu_profile_service_address, 100, 2))

  # Connect Google Drive
  from google.colab import drive
  drive.mount('/content/drive')
  # Copy the Kaggle API credentials from Drive into ~/.kaggle.
  # NOTE(review): the source path is relative, so this depends on the current
  # working directory when the cell runs — confirm it is /content.
  !mkdir -p ~/.kaggle
  !cp "drive/My Drive/kaggle/kaggle.json" ~/.kaggle/
  # NOTE(review): the kaggle package is installed without a version pin.
  !pip install --upgrade --force-reinstall --no-deps kaggle
  !kaggle config view
  # ROOT_DIR is also read later when building FLAGS['OUTPUT_DIR'] on Colab
  ROOT_DIR="/content/drive/MyDrive"
  INPUT_DIR="/kaggle/input"

  # Work from <ROOT_DIR><INPUT_DIR> so the relative "../input/..." paths used later resolve
  %cd $ROOT_DIR$INPUT_DIR

Mounted at /content/drive
Collecting kaggle
[?25l  Downloading https://files.pythonhosted.org/packages/3a/e7/3bac01547d2ed3d308ac92a0878fbdb0ed0f3d41fb1906c319ccbba1bfbc/kaggle-1.5.12.tar.gz (58kB)
[K     |████████████████████████████████| 61kB 6.3MB/s 
[?25hBuilding wheels for collected packages: kaggle
  Building wheel for kaggle (setup.py) ... [?25l[?25hdone
  Created wheel for kaggle: filename=kaggle-1.5.12-cp37-none-any.whl size=73053 sha256=f718602cb22072c3048d20b9ce1c1a89d35eb02ad558146080b15579ab56cc4e
  Stored in directory: /root/.cache/pip/wheels/a1/6a/26/d30b7499ff85a4a4593377a87ecf55f7d08af42f0de9b60303
Successfully built kaggle
Installing collected packages: kaggle
  Found existing installation: kaggle 1.5.10
    Uninstalling kaggle-1.5.10:
      Successfully uninstalled kaggle-1.5.10
Successfully installed kaggle-1.5.12
Configuration values from /root/.kaggle
- username: roganzu
- path: None
- proxy: None
- competition: None
/content/drive/MyDrive/kaggle/input


In [5]:
#Check GPU status
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Sat Mar 27 12:49:05 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.56       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    25W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [6]:
# Check memory size
from psutil import virtual_memory

# total memory reported by psutil, converted to decimal gigabytes
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb >= 20:
  print('You are using a high-RAM runtime!')
else:
  print('To enable a high-RAM runtime, select the Runtime > "Change runtime type"',
        'menu, and then select High-RAM in the Runtime shape dropdown. Then, ',
        're-execute this cell.',
        sep='\n')

Your runtime has 27.4 gigabytes of available RAM

You are using a high-RAM runtime!


In [7]:
import torch

# Use the GPU when CUDA is usable, otherwise fall back to the CPU
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

# Dependencies

In [8]:
import numpy as np
from tensorflow.python.profiler import profiler_client
import time
import random

import torch.nn as nn
from torchvision import datasets, transforms

import pandas as pd
import cv2
from matplotlib import pyplot as plt


from torch.utils.data import DataLoader, Dataset

from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
import math
import datetime

#for INF
from tqdm.auto import tqdm

In [9]:
# xla: the torch_xla modules are imported only when the COLAB_TPU_ADDR
# environment variable is present
if 'COLAB_TPU_ADDR' in os.environ.keys():
  import torch_xla
  import torch_xla.core.xla_model as xm
  import torch_xla.distributed.parallel_loader as pl
  import torch_xla.distributed.xla_multiprocessing as xmp

In [10]:
#timm
import sys
if FLAGS['ENV'] == "kaggle":
  sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
else:
  !pip install timm
import timm

Collecting timm
[?25l  Downloading https://files.pythonhosted.org/packages/9e/89/d94f59780b5dd973154bf506d8ce598f6bfe7cc44dd445d644d6d3be8c39/timm-0.4.5-py3-none-any.whl (287kB)
[K     |█▏                              | 10kB 16.8MB/s eta 0:00:01[K     |██▎                             | 20kB 22.8MB/s eta 0:00:01[K     |███▍                            | 30kB 28.3MB/s eta 0:00:01[K     |████▋                           | 40kB 22.4MB/s eta 0:00:01[K     |█████▊                          | 51kB 16.5MB/s eta 0:00:01[K     |██████▉                         | 61kB 18.5MB/s eta 0:00:01[K     |████████                        | 71kB 15.5MB/s eta 0:00:01[K     |█████████▏                      | 81kB 13.4MB/s eta 0:00:01[K     |██████████▎                     | 92kB 13.1MB/s eta 0:00:01[K     |███████████▍                    | 102kB 12.6MB/s eta 0:00:01[K     |████████████▌                   | 112kB 12.6MB/s eta 0:00:01[K     |█████████████▊                  | 122kB 12.6MB/s e

In [11]:
#albumentations
if FLAGS['ENV'] == "kaggle":
  import albumentations
  print(albumentations.__version__)
else:
  !pip install --upgrade --force-reinstall --no-deps albumentations

from albumentations import (
    Compose, OneOf, Normalize, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip, 
    RandomBrightness, RandomContrast, RandomBrightnessContrast, Rotate, ShiftScaleRotate, Cutout, 
    IAAAdditiveGaussianNoise, Transpose
    )
from albumentations.pytorch import ToTensorV2

Collecting albumentations
[?25l  Downloading https://files.pythonhosted.org/packages/03/58/63fb1d742dc42d9ba2800ea741de1f2bc6bb05548d8724aa84794042eaf2/albumentations-0.5.2-py3-none-any.whl (72kB)
[K     |████▌                           | 10kB 19.3MB/s eta 0:00:01[K     |█████████                       | 20kB 26.9MB/s eta 0:00:01[K     |█████████████▋                  | 30kB 31.3MB/s eta 0:00:01[K     |██████████████████▏             | 40kB 23.2MB/s eta 0:00:01[K     |██████████████████████▊         | 51kB 18.6MB/s eta 0:00:01[K     |███████████████████████████▏    | 61kB 17.3MB/s eta 0:00:01[K     |███████████████████████████████▊| 71kB 13.5MB/s eta 0:00:01[K     |████████████████████████████████| 81kB 6.2MB/s 
[?25hInstalling collected packages: albumentations
  Found existing installation: albumentations 0.1.12
    Uninstalling albumentations-0.1.12:
      Successfully uninstalled albumentations-0.1.12
Successfully installed albumentations-0.5.2


# Permanent Seeds

In [12]:
def seed_torch(seed=42):
    """Seed every random number generator this notebook uses.

    Seeds Python's `random`, NumPy, and PyTorch (CPU and all CUDA devices) and
    configures cuDNN for repeatable kernel selection.

    Args:
        seed: integer seed applied to every generator.
    """
    random.seed(seed)
    # Only affects interpreters started after this point; the hash seed of the
    # running process was fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every CUDA device, not just the current one (ignored when CUDA is unavailable).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning can choose different kernels between runs, so turn it off.
    torch.backends.cudnn.benchmark = False

seed_torch()

# Parameters (Non-tuning)

In [13]:
# data: competition folder and the image folder matching the run mode
FLAGS['OS_LIST_DIR'] = f"../input/{FLAGS['COMP_NAME']}"
if FLAGS['PGM_TYPE'] == "TRIN":
  FLAGS['DATA_PATH'] = FLAGS['OS_LIST_DIR']+"/train_images"
else:
  FLAGS['DATA_PATH'] = FLAGS['OS_LIST_DIR']+"/test_images"
FLAGS['image_size'] = 384
# model: architecture, number of output classes, and where outputs / weights live
FLAGS['model_name'] = 'resnext50_32x4d'
FLAGS['target_size'] = 12
if "KAGGLE_DATA_PROXY_TOKEN" in os.environ:
  # Kaggle: write next to the notebook, read weights from the attached dataset
  FLAGS['OUTPUT_DIR'] = "./"
  FLAGS['MODEL_PATH'] = "../input/"+FLAGS['COMP_NAME']+"--"+FLAGS['model_name'].replace('_', '-')+"/"
else:
  # Colab: both locations live under ROOT_DIR
  FLAGS['OUTPUT_DIR'] = f"{ROOT_DIR}/kaggle/output/{FLAGS['COMP_NAME']}"
  FLAGS['MODEL_PATH'] = f"{FLAGS['OUTPUT_DIR']}/pth/"

# Load Data

In [14]:
# List the entries of the competition directory
os.listdir(FLAGS['OS_LIST_DIR'] )

['sample_submission.csv',
 'train.csv',
 'plant-pathology-2021-fgvc8.zip',
 'test_images',
 'train_images']

In [15]:
# Read the training annotations and report the number of rows
train = pd.read_csv(FLAGS["OS_LIST_DIR"] + "/train.csv")
print(f'Train samples: {len(train)}')

Train samples: 18632


In [16]:
# Preview the first ten training rows
train.head(10)

Unnamed: 0,image,labels
0,800113bb65efe69e.jpg,healthy
1,8002cb321f8bfcdf.jpg,scab frog_eye_leaf_spot complex
2,80070f7fb5e2ccaa.jpg,scab
3,80077517781fb94f.jpg,scab
4,800cbf0ff87721f8.jpg,complex
5,800edef467d27c15.jpg,healthy
6,800f85dc5f407aef.jpg,cider_apple_rust
7,801d6dcd96e48ebc.jpg,healthy
8,801f78399a44e7af.jpg,complex
9,8021b94d437eb7d3.jpg,healthy


In [17]:
# The sample submission lists the test images to predict; reuse the
# competition directory already stored in FLAGS (same "../input/<COMP_NAME>" path)
test = pd.read_csv(f"{FLAGS['OS_LIST_DIR']}/sample_submission.csv")
test.head()

Unnamed: 0,image,labels
0,85f8cb619c66b863.jpg,healthy
1,ad8770db05586b59.jpg,healthy
2,c7b03e718489f3ca.jpg,healthy


In [18]:
# Replace the placeholder label strings of the sample submission by integer ids
# (ids follow value_counts order; at most 12 distinct labels are mapped)
labels = list(test['labels'].value_counts().index)
labels_dict = {name: idx for idx, name in enumerate(labels[:12])}
test['labels'] = test['labels'].map(labels_dict)
test.head()

Unnamed: 0,image,labels
0,85f8cb619c66b863.jpg,0
1,ad8770db05586b59.jpg,0
2,c7b03e718489f3ca.jpg,0


# Model

In [19]:
class CustomModel(nn.Module):
    """timm backbone whose `fc` layer is swapped for a FLAGS['target_size']-way linear head."""

    def __init__(self, model_name=FLAGS['model_name'], pretrained=True):
        """Build the backbone and replace its final fully connected layer.

        Args:
            model_name: timm architecture name (must expose an `fc` attribute).
            pretrained: forwarded to `timm.create_model`.
        """
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        backbone.fc = nn.Linear(backbone.fc.in_features, FLAGS['target_size'])
        self.model = backbone

    def forward(self, x):
        """Return the backbone output for the batch `x`."""
        return self.model(x)

## Transforming

In [20]:
def get_transforms(*, data):
    """Build the albumentations pipeline for the requested data split.

    Args:
        data: 'valid' selects a deterministic pipeline (plain resize) so that
            inference gives the same output for the same image. Any other
            value keeps the augmenting pipeline (random resized crop and
            horizontal flip).

    Returns:
        A `Compose` that resizes to FLAGS['image_size'] squared, normalizes
        with the mean/std constants below, and converts to a tensor.
    """
    size = FLAGS['image_size']
    # steps shared by both pipelines
    tail = [
        Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        ToTensorV2(),
    ]
    if data == 'valid':
        # no random crop or flip at validation / test time
        return Compose([Resize(size, size)] + tail)
    return Compose([
        RandomResizedCrop(size, size, scale=(0.85, 1.0)),
        HorizontalFlip(p=0.5),
    ] + tail)

In [21]:
class TestDataset(Dataset):
  """Dataset that yields test images (no labels) read from FLAGS['DATA_PATH']."""

  def __init__(self, df, transform=None):
    """Store the frame and its columns.

    Args:
      df: DataFrame with an 'image' column (file names) and a 'labels' column.
      transform: optional callable invoked as transform(image=...) that
        returns a dict with an 'image' entry.
    """
    self.df = df
    self.file_names = df['image'].values
    self.labels = df['labels'].values
    self.transform = transform

  def __len__(self):
    """Number of rows in the wrapped frame."""
    return len(self.df)

  def __getitem__(self, idx):
    """Load image `idx` as RGB and apply the transform when one is set.

    Raises:
      FileNotFoundError: when the image file cannot be read.
    """
    file_path = f"{FLAGS['DATA_PATH']}/{self.file_names[idx]}"
    image = cv2.imread(file_path)
    if image is None:
      # cv2.imread signals an unreadable file by returning None; fail with the path
      raise FileNotFoundError(f"Could not read image: {file_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    if self.transform:
      image = self.transform(image=image)['image']
    return image

In [22]:
# Wrap the sample-submission rows in a dataset that uses the 'valid' transform pipeline
test_dataset = TestDataset(test, transform=get_transforms(data='valid'))

# Inference

In [23]:
def inference(model, state, test_loader, device):
    """Predict one class index per sample of `test_loader`.

    Args:
        model: network to run; it is moved to `device` and put in eval mode.
        state: state dict loaded into `model` before predicting.
        test_loader: sized iterable yielding image batches (tensors).
        device: device the model and the batches are moved to.

    Returns:
        1-D float64 numpy array holding the argmax class index of every
        sample, in loader order. Empty when the loader yields no batches.
    """
    model.to(device)
    # Weights and eval mode are the same for every batch, so set them once.
    model.load_state_dict(state)
    model.eval()

    batch_preds = []
    with torch.no_grad():
        for images in tqdm(test_loader, total=len(test_loader)):
            logits = model(images.to(device))
            batch_preds.append(logits.argmax(1).to('cpu').numpy())

    if not batch_preds:
        # np.concatenate rejects an empty list
        return np.empty(0, dtype=np.float64)
    # float64 is kept for compatibility with callers that cast via astype(int)
    return np.concatenate(batch_preds).astype(np.float64)

# Parameters (Tuning)

In [24]:
# training / loader hyper-parameters, added to FLAGS in one step
FLAGS.update({
    'batch_size': 16,
    'num_workers': 2,
    'learning_rate': 2e-4,
    'min_lr': 2e-5,
    'T_max': 6e5,
    'weight_decay': 1e-4,
    'num_cores': 8,
    'num_epochs': 2,
    'print_freq': 100,
    'max_grad_norm': 1e3,
})

# Execute Main

In [25]:
# Build the network without pretrained weights; the fine-tuned checkpoint is loaded below.
model = CustomModel(pretrained=False)
# map_location lets a checkpoint saved on an accelerator load on whatever
# `device` resolved to (for example a CPU-only runtime).
# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
state = torch.load(f"{FLAGS['MODEL_PATH']}{FLAGS['model_name']}.pth", map_location=device)
test_dataset = TestDataset(test, transform=get_transforms(data='valid'))
# shuffle=False keeps predictions aligned with the row order of `test`
test_loader = DataLoader(test_dataset, batch_size=FLAGS['batch_size'], shuffle=False,
                         num_workers=FLAGS['num_workers'], pin_memory=True)
predictions = inference(model, state, test_loader, device)
# submission
# NOTE(review): at this point 'labels' holds integer class ids, so the CSV written
# here contains ids rather than label strings.
test['labels'] = predictions.astype(int)
test.to_csv(FLAGS['OUTPUT_DIR']+'/submission.csv', index=False)
test.head()

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Unnamed: 0,image,labels
0,85f8cb619c66b863.jpg,0
1,ad8770db05586b59.jpg,6
2,c7b03e718489f3ca.jpg,2


In [26]:
# Decode predicted class ids back to label strings and save the final submission.
# NOTE(review): ids are assigned in value_counts order of the training labels —
# confirm the training run used the same encoding.
labels = list(train['labels'].value_counts().keys())
labels_dict = dict(zip(range(FLAGS['target_size']), labels))
# Decode from `predictions` rather than from test['labels'] so that re-running
# this cell gives the same result instead of mapping already-decoded strings.
test['labels'] = pd.Series(predictions.astype(int)).map(labels_dict).values
# The CSV written by the inference cell holds integer ids; overwrite it with the
# label strings used by the sample submission.
test.to_csv(FLAGS['OUTPUT_DIR']+'/submission.csv', index=False)
test.head()

Unnamed: 0,image,labels
0,85f8cb619c66b863.jpg,scab
1,ad8770db05586b59.jpg,scab frog_eye_leaf_spot
2,c7b03e718489f3ca.jpg,frog_eye_leaf_spot
