<a href="https://colab.research.google.com/github/mzdwedar/plant-pathology/blob/main/Plant_Pathology.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

import pandas as pd

from sklearn.model_selection import train_test_split

import os

In [None]:
# Install the Kaggle CLI and register the API token.
# kaggle.json must already be in the working directory (it is copied from there).
! pip install -q kaggle

# -p: do not error out when ~/.kaggle already exists, so the cell can be re-run.
! mkdir -p ~/.kaggle

! cp kaggle.json ~/.kaggle/

# 600 = readable/writable by the owner only.
! chmod 600 ~/.kaggle/kaggle.json

# ! pip install --upgrade --force-reinstall --no-deps kaggle

In [None]:
# Fetch the plant-pathology-2020-fgvc7 competition archive (a single .zip) with the Kaggle CLI.
! kaggle competitions download -c plant-pathology-2020-fgvc7

Downloading plant-pathology-2020-fgvc7.zip to /content
 98% 763M/779M [00:04<00:00, 235MB/s]
100% 779M/779M [00:04<00:00, 175MB/s]


In [None]:
# -p keeps the cell re-runnable: no error if data/ is already there.
! mkdir -p data

In [None]:
# Extract the archive into data/.
#   -o  overwrite existing files without the interactive "replace?" prompt (safe re-runs)
#   -q  quiet: do not print one line per extracted file
! unzip -q -o plant-pathology-2020-fgvc7.zip -d data/

In [None]:
# Label table: one row per image, with an image_id column followed by four
# 0/1 label columns (healthy, multiple_diseases, rust, scab).
train_df = pd.read_csv('data/train.csv')

In [None]:
# Shuffle the labelled rows and keep 85% for training, the rest for validation.
# The fixed random_state makes the split repeatable across runs.
train, val = train_test_split(
    train_df,
    train_size=0.85,
    random_state=1,
    shuffle=True,
)

In [None]:
# After the shuffle each split keeps its original row labels; renumber both from 0
# and discard the old index.
train, val = [frame.reset_index(drop=True) for frame in (train, val)]

In [None]:
# Sanity check: number of rows in the training and validation splits.
len(train), len(val)

(1547, 274)

In [None]:
# Number of entries in the extracted image folder.
# NOTE(review): this is larger than the label table (3642 files vs. 1547 + 274 = 1821 rows);
# presumably the remainder are the competition's unlabeled test images - verify.
len(os.listdir('data/images'))

3642

In [None]:
# Distinct values per column: confirms that every label column is binary (0/1).
train_df.apply(lambda x: x.unique())

image_id             [Train_0, Train_1, Train_2, Train_3, Train_4, ...
healthy                                                         [0, 1]
multiple_diseases                                               [0, 1]
rust                                                            [0, 1]
scab                                                            [1, 0]
dtype: object

# Data Pipeline

In [None]:
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

from PIL import Image

In [None]:
class CustomDataset(Dataset):
  """Image dataset driven by a label table.

  Each row of ``labels`` describes one sample: column 0 holds the image id
  and every remaining column holds a numeric target.

  Args:
    images_dir: Directory that contains the image files.
    labels: pandas DataFrame laid out as described above.
    transform: Optional callable applied to the loaded PIL image.
    image_ext: Extension appended to image ids that do not already have one.
      The ids in train.csv are bare (``Train_0``), so without it the path
      would not point at a file.
      NOTE(review): '.jpg' is assumed for this competition's files - confirm
      against the contents of ``images_dir``.
  """

  def __init__(self, images_dir, labels, transform=None, image_ext='.jpg'):
    self.images_dir = images_dir
    self.transform = transform
    self.labels = labels
    self.image_ext = image_ext

  def __len__(self):
    """Return the number of samples (rows of the label table)."""
    return len(self.labels)

  def __getitem__(self, idx):
    """Return ``(image, targets)`` for the row at position ``idx``.

    ``image`` is the RGB PIL image, or whatever ``transform`` returns for it.
    ``targets`` is a 1-D float32 NumPy array built from columns 1..end.

    Raises:
      FileNotFoundError: if the resolved image path does not exist
        (raised by ``Image.open``).
    """
    image_id = str(self.labels.iloc[idx, 0])
    # Only add the extension when the id does not already carry one.
    if not os.path.splitext(image_id)[1]:
      image_id += self.image_ext
    image_path = os.path.join(self.images_dir, image_id)

    # Force three channels so grayscale / RGBA / palette files still match
    # a network whose first convolution expects 3 input channels.
    image = Image.open(image_path).convert('RGB')

    # A pandas Series cannot be batched by the DataLoader's default collate
    # function; a float32 array can, and becomes a float tensor in the batch.
    targets = self.labels.iloc[idx, 1:].to_numpy(dtype=np.float32)

    if self.transform is not None:
      image = self.transform(image)

    return image, targets

In [None]:
# Preprocessing applied to every image: resize to 128x128, then convert to a tensor.
# NOTE(review): nearest-neighbour resampling is unusual for downscaling photographs,
# and no mean/std normalisation is applied before the pretrained backbone - confirm
# both choices are intentional.
transform = transforms.Compose(
    [
        transforms.Resize(
            size=(128, 128),
            interpolation=transforms.InterpolationMode.NEAREST,
        ),
        transforms.ToTensor(),
    ]
)

In [None]:
# Folder with the image files extracted from the competition archive.
images_dir = 'data/images'

In [None]:
# One dataset per split; both share the same image folder and preprocessing.
datasets = dict(
    train=CustomDataset(images_dir, train, transform),
    val=CustomDataset(images_dir, val, transform),
)

# Batches of 32; only the training loader reshuffles its samples.
dataloaders = dict(
    train=DataLoader(datasets['train'], batch_size=32, shuffle=True),
    val=DataLoader(datasets['val'], batch_size=32),
)

# Number of samples in each split.
dataset_sizes = dict(train=len(train), val=len(val))

# Model

In [None]:
import torch.nn as nn
from torchvision.models import resnet18

In [None]:
class ResNet18(nn.Module):
  """torchvision ResNet-18 with a replaced final layer and sigmoid outputs.

  Args:
    n_classes: Number of outputs of the new final linear layer.
  """

  def __init__(self, n_classes):
    super().__init__()
    backbone = resnet18(pretrained=True)
    # Replace the stock head with a linear layer of the requested width,
    # reusing the feature size the backbone already feeds into it.
    head_inputs = backbone.fc.in_features
    backbone.fc = nn.Linear(in_features=head_inputs, out_features=n_classes)

    self.base_model = backbone
    self.sig = nn.Sigmoid()

  def forward(self, x):
    """Run the backbone on ``x`` and pass each output through a sigmoid.

    NOTE(review): the outputs are already sigmoid-activated, so the loss
    used for training should expect probabilities, not raw logits - confirm
    against the training code.
    """
    logits = self.base_model(x)
    return self.sig(logits)

In [None]:
# Four outputs, matching the four label columns of train.csv
# (healthy, multiple_diseases, rust, scab).
n_classes = 4

model = ResNet18(n_classes)

In [None]:
# Show the module tree to check that the final layer was replaced.
model

ResNet18(
  (base_model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track

# Train