In [None]:
import albumentations
import os
import pandas as pd
import argparse
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from sklearn import metrics
from sklearn.model_selection import train_test_split

In [None]:
# dataset.py
import cv2
class PlantDataset(Dataset):
    """Dataset yielding ``(image, labels)`` pairs for the rows of a DataFrame.

    Every row must provide an ``image_id`` column (the stem of a ``.jpg``
    file) and the four columns listed in ``LABEL_COLUMNS``.

    Args:
        df: Table with one row per image.
        transforms: Optional callable invoked as ``transforms(image=image)``
            that returns a mapping with an ``'image'`` entry.
        image_dir: Directory containing the ``.jpg`` files. When omitted, the
            notebook-level ``DIR_INPUT + '/images'`` is used and resolved at
            item-access time.
    """

    LABEL_COLUMNS = ['healthy', 'multiple_diseases', 'rust', 'scab']

    def __init__(self, df, transforms=None, image_dir=None):
        self.df = df
        self.transforms = transforms
        self.image_dir = image_dir

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return self.df.shape[0]

    def __getitem__(self, idx):
        """Load the sample at 0-based position ``idx``.

        Returns:
            ``(image, labels)`` where ``image`` is the RGB image (passed
            through ``transforms`` when given) and ``labels`` is an int8
            tensor of shape ``(4, 1)``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        # Positional lookup: samplers hand out 0..len-1, which only coincides
        # with index labels when the frame carries a fresh RangeIndex.
        row = self.df.iloc[idx]

        image_dir = self.image_dir if self.image_dir is not None else DIR_INPUT + '/images'
        image_src = f"{image_dir}/{row['image_id']}.jpg"

        image = cv2.imread(image_src, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_src}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        labels = row[self.LABEL_COLUMNS].values
        labels = torch.from_numpy(labels.astype(np.int8))
        # Trailing unit axis so that labels[:, i] of a batch has shape (batch, 1).
        labels = labels.unsqueeze(-1)

        if self.transforms:
            transformed = self.transforms(image=image)
            image = transformed['image']

        return image, labels

In [None]:
# model.py
class PlantModel(nn.Module):
    """ResNet-18 feature extractor followed by one linear head per output group.

    Args:
        num_classes: Iterable of output sizes; one ``nn.Linear`` head is
            created for each entry, all fed by the same pooled features.
    """

    def __init__(self, num_classes):
        super().__init__()

        self.backbone = torchvision.models.resnet18(pretrained=True)

        # The heads take the width that the backbone's own ``fc`` layer expects.
        feature_dim = self.backbone.fc.in_features
        heads = [nn.Linear(feature_dim, out_dim) for out_dim in num_classes]
        self.logit = nn.ModuleList(heads)

    def forward(self, x):
        """Return a list with one logit tensor per head for a 4-D input batch."""
        n_samples, _, _, _ = x.shape

        # Run the convolutional stages in order; the backbone's ``fc`` layer
        # is not applied here.
        net = self.backbone
        stages = (
            net.conv1, net.bn1, net.relu, net.maxpool,
            net.layer1, net.layer2, net.layer3, net.layer4,
        )
        for stage in stages:
            x = stage(x)

        # Global average pool to one vector per sample, then dropout
        # (active only while the module is in training mode).
        pooled = F.adaptive_avg_pool2d(x, 1)
        features = pooled.reshape(n_samples, -1)
        features = F.dropout(features, p=0.25, training=self.training)

        return [head(features) for head in self.logit]


In [None]:
# # Engine.py
from tqdm import tqdm
def train(dataloader, model, optimizer, device, loss_fn=None):
    """Run one training epoch and return the loss summed over all batches.

    Args:
        dataloader: Iterable of batches; ``batch[0]`` holds the inputs and
            ``batch[1]`` the targets, indexed as ``targets[:, i]`` for head ``i``.
        model: Module whose forward pass returns a sequence with one output
            tensor per head.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the inputs and targets are moved to.
        loss_fn: Loss applied to each ``(head output, head target)`` pair.
            Defaults to the notebook-level ``criterion``, looked up at call time.

    Returns:
        The total loss accumulated over the epoch as a float.
    """
    if loss_fn is None:
        loss_fn = criterion

    model.train()
    tr_loss = 0.0
    for batch in tqdm(dataloader, desc="Train"):
        inputs = batch[0].to(device, dtype=torch.float)
        targets = batch[1].to(device, dtype=torch.float)

        optimizer.zero_grad()
        outputs = model(inputs)
        # One loss term per head, however many heads the model exposes.
        loss = sum(
            loss_fn(head_out, targets[:, i])
            for i, head_out in enumerate(outputs)
        )
        loss.backward()
        tr_loss += loss.item()
        optimizer.step()

    return tr_loss

def evaluate(data_loader, model, device, loss_fn=None):
    """Compute the loss summed over every batch of ``data_loader``.

    The model is switched to eval mode and gradients are disabled for the
    whole pass.

    Args:
        data_loader: Iterable of batches; ``batch[0]`` holds the inputs and
            ``batch[1]`` the targets, indexed as ``targets[:, i]`` for head ``i``.
        model: Module whose forward pass returns a sequence with one output
            tensor per head.
        device: Device the inputs and targets are moved to.
        loss_fn: Loss applied to each ``(head output, head target)`` pair.
            Defaults to the notebook-level ``criterion``, looked up at call time.

    Returns:
        The total loss over all batches as a float (``0.0`` for an empty loader).
    """
    if loss_fn is None:
        loss_fn = criterion

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch in tqdm(data_loader, desc='Validate'):
            inputs = batch[0].to(device, dtype=torch.float)
            targets = batch[1].to(device, dtype=torch.float)

            outputs = model(inputs)
            # One loss term per head, however many heads the model exposes.
            loss = sum(
                loss_fn(head_out, targets[:, i])
                for i, head_out in enumerate(outputs)
            )
            val_loss += loss.item()

    # Return only after the whole loader has been consumed.
    return val_loss

# Augmentation

In [None]:
from albumentations.pytorch import ToTensorV2
# Training pipeline: random crop / flip / shift-scale-rotate augmentation,
# followed by normalisation and conversion to a tensor.
transforms_train = albumentations.Compose([
    albumentations.RandomResizedCrop(height=256, width=256, p=1.0),
    albumentations.Flip(),
    # NOTE(review): rotate_limit=1.0 looks like it restricts rotation to about
    # +/-1 degree -- confirm this is intended.
    albumentations.ShiftScaleRotate(rotate_limit=1.0, p=0.8),
    albumentations.Normalize(p=1.0),
    ToTensorV2(p=1.0),
])

# Validation / inference pipeline: a plain resize to the training resolution
# instead of a random crop, so the validation loss and the test predictions
# are reproducible from one run to the next.
transforms_valid = albumentations.Compose([
    albumentations.Resize(height=256, width=256, p=1.0),
    albumentations.Normalize(p=1.0),
    ToTensorV2(p=1.0),
])

In [None]:
#train.py
DIR_INPUT = '../input/plant-pathology-2020-fgvc7'
BATCH_SIZE = 64


# Tag every labelled row, then move a reproducible 20% sample to validation.
train_df = pd.read_csv(DIR_INPUT + '/train.csv')
train_df['sample_type'] = 'train'

sample_idx = train_df.sample(frac=0.2, random_state=42).index
train_df.loc[sample_idx, 'sample_type'] = 'valid'

# reset_index returns fresh frames here; doing it in place on a filtered
# slice can trigger pandas' SettingWithCopy warnings.
valid_df = train_df[train_df['sample_type'] == 'valid'].reset_index(drop=True)
train_df = train_df[train_df['sample_type'] == 'train'].reset_index(drop=True)

dataset_train = PlantDataset(df=train_df, transforms=transforms_train)
dataset_valid = PlantDataset(df=valid_df, transforms=transforms_valid)

dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, num_workers=4, shuffle=True)
dataloader_valid = DataLoader(dataset_valid, batch_size=BATCH_SIZE, num_workers=4, shuffle=False)

# Use the first GPU when present, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Four independent single-logit heads, one per label column.
model = PlantModel(num_classes=[1, 1, 1, 1])
model.to(device)

criterion = nn.BCEWithLogitsLoss()
plist = [{'params': model.parameters(), 'lr': 5e-5}]
optimizer = torch.optim.Adam(plist, lr=5e-5)

for epoch in range(5):
    train(dataloader_train, model, optimizer, device)
    val_los = evaluate(dataloader_valid, model, device)
    print(f'epoch = {epoch}, valid loss = {val_los}')

# prediction

In [None]:
# Load the sample submission file from the competition input directory and
# preview its first rows.
submission_df = pd.read_csv(DIR_INPUT + '/sample_submission.csv')
submission_df.head()

In [None]:
# Wrap the submission rows in a dataset that uses the validation transforms,
# and iterate over it in file order (no shuffling).
dataset_test = PlantDataset(
    transforms=transforms_valid,
    df=submission_df,
)
dataloader_test = DataLoader(
    dataset_test,
    shuffle=False,
    num_workers=4,
    batch_size=BATCH_SIZE,
)

In [None]:
# Run the model over the test loader and collect sigmoid scores per sample.
model.eval()

# Gather the per-batch results and concatenate once at the end, instead of
# re-copying the growing tensor on every iteration.
pred_batches = []

with torch.no_grad():
    for batch in tqdm(dataloader_test, desc="Predict"):
        images = batch[0].to(device, dtype=torch.float)
        outputs = model(images)

        # Stack the per-head logits, move the batch axis to the front and drop
        # the trailing unit axis; with the four single-output heads configured
        # above this yields one row of four scores per image.
        preds = torch.sigmoid(torch.stack(outputs).permute(1, 0, 2).cpu().squeeze(-1))
        pred_batches.append(preds)

test_preds = torch.cat(pred_batches, dim=0)


In [None]:
# Overwrite the four label columns with the predicted scores (rows are in
# loader order, which is unshuffled) and write the result to submission.csv
# without the index column.
submission_df[['healthy', 'multiple_diseases', 'rust', 'scab']] = test_preds.numpy()
submission_df.to_csv('submission.csv', index=False)

In [None]:
# Display the filled-in submission table as the cell output.
submission_df