In [1]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import transforms

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [3]:
class TrainDataset(Dataset):
    def __init__(self, path, image_folder, label_file, transform):
        df = pd.read_csv(path + label_file)
        classes = df.labels.astype(str)
        
        self.image_paths = [path + image_folder + f for f in df.image]
        self.cls2idx = {c:i for i, c in enumerate(sorted(classes.unique()))}
        self.idx2cls = list(sorted(classes.unique()))
        self.y = classes.map(self.cls2idx).to_numpy()
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, i):
        img = Image.open(self.image_paths[i]).convert("RGB")
        img = self.transform(img)
        label = torch.tensor(self.y[i], dtype=torch.long)
        return img, label

class TestDataset(Dataset):
    def __init__(self, path, folder, transform):
        self.path = path + folder 
        self.image_names = [f for f in os.listdir(self.path)]
        self.transform = transform

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, i):
        img = Image.open(self.path + self.image_names[i]).convert("RGB")
        img = self.transform(img)

        return img, self.image_names[i]

In [4]:
from sklearn.model_selection import StratifiedShuffleSplit
from torch.utils.data import Subset

In [10]:

import kornia as K
from kornia.augmentation import AugmentationSequential, RandomRotation, RandomVerticalFlip

aug = AugmentationSequential(
    RandomRotation(degrees=90),
    RandomVerticalFlip(p=0.5),
    data_keys=["input"],         
    same_on_batch=False,
).to(device)

train_tfms = transforms.Compose([transforms.Resize((224, 224)),
                                 transforms.ToTensor(),
                                 transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
                                ])


test_tfms = transforms.Compose([transforms.Resize((224, 224)),
                                transforms.ToTensor(),
                                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
                               ])

PATH = '/kaggle/input/plant-pathology-2021-fgvc8/'

train_dataset = TrainDataset(PATH, 'train_images/', 'train.csv', train_tfms)
val_dataset = TrainDataset(PATH, 'train_images/', 'train.csv', train_tfms)
test_dataset = TestDataset(PATH, 'test_images/', test_tfms)


y = train_dataset.y  
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, val_idx = next(sss.split(np.zeros(len(y)), y))

train_subset = Subset(train_dataset, train_idx)  
val_subset = Subset(val_dataset,   val_idx)   

batch_size = 128
num_workers = 4   # start low; increase only if GPU starves
prefetch_factor = 4

train_loader = DataLoader(
    train_subset, batch_size=batch_size, shuffle=True,
    num_workers=num_workers, pin_memory=True, persistent_workers=True, 
    prefetch_factor=prefetch_factor
)
val_loader = DataLoader(
    val_subset, batch_size=batch_size,
    num_workers=num_workers, pin_memory=True, persistent_workers=True, 
    prefetch_factor=prefetch_factor
)
test_loader = DataLoader(test_dataset, batch_size=batch_size,
    num_workers=num_workers, pin_memory=True, persistent_workers=True, 
                         prefetch_factor=prefetch_factor)

full_train_loader = DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True,
    num_workers=num_workers, pin_memory=True, persistent_workers=True, 
    prefetch_factor=prefetch_factor 
)

torch.backends.cudnn.benchmark = True  # once, after imports

In [11]:
from collections import OrderedDict  
from torch import nn, optim
from torchvision.models import resnet50

In [12]:
model = resnet50(weights='DEFAULT')

model.fc = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(2048, 128)),
    ('relu1', nn.ReLU()),
    ('droupout1', nn.Dropout(0.4)),
    ('fc2', nn.Linear(128, 12))
]))

model = model.to(device)
# model = nn.DataParallel(model, device_ids=[0,1])

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=1e-3) #model.module.fc.parameters() if use both gpus
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min')

# scaler = torch.amp.GradScaler('cuda')

In [13]:
from tqdm import tqdm

In [16]:
for epoch in range(5):
    train_loss_acum = 0
    correct_count_train = 0
    for ii, (images, labels) in tqdm(enumerate(train_loader), total=len(train_loader)):
        images_gpu = images.to(device, non_blocking=True)
        labels_gpu = labels.to(device, non_blocking=True)

        images_gpu = aug(images_gpu) #augmentations with kornia
        
        optimizer.zero_grad()
        logits = model(images_gpu)
        loss = criterion(logits, labels_gpu)
        loss.backward()
        optimizer.step()
        # with torch.amp.autocast('cuda'):  
        #     logits = model(images_gpu)
        #     loss = criterion(logits, labels_gpu)
        # scaler.scale(loss).backward()
        # scaler.step(optimizer)
        # scaler.update()
        correct_count_train += (torch.argmax(logits, dim=-1) == labels_gpu).sum()
        train_loss_acum += loss * labels.shape[0] #acum loss by every elem
        
    train_loss = train_loss_acum / len(train_dataset)
    train_acc = correct_count_train/len(train_subset)
    print(f'epoch {epoch}, train acc = {train_acc}')
        
    with torch.no_grad():
        model.eval()
        val_loss_acum = 0
        correct_pred_count = 0
        for iii, (val_images, val_labels) in tqdm(enumerate(val_loader), total=len(val_loader)):
            val_images_gpu = val_images.to(device, non_blocking=True)
            val_labels_gpu = val_labels.to(device, non_blocking=True)
            # with torch.amp.autocast('cuda'):
            #     val_logits = model(val_images_gpu)
            #     val_loss += criterion(val_logits, val_labels_gpu)
            val_logits = model(val_images_gpu)
            correct_pred_count += (torch.argmax(val_logits, dim=-1) == val_labels_gpu).sum()
            val_loss_acum += criterion(val_logits, val_labels_gpu) * val_labels_gpu.shape[0]
            
        val_acc = correct_pred_count / len(val_subset) 
        val_loss = val_loss_acum / len(val_dataset)
        
    model.train()
    scheduler.step(val_loss)
    print(f'epoch {epoch} train loss = {train_loss}, val loss = {val_loss}, val acc = {val_acc}')
        

100%|██████████| 117/117 [10:21<00:00,  5.31s/it]


epoch 0, train acc = 0.6884267330169678


100%|██████████| 30/30 [03:08<00:00,  6.30s/it]


epoch 0 train loss = 117.97583770751953, val loss = 27.035797119140625, val acc = 0.727662980556488


100%|██████████| 117/117 [09:58<00:00,  5.11s/it]


epoch 1, train acc = 0.7162697315216064


100%|██████████| 30/30 [02:58<00:00,  5.96s/it]


epoch 1 train loss = 106.35456848144531, val loss = 25.089500427246094, val acc = 0.7367856502532959


100%|██████████| 117/117 [10:53<00:00,  5.58s/it]


epoch 2, train acc = 0.7299564480781555


100%|██████████| 30/30 [03:13<00:00,  6.45s/it]


epoch 2 train loss = 100.8189468383789, val loss = 24.465770721435547, val acc = 0.7445666790008545


100%|██████████| 117/117 [10:14<00:00,  5.25s/it]


epoch 3, train acc = 0.7468634843826294


100%|██████████| 30/30 [03:12<00:00,  6.42s/it]


epoch 3 train loss = 95.25563049316406, val loss = 23.675460815429688, val acc = 0.7507378458976746


100%|██████████| 117/117 [10:11<00:00,  5.23s/it]


epoch 4, train acc = 0.7510902881622314


100%|██████████| 30/30 [03:16<00:00,  6.54s/it]

epoch 4 train loss = 92.51350402832031, val loss = 22.94195556640625, val acc = 0.7590555548667908





### It will be a good thing to infere the accuracy by each class. I will probabrly find out then which class is misclassified most.

In [17]:
ckpt = {"state_dict": model.state_dict()}
torch.save(ckpt, "/kaggle/working/model_resnet50_fold0.pth")

In [None]:
# import pandas

# model.eval()
# all_files, all_idx = [], []
# for test_image, file_names in test_loader:
#     test_image = test_image.to(device)
#     preds = torch.argmax(model(test_image), dim=-1).cpu().tolist()
#     all_idx.extend(preds)
#     all_files.extend(file_names)

# df = pd.DataFrame({'image': all_files, 'lables': [train_dataset.idx2cls[i] for i in all_idx]})
# df.to_csv('submission.csv', index=False)
# df.head(5)

In [None]:
# print(device, next(model.parameters()).device)
# !nvidia-smi