In [22]:
# Imports
import os
import numpy as np
import torch
import random
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import transforms
import torchvision
from tqdm.notebook import tqdm
from torchmetrics import Accuracy
import pandas as pd
from skimage import io

In [2]:
# Random Seed Initialize
def seed_everything(seed):
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
    
seed_everything(42)

# Dataset and Dataloader

In [3]:
Prob = 0.5
train_tf = transforms.Compose([transforms.RandomHorizontalFlip(Prob),
                              transforms.RandomVerticalFlip(Prob),
                              transforms.RandomResizedCrop((224,224)),
                              transforms.ToTensor(),
                              transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                          std=[0.229, 0.224, 0.225])
                              ])
val_tf = transforms.Compose([transforms.Resize((224,224)),
                              transforms.ToTensor(),
                             transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                          std=[0.229, 0.224, 0.225])
                              ])

In [4]:
train_ds = ImageFolder("../input/paddy-doctor-make-train-and-val-folder/Train", transform = train_tf)
val_ds = ImageFolder("../input/paddy-doctor-make-train-and-val-folder/Val", transform = val_tf)

In [5]:
train_ds.class_to_idx

{'bacterial_leaf_blight': 0,
 'bacterial_leaf_streak': 1,
 'bacterial_panicle_blight': 2,
 'blast': 3,
 'brown_spot': 4,
 'dead_heart': 5,
 'downy_mildew': 6,
 'hispa': 7,
 'normal': 8,
 'tungro': 9}

In [6]:
val_ds.class_to_idx

{'bacterial_leaf_blight': 0,
 'bacterial_leaf_streak': 1,
 'bacterial_panicle_blight': 2,
 'blast': 3,
 'brown_spot': 4,
 'dead_heart': 5,
 'downy_mildew': 6,
 'hispa': 7,
 'normal': 8,
 'tungro': 9}

In [7]:
train_loader    = DataLoader(train_ds,batch_size= 64, shuffle = True, pin_memory=True)
val_loader      = DataLoader(val_ds,  batch_size= 64, shuffle = True, pin_memory=True)

In [8]:
# model = torchvision.models.convnext_tiny(pretrained = True)
model_dir = '../input/pytorch-model-weights/convnext_models'
model_name = 'convnext_tiny.pt'
model = torch.load(os.path.join(model_dir, model_name))

In [9]:
model.classifier[-1] = torch.nn.Linear(in_features = 768, out_features = 10)

In [10]:
def train(data, target, model, optimizer, criterion, TRAIN):

    if TRAIN:
        optimizer.zero_grad()

    # forward pass
    output = model(data)
    #loss = criterion(output, target)
    
    norms = torch.norm(output, p=2, dim=-1, keepdim=True) + 1e-7
    logit_norm = torch.div(output, norms)
    loss = criterion(logit_norm , target)

    if TRAIN:
        # backward pass
        loss.backward()  
        # Update the parameters
        optimizer.step()     

    return output, loss

In [11]:
# Training Options
optimizer = torch.optim.Adam(model.parameters(), lr= 1e-3) 
criterion = torch.nn.CrossEntropyLoss() 

# training device  ('cpu', 'cuda')
device = 'cuda'
model.to(device)

# number of training epochs
n_epochs = 1


acc = Accuracy().to(device)
print(f"\nTraining Model")
best_val_ = 1000
failure_count = 0
for epoch in tqdm(range(n_epochs), desc = "# Epochs", position= 0):
    train_loss = 0
    val_loss = 0

    train_acc = 0
    val_acc = 0

    # set to train mode
    model.train()

    for i, (data, target) in enumerate(tqdm(train_loader, desc = "Training", leave= True, position= 1)):
        # data to gpu/cpu
        data, target = data.to(device, non_blocking=True), target.to(device, non_blocking=True)

        # mini batch training
        output, loss = train(data, target, model, optimizer, criterion, TRAIN= True)

        # Track train loss by multiplying average loss by number of examples in batch
        train_loss += loss.item() * data.size(0)
        _, output = torch.max(output, dim=1) 
        acc(output, target.squeeze())

    # Compute Loss for each epoch
    train_loss = train_loss / len(train_loader.dataset)
    train_acc = acc.compute().item()
    acc.reset()

    with torch.no_grad():
        # set to evaluation mode
        model.eval()

        for i, (data, target) in enumerate(tqdm(val_loader, desc = "Validation", leave= True, position= 2)):
            # data to gpu/cpu
            data, target = data.to(device, non_blocking=True), target.to(device, non_blocking=True)
            # mini batch training
            output, loss = train(data, target, model, optimizer, criterion, TRAIN= False)
            # Track train loss by multiplying average loss by number of examples in batch
            val_loss += loss.item() * data.size(0)
            _, output = torch.max(output, dim=1) 
            acc(output, target.squeeze())

        # Compute Loss for each epoch
        val_loss = val_loss / len(val_loader.dataset)
        val_acc = acc.compute().item()
        acc.reset()
        
    if val_loss < best_val_:
        best_val_ = val_loss
        best_acc = val_acc
        failure_count = 0
        torch.save(model, 'best_model.pt')
    else:
        failure_count += 1
    
    if failure_count >= 10:
        break
        
    print(f"Epoch # {epoch+1:04d}")
    print(f"Train Loss: {train_loss: .4f},\t Val Loss: {val_loss: .4f}")
    print(f"Train Acc : {train_acc: .4f},\t Val Acc : {val_acc: .4f}")  
    print(f"Best Val Loss : {best_val_: .4f},\t Val Acc : {best_acc: .4f}")
    print(f"Failure Count = {failure_count}")



Training Model


# Epochs:   0%|          | 0/1 [00:00<?, ?it/s]

Training:   0%|          | 0/122 [00:00<?, ?it/s]

Validation:   0%|          | 0/41 [00:00<?, ?it/s]

Epoch # 0001
Train Loss:  1.9227,	 Val Loss:  1.8202
Train Acc :  0.4766,	 Val Acc :  0.5819
Best Val Acc :  1.8202


# Inference

In [14]:
train_ds.class_to_idx

{'bacterial_leaf_blight': 0,
 'bacterial_leaf_streak': 1,
 'bacterial_panicle_blight': 2,
 'blast': 3,
 'brown_spot': 4,
 'dead_heart': 5,
 'downy_mildew': 6,
 'hispa': 7,
 'normal': 8,
 'tungro': 9}

In [15]:
class_dict = {v: k for k, v in train_ds.class_to_idx.items()}
class_dict

{0: 'bacterial_leaf_blight',
 1: 'bacterial_leaf_streak',
 2: 'bacterial_panicle_blight',
 3: 'blast',
 4: 'brown_spot',
 5: 'dead_heart',
 6: 'downy_mildew',
 7: 'hispa',
 8: 'normal',
 9: 'tungro'}

In [18]:
sample_df = pd.read_csv("../input/paddy-disease-classification/sample_submission.csv")
sample_df.head()

Unnamed: 0,image_id,label
0,200001.jpg,
1,200002.jpg,
2,200003.jpg,
3,200004.jpg,
4,200005.jpg,


In [19]:
# Load Best Model
model = torch.load('best_model.pt')

In [20]:
test_dir = "../input/paddy-disease-classification/test_images"
file_list = os.listdir(test_dir)
file_list[:5]

['200607.jpg', '202733.jpg', '200207.jpg', '203024.jpg', '203056.jpg']

In [21]:
test_tfs = transforms.Compose([
                                transforms.ToPILImage(),
                                transforms.Resize((224,224)),
                                transforms.ToTensor(),
                                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                          std=[0.229, 0.224, 0.225])
                                ])

In [23]:
result_df = pd.DataFrame(columns = sample_df.columns)
print("Begin Inference\n")
for ii, file_name in enumerate(tqdm(file_list)):
    image = io.imread(os.path.join(test_dir, file_name))
    image = test_tfs(image)
    image = image.unsqueeze(0)
    out = model(image.to('cuda', non_blocking=True))
    out = torch.argmax(out).item()
    label = class_dict[out]
    result_df.loc[ii,:] = [file_name, label]

Begin Inference



  0%|          | 0/3469 [00:00<?, ?it/s]

In [24]:
result_df.to_csv('submission.csv', index = False, columns = result_df.columns)
print("\nSubmission File Created!")


Submission File Created!
