In [1]:
# Install the notebook's third-party dependencies into the kernel's environment (-q keeps pip quiet).
# NOTE(review): no versions are pinned here, so a later run may resolve different releases — consider pinning.
%pip install torch pandas torchvision scikit-learn tqdm kaggle timm -q

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Prerequisite: upload your Kaggle API token (kaggle.json) to the working directory first.
# Create the Kaggle config directory; -p makes this a no-op if it already exists.
!mkdir -p ~/.kaggle
# Move the token into the config directory.
# NOTE(review): this step fails on a re-run once kaggle.json has already been moved.
!mv kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [10]:
# Disabled one-time setup: install unzip and fetch a different Kaggle dataset
# (nirmalsankalana/...), which is not the archive extracted at the end of this cell.
# !apt update -qq
# !apt install -qq unzip
# !kaggle datasets download nirmalsankalana/sugarcane-leaf-disease-dataset
# !unzip -q sugarcane-leaf-disease-dataset.zip -d data

# NOTE(review): the download below is commented out, so the unzip step only works when
# sugarcane-leaf-disease-classification.zip is already present in the working directory.
# !kaggle datasets download  pungliyavithika/sugarcane-leaf-disease-classification
# Extract the archive quietly (-q) into ./data.
!unzip -q sugarcane-leaf-disease-classification.zip -d data

In [11]:
import os
import shutil

import pandas as pd

# Flatten the extracted class folders (data/<label>/<image>) into data/images and
# write one (image_id, label) row per image to data/dataset.csv.

# Define paths
data_root = "data"
images_dir = os.path.join(data_root, "images")

# Create images directory if it doesn't exist
os.makedirs(images_dir, exist_ok=True)

# List to store image paths and labels
dataset = []

# os.listdir returns entries in arbitrary order; sorting makes the row order (and
# the collision renaming below) independent of the filesystem, so the seeded
# train/val/test splits built from this table are reproducible.
for subfolder in sorted(os.listdir(data_root)):
    subfolder_path = os.path.join(data_root, subfolder)

    # Only class folders are processed: skip plain files and the target folder itself
    if not os.path.isdir(subfolder_path) or subfolder == "images":
        continue

    for image in sorted(os.listdir(subfolder_path)):
        old_image_path = os.path.join(subfolder_path, image)

        # Anything that is not a regular file (e.g. a nested directory) is left in place
        if not os.path.isfile(old_image_path):
            continue

        # Target location in "data/images"; if the name is already taken,
        # append _1, _2, ... until a free name is found
        new_image_path = os.path.join(images_dir, image)
        base, ext = os.path.splitext(image)
        counter = 1
        while os.path.exists(new_image_path):
            new_image_path = os.path.join(images_dir, f"{base}_{counter}{ext}")
            counter += 1

        # Move image
        shutil.move(old_image_path, new_image_path)

        # Append to dataset with updated path and original label
        dataset.append({"image_path": new_image_path, "label": subfolder})

    # Remove the class folder only when it is really empty: os.rmdir raises
    # OSError on a directory that still has content (e.g. nested folders).
    if not os.listdir(subfolder_path):
        os.rmdir(subfolder_path)

# Re-running this cell after the images were already moved finds nothing to do;
# fail with a clear message instead of an opaque KeyError further down.
if not dataset:
    raise RuntimeError(
        f"No images found in class folders under {data_root!r}. "
        "If the images were already moved, load data/dataset.csv instead of re-running this cell."
    )

df = pd.DataFrame(dataset)
df = df.rename(columns={"image_path": "image_id"})
# Keep only the file name. Using basename (rather than stripping the literal
# "data/images/" prefix) also works where os.path.join uses a different separator.
df["image_id"] = df["image_id"].map(os.path.basename)

from sklearn.preprocessing import LabelEncoder

# Encode the folder names as integer class ids
label_encoder = LabelEncoder()
df["label"] = label_encoder.fit_transform(df["label"])

df.to_csv(os.path.join(data_root, "dataset.csv"), index=False)

# Folder name -> integer class id, kept for interpreting predictions later
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))

In [16]:
# Shortcut for later sessions: read the saved table back from disk
# instead of rebuilding it from the image folders.
import pandas as pd

df = pd.read_csv(filepath_or_buffer='data/dataset.csv')

In [17]:
# Class balance check: number of rows per encoded label.
label_counts = df["label"].value_counts()
label_counts

0    75
2    75
1    74
Name: label, dtype: int64

In [18]:
import os

import pandas as pd
import torch
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

# Use the GPU when PyTorch reports CUDA as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

from dataset import Dataset


In [19]:
# Stratified split in two stages: hold out 20% of the rows, then halve that
# hold-out into validation and test parts. Both stages use the same fixed seed.
train_df, temp_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df["label"],
    random_state=42,
)
val_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    stratify=temp_df["label"],
    random_state=42,
)

# Change the path to the directory where the images are stored
path = "data/images"

# One Dataset per split, all reading images from the same directory
train_dataset, test_dataset, val_dataset = (
    Dataset(frame, path) for frame in (train_df, test_df, val_df)
)

In [20]:
# Settings for the training run: loader batch size and learning rate ...
batch_size, lr = 8, 0.0002
# ... plus the number of epochs and the number of output classes.
num_epochs, num_classes = 35, 3

from model import SoyaTrans
from train import Trainer


def run_experiment(batch_size, lr, checkpoint_path='soyatrans.pth'):
    """Train and evaluate one SoyaTrans model, then save its weights.

    Builds data loaders over the notebook-level ``train_dataset``,
    ``val_dataset`` and ``test_dataset`` (only the training loader is
    shuffled), runs ``Trainer.train()`` followed by ``Trainer.test()``, and
    writes the model's ``state_dict`` to ``checkpoint_path``. The notebook-level
    ``num_classes`` and ``num_epochs`` settings are used as well.

    Args:
        batch_size: Batch size for all three loaders; also passed to the Trainer.
        lr: Learning rate passed to the Trainer.
        checkpoint_path: Where the ``state_dict`` is saved. Defaults to
            ``'soyatrans.pth'``, the previously hard-coded location; pass a
            distinct path per run so a sweep does not overwrite earlier weights.
    """
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = SoyaTrans(num_classes)
    trainer = Trainer(model, train_loader, val_loader, test_loader, lr, num_epochs, batch_size=batch_size)

    trainer.train()
    trainer.test()
    # Persist only the weights (state_dict), not the whole model object.
    torch.save(trainer.model.state_dict(), checkpoint_path)

In [None]:
# Disabled grid search over batch sizes and learning rates.
# NOTE(review): `batch_sizes` and `lrs` are not defined in the cells shown here — define them before enabling.
# import itertools
# for batch_size, lr in itertools.product(batch_sizes, lrs):
    # print(f"\nRunning experiment with batch_size={batch_size}, lr={lr}")
    # run_experiment(batch_size, lr)

# Single run using the notebook-level batch_size and lr.
run_experiment(batch_size, lr)

Epoch 1/35: 100%|██████████| 23/23 [00:04<00:00,  5.55it/s, loss=1.2043]



Train Metrics:
--------------------------------------------------
Epoch: 0
Train Loss: 1.2133
Test Loss: 1.1301
Accuracy: 0.3182
Precision: 0.1012
Recall: 0.3182
F1: 0.1536
--------------------------------------------------


Epoch 2/35: 100%|██████████| 23/23 [00:03<00:00,  6.97it/s, loss=1.2182]



Train Metrics:
--------------------------------------------------
Running experiment with batch_size=8, lr=0.0002
Epoch: 1
Train Loss: 1.2175
Test Loss: 1.2328
Accuracy: 0.3182
Precision: 0.1012
Recall: 0.3182
F1: 0.1536
--------------------------------------------------


Epoch 3/35: 100%|██████████| 23/23 [00:03<00:00,  6.96it/s, loss=1.2194]



Train Metrics:
--------------------------------------------------
Epoch: 2
Train Loss: 1.2152
Test Loss: 1.2296
Accuracy: 0.3182
Precision: 0.1012
Recall: 0.3182
F1: 0.1536
--------------------------------------------------


Epoch 4/35: 100%|██████████| 23/23 [00:03<00:00,  6.84it/s, loss=0.6494]



Train Metrics:
--------------------------------------------------
Epoch: 3
Train Loss: 1.0826
Test Loss: 1.1897
Accuracy: 0.3182
Precision: 0.1172
Recall: 0.3182
F1: 0.1713
--------------------------------------------------


Epoch 5/35: 100%|██████████| 23/23 [00:03<00:00,  6.91it/s, loss=0.6802]



Train Metrics:
--------------------------------------------------
Epoch: 4
Train Loss: 0.8266
Test Loss: 1.0500
Accuracy: 0.5000
Precision: 0.3515
Recall: 0.5000
F1: 0.4032
--------------------------------------------------


Epoch 6/35: 100%|██████████| 23/23 [00:03<00:00,  6.92it/s, loss=0.5661]



Train Metrics:
--------------------------------------------------
Epoch: 5
Train Loss: 0.9663
Test Loss: 0.8009
Accuracy: 0.7273
Precision: 0.7318
Recall: 0.7273
F1: 0.7109
--------------------------------------------------


Epoch 7/35: 100%|██████████| 23/23 [00:03<00:00,  6.89it/s, loss=0.5639]



Train Metrics:
--------------------------------------------------
Epoch: 6
Train Loss: 0.6552
Test Loss: 0.6574
Accuracy: 0.9091
Precision: 0.9148
Recall: 0.9091
F1: 0.9091
--------------------------------------------------


Epoch 8/35: 100%|██████████| 23/23 [00:03<00:00,  6.85it/s, loss=0.5570]



Train Metrics:
--------------------------------------------------
Epoch: 7
Train Loss: 0.6652
Test Loss: 0.6702
Accuracy: 0.9091
Precision: 0.9148
Recall: 0.9091
F1: 0.9091
--------------------------------------------------


Epoch 9/35: 100%|██████████| 23/23 [00:03<00:00,  6.86it/s, loss=0.5635]



Train Metrics:
--------------------------------------------------
Epoch: 8
Train Loss: 0.6156
Test Loss: 0.6460
Accuracy: 0.9091
Precision: 0.9148
Recall: 0.9091
F1: 0.9091
--------------------------------------------------


Epoch 10/35: 100%|██████████| 23/23 [00:03<00:00,  6.85it/s, loss=0.5520]



Train Metrics:
--------------------------------------------------
Epoch: 9
Train Loss: 0.5956
Test Loss: 0.6306
Accuracy: 0.9091
Precision: 0.9148
Recall: 0.9091
F1: 0.9091
--------------------------------------------------


Epoch 11/35: 100%|██████████| 23/23 [00:03<00:00,  6.81it/s, loss=0.8617]



Train Metrics:
--------------------------------------------------
Epoch: 10
Train Loss: 0.6021
Test Loss: 0.6402
Accuracy: 0.9091
Precision: 0.9293
Recall: 0.9091
F1: 0.9115
--------------------------------------------------


Epoch 12/35: 100%|██████████| 23/23 [00:03<00:00,  6.86it/s, loss=0.5518]



Train Metrics:
--------------------------------------------------
Epoch: 11
Train Loss: 0.5930
Test Loss: 0.6257
Accuracy: 0.9545
Precision: 0.9602
Recall: 0.9545
F1: 0.9545
--------------------------------------------------


Epoch 13/35: 100%|██████████| 23/23 [00:03<00:00,  6.87it/s, loss=0.8610]



Train Metrics:
--------------------------------------------------
Epoch: 12
Train Loss: 0.6009
Test Loss: 0.6327
Accuracy: 0.9091
Precision: 0.9293
Recall: 0.9091
F1: 0.9115
--------------------------------------------------


Epoch 14/35: 100%|██████████| 23/23 [00:03<00:00,  6.84it/s, loss=0.5523]



Train Metrics:
--------------------------------------------------
Epoch: 13
Train Loss: 0.5924
Test Loss: 0.6228
Accuracy: 0.9545
Precision: 0.9602
Recall: 0.9545
F1: 0.9545
--------------------------------------------------


Epoch 15/35: 100%|██████████| 23/23 [00:03<00:00,  6.84it/s, loss=0.5520]



Train Metrics:
--------------------------------------------------
Epoch: 14
Train Loss: 0.5922
Test Loss: 0.6250
Accuracy: 0.9545
Precision: 0.9602
Recall: 0.9545
F1: 0.9545
--------------------------------------------------


Epoch 16/35: 100%|██████████| 23/23 [00:03<00:00,  6.82it/s, loss=0.8593]



Train Metrics:
--------------------------------------------------
Epoch: 15
Train Loss: 0.6005
Test Loss: 0.6149
Accuracy: 0.9545
Precision: 0.9602
Recall: 0.9545
F1: 0.9545
--------------------------------------------------


Epoch 17/35: 100%|██████████| 23/23 [00:03<00:00,  6.83it/s, loss=0.5519]



Train Metrics:
--------------------------------------------------
Epoch: 16
Train Loss: 0.5920
Test Loss: 0.6242
Accuracy: 0.9545
Precision: 0.9602
Recall: 0.9545
F1: 0.9545
--------------------------------------------------


Epoch 18/35: 100%|██████████| 23/23 [00:03<00:00,  6.80it/s, loss=0.5548]



Train Metrics:
--------------------------------------------------
Epoch: 17
Train Loss: 0.5921
Test Loss: 0.6199
Accuracy: 0.9545
Precision: 0.9602
Recall: 0.9545
F1: 0.9545
--------------------------------------------------


Epoch 19/35: 100%|██████████| 23/23 [00:03<00:00,  6.66it/s, loss=0.5520]



Train Metrics:
--------------------------------------------------
Epoch: 18
Train Loss: 0.5919
Test Loss: 0.6251
Accuracy: 0.9545
Precision: 0.9602
Recall: 0.9545
F1: 0.9545
--------------------------------------------------


Epoch 20/35: 100%|██████████| 23/23 [00:03<00:00,  6.79it/s, loss=0.5527]



Train Metrics:
--------------------------------------------------
Epoch: 19
Train Loss: 0.5920
Test Loss: 0.6204
Accuracy: 0.9545
Precision: 0.9602
Recall: 0.9545
F1: 0.9545
--------------------------------------------------


Epoch 21/35: 100%|██████████| 23/23 [00:03<00:00,  6.77it/s, loss=0.5522]



Train Metrics:
--------------------------------------------------
Epoch: 20
Train Loss: 0.5919
Test Loss: 0.6228
Accuracy: 0.9545
Precision: 0.9602
Recall: 0.9545
F1: 0.9545
--------------------------------------------------


Epoch 22/35: 100%|██████████| 23/23 [00:03<00:00,  6.78it/s, loss=0.5521]



Train Metrics:
--------------------------------------------------
Epoch: 21
Train Loss: 0.5918
Test Loss: 0.6162
Accuracy: 0.9545
Precision: 0.9602
Recall: 0.9545
F1: 0.9545
--------------------------------------------------


Epoch 23/35: 100%|██████████| 23/23 [00:03<00:00,  6.75it/s, loss=0.5520]



Train Metrics:
--------------------------------------------------
Epoch: 22
Train Loss: 0.5918
Test Loss: 0.6174
Accuracy: 0.9545
Precision: 0.9602
Recall: 0.9545
F1: 0.9545
--------------------------------------------------


Epoch 24/35:   0%|          | 0/23 [00:00<?, ?it/s]