In [None]:
# Install the notebook's third-party dependencies; -q keeps pip's output quiet.
# NOTE(review): no versions are pinned, so a fresh install may pull different
# releases than the ones this notebook was developed with — consider pinning.
%pip install torch pandas torchvision scikit-learn tqdm kaggle -q

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [None]:
# upload kaggle.json first.
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [2]:
# !apt update -qq
# !apt install -qq unzip
!kaggle datasets download nirmalsankalana/sugarcane-leaf-disease-dataset
!unzip -q sugarcane-leaf-disease-dataset.zip -d data

Dataset URL: https://www.kaggle.com/datasets/nirmalsankalana/sugarcane-leaf-disease-dataset
License(s): CC0-1.0
sugarcane-leaf-disease-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)


In [1]:
import os
import shutil

import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Flatten data/<class folder>/<image> into data/images/<image> and write an
# index CSV (data/dataset.csv) with one row per image: its file name
# ("image_id") and its integer-encoded class ("label").

# Define paths
data_root = "data"
images_dir = os.path.join(data_root, "images")

# Create images directory if it doesn't exist
os.makedirs(images_dir, exist_ok=True)

# One record per moved image: its new path and the name of its class folder
dataset = []

# Listings are sorted because os.listdir() returns entries in arbitrary order,
# and that order decides both the CSV row order and which file receives a
# "_<n>" suffix when two class folders contain the same file name.
for subfolder in sorted(os.listdir(data_root)):
    subfolder_path = os.path.join(data_root, subfolder)

    # Every directory except the flat target directory is treated as a class folder
    if not os.path.isdir(subfolder_path) or subfolder == "images":
        continue

    for image in sorted(os.listdir(subfolder_path)):
        old_image_path = os.path.join(subfolder_path, image)

        # Only regular files are moved; nested directories are left in place
        if not os.path.isfile(old_image_path):
            continue

        # If the file name is already taken, append a counter until it is free
        base, ext = os.path.splitext(image)
        new_image_path = os.path.join(images_dir, image)
        counter = 1
        while os.path.exists(new_image_path):
            new_image_path = os.path.join(images_dir, f"{base}_{counter}{ext}")
            counter += 1

        # Move image
        shutil.move(old_image_path, new_image_path)

        # Append to dataset with updated path and original label
        dataset.append({"image_path": new_image_path, "label": subfolder})

    # Remove the class folder only if it is really empty now: os.rmdir() raises
    # OSError on a directory that still contains entries (e.g. a nested folder).
    if not os.listdir(subfolder_path):
        os.rmdir(subfolder_path)

# Nothing was moved: either the archive was never extracted or this cell has
# already run. Stop with a clear message instead of failing later on a missing
# column (and before dataset.csv could be touched).
if not dataset:
    raise RuntimeError(
        f"No images found in class folders under '{data_root}'. They may already have "
        f"been moved to '{images_dir}' by a previous run of this cell."
    )

df = pd.DataFrame(dataset).rename(columns={"image_path": "image_id"})
# Keep only the file name; basename is independent of data_root and of the
# platform's path separator, unlike stripping a hard-coded "data/images/" prefix.
df["image_id"] = df["image_id"].map(os.path.basename)

# Encode the class-folder names as integer labels
label_encoder = LabelEncoder()
df["label"] = label_encoder.fit_transform(df["label"])

df.to_csv(os.path.join(data_root, "dataset.csv"), index=False)

# Class-folder name -> integer label, for decoding predictions later
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))



In [1]:
# To load the dataset again (e.g. after a kernel restart) without re-running the
# preparation cell: read the index CSV that cell writes to data/dataset.csv.
# A relative path keeps this consistent with the "data/images" directory used
# when the Dataset objects are built, instead of a machine-specific absolute path.
import os

import pandas as pd

df = pd.read_csv(os.path.join("data", "dataset.csv"))

In [2]:
# Number of rows per encoded label, to check how balanced the classes are.
df["label"].value_counts()

0    522
2    518
3    514
4    505
1    462
Name: label, dtype: int64

In [3]:
import os

import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

from dataset import Dataset


In [6]:
# Stratified 80/10/10 split: hold out 20% of the rows, then cut that hold-out
# in half to obtain the validation and test frames.
train_df, temp_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df["label"],
)
val_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    random_state=42,
    stratify=temp_df["label"],
)

# Directory where the images are stored; change it if they live elsewhere
path = "data/images"
train_dataset, test_dataset, val_dataset = (
    Dataset(frame, path) for frame in (train_df, test_df, val_df)
)

In [7]:
# Sanity check: fetch the first training sample and display it.
# NOTE(review): this prints the whole image tensor; displaying only its shape
# and the label would keep the saved output short.
train_dataset[0]

(tensor([[[0.0000, 0.0000, 0.0000,  ..., 0.6753, 0.5931, 0.5621],
          [0.0000, 0.0000, 0.0000,  ..., 0.6346, 0.6686, 0.7150],
          [0.0000, 0.0000, 0.0000,  ..., 0.6740, 0.7435, 0.6417],
          ...,
          [0.3806, 0.4715, 0.4191,  ..., 0.5292, 0.1433, 0.0600],
          [0.5943, 0.4921, 0.3943,  ..., 0.4688, 0.2192, 0.1768],
          [0.3655, 0.4114, 0.4481,  ..., 0.4241, 0.0833, 0.1517]],
 
         [[0.0000, 0.0000, 0.0000,  ..., 0.7050, 0.6796, 0.6825],
          [0.0000, 0.0000, 0.0000,  ..., 0.7065, 0.7125, 0.7299],
          [0.0000, 0.0000, 0.0000,  ..., 0.7203, 0.6052, 0.7341],
          ...,
          [0.2602, 0.2976, 0.3171,  ..., 0.3846, 0.2141, 0.1209],
          [0.3493, 0.2964, 0.3771,  ..., 0.3416, 0.1036, 0.1141],
          [0.3116, 0.3375, 0.3660,  ..., 0.3470, 0.1300, 0.2143]],
 
         [[0.0000, 0.0000, 0.0000,  ..., 0.6261, 0.6201, 0.5801],
          [0.0000, 0.0000, 0.0000,  ..., 0.6971, 0.6458, 0.6589],
          [0.0000, 0.0000, 0.0000,  ...,

In [12]:
import itertools

from model import MaiaNet
from train import Trainer

# Candidate values for the hyper-parameter sweep (the sweep loop further down
# is currently commented out).
batch_sizes = [32, 16, 12, 8, 4]
lrs = [1e-4, 1e-5, 2e-5]
# Passed to Trainer by run_experiment for every run.
num_epochs = 35
# Passed to MaiaNet by run_experiment; the label counts shown earlier list five
# distinct labels (0-4).
num_classes = 5


def run_experiment(batch_size, lr, save_path="model.pth", seed=None):
    """Train and test one MaiaNet with the given hyper-parameters, then save it.

    The loaders are built over the notebook-level ``train_dataset``,
    ``val_dataset`` and ``test_dataset``; ``num_classes`` and ``num_epochs``
    are also read from the notebook namespace.

    Args:
        batch_size: Batch size used for all three loaders and forwarded to Trainer.
        lr: Learning rate forwarded to Trainer.
        save_path: File the trained model's ``state_dict`` is written to.
            Defaults to "model.pth"; pass a distinct path per run when sweeping
            several configurations, otherwise each run overwrites the previous
            checkpoint.
        seed: Optional integer. When given, ``torch.manual_seed(seed)`` is
            called before the loaders and the model are created, so runs with
            the same seed start from the same torch RNG state.

    Returns:
        None. The results are whatever ``Trainer.train`` / ``Trainer.test``
        report, plus the checkpoint written to ``save_path``.
    """
    if seed is not None:
        torch.manual_seed(seed)

    # Only the training data is shuffled; validation and test keep a fixed order.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = MaiaNet(num_classes)
    trainer = Trainer(model, train_loader, val_loader, test_loader, lr, num_epochs, batch_size=batch_size)

    trainer.train()
    trainer.test()
    # Persist the weights only (state_dict), not the whole model object.
    torch.save(trainer.model.state_dict(), save_path)

In [None]:
# Grid search over batch_sizes x lrs, currently disabled in favour of the single
# run below. NOTE(review): if re-enabled, the run_experiment call must be moved
# inside the loop, otherwise only the last combination is trained.
# for batch_size, lr in itertools.product(batch_sizes, lrs):
#     print(f"\nRunning experiment with batch_size={batch_size}, lr={lr}")

# Single training run.
# NOTE(review): lr = 2e-4 is not one of the values listed in `lrs`
# (1e-4, 1e-5, 2e-5) — confirm it is intentional and not a typo for 2e-5.
batch_size = 8
lr = 2e-4
run_experiment(batch_size, lr)

Epoch 1/35:  99%|█████████▉| 249/252 [01:51<00:01,  2.32it/s, loss=1.6654]