In [None]:
# Install the third-party packages this notebook uses into the kernel's environment (-q = quiet).
# NOTE(review): no versions are pinned, so a fresh install may resolve to different releases.
%pip install torch pandas torchvision scikit-learn tqdm kaggle -q

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [None]:
# Prerequisite: upload kaggle.json into the current working directory first.
# The file is then moved into ~/.kaggle/ and restricted to owner read/write (mode 600).
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [6]:
# Install unzip, download the sugarcane leaf disease dataset archive with the Kaggle CLI,
# and extract it into ./data.
!apt update -qq
!apt install -qq unzip
!kaggle datasets download nirmalsankalana/sugarcane-leaf-disease-dataset
!unzip -q sugarcane-leaf-disease-dataset.zip -d data

127 packages can be upgraded. Run 'apt list --upgradable' to see them.
Suggested packages:
  zip
The following NEW packages will be installed:
  unzip
0 upgraded, 1 newly installed, 0 to remove and 127 not upgraded.
Need to get 175 kB of archives.
After this operation, 386 kB of additional disk space will be used.
debconf: delaying package configuration, since apt-utils is not installed

7[0;23r8[1ASelecting previously unselected package unzip.
(Reading database ... 20729 files and directories currently installed.)
Preparing to unpack .../unzip_6.0-26ubuntu3.2_amd64.deb ...
7[24;0f[42m[30mProgress: [  0%][49m[39m [..........................................................] 87[24;0f[42m[30mProgress: [ 20%][49m[39m [###########...............................................] 8Unpacking unzip (6.0-26ubuntu3.2) ...
7[24;0f[42m[30mProgress: [ 40%][49m[39m [#######################...................................] 8Setting up unzip (6.0-26ubuntu3.2) ...
7[24;0f[

In [None]:
import os
import shutil

import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Every sub-directory of `data_root` (other than "images") is treated as one
# class. This cell moves all of their files into data/images/ and writes a CSV
# index with the columns `image_id` (file name) and `label` (encoded class).
data_root = "data"
images_dir = os.path.join(data_root, "images")

os.makedirs(images_dir, exist_ok=True)


def _unique_destination(directory, filename):
    """Return a path for `filename` inside `directory` that does not exist yet.

    If `filename` is already taken, `_1`, `_2`, ... is appended to its stem
    (before the extension) until an unused name is found.
    """
    candidate = os.path.join(directory, filename)
    base, ext = os.path.splitext(filename)
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(directory, f"{base}_{counter}{ext}")
        counter += 1
    return candidate


# One record per moved image: its new path and the name of its source folder.
dataset = []

# os.listdir() returns entries in arbitrary order; sorting keeps the CSV row
# order (and therefore any seeded split made from it) stable between runs.
for subfolder in sorted(os.listdir(data_root)):
    subfolder_path = os.path.join(data_root, subfolder)

    # Skip loose files and the flattened output directory itself.
    if subfolder == "images" or not os.path.isdir(subfolder_path):
        continue

    for image in sorted(os.listdir(subfolder_path)):
        old_image_path = os.path.join(subfolder_path, image)

        # Only regular files are moved; nested directories are left in place.
        if not os.path.isfile(old_image_path):
            continue

        new_image_path = _unique_destination(images_dir, image)
        shutil.move(old_image_path, new_image_path)
        dataset.append({"image_path": new_image_path, "label": subfolder})

    # os.rmdir() raises OSError on a non-empty directory, so only remove the
    # class folder when nothing (e.g. a nested directory) was left behind.
    if not os.listdir(subfolder_path):
        os.rmdir(subfolder_path)

if not dataset:
    # Happens e.g. when the cell is re-run after the folders were already
    # flattened; fail with an explicit message instead of a KeyError below.
    raise RuntimeError(
        f"No class folders containing images were found under {data_root!r}; "
        "nothing to index."
    )

df = pd.DataFrame(dataset).rename(columns={"image_path": "image_id"})
# Keep only the file name. basename() is separator-agnostic, unlike stripping
# a hard-coded "data/images/" prefix.
df["image_id"] = df["image_id"].map(os.path.basename)

# Replace the class folder names with integer codes.
label_encoder = LabelEncoder()
df["label"] = label_encoder.fit_transform(df["label"])

df.to_csv(os.path.join(data_root, "dataset.csv"), index=False)

# Class name -> integer code, kept for interpreting predictions later.
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))

In [1]:
# Reload a previously saved label table from disk.
# NOTE(review): the preparation cell writes data/dataset.csv, while this reads
# a different file - confirm this is the intended CSV.
import pandas as pd

dataset_csv_path = '/workspace/data1/dataset3.csv'
df = pd.read_csv(dataset_csv_path)

In [2]:
# Class balance check: number of rows per encoded label.
df["label"].value_counts()

label
0    522
2    518
3    514
4    505
1    462
Name: count, dtype: int64

In [3]:
import os

import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

# Select the GPU when CUDA is available, otherwise fall back to the CPU.
# NOTE(review): `device` is not referenced in the visible cells - presumably
# Trainer handles device placement itself; confirm.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

from dataset import Dataset


In [6]:
# Stratified 80/10/10 split: 20 % of the rows are held out first, then that
# hold-out is halved into validation and test. Both calls share one seed and
# stratify on the label column.
split_seed = 42
train_df, temp_df = train_test_split(
    df,
    test_size=0.2,
    random_state=split_seed,
    stratify=df["label"],
)
val_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    random_state=split_seed,
    stratify=temp_df["label"],
)

# Change the path to the directory where the images are stored
path = "/workspace"
# Built in the order train, test, val.
train_dataset, test_dataset, val_dataset = (
    Dataset(frame, path) for frame in (train_df, test_df, val_df)
)

In [7]:
# Sanity check: index the first training sample to confirm the dataset loads.
# NOTE(review): this displays the whole sample; showing only its shape would keep the output short.
train_dataset[0]

(tensor([[[0.0000, 0.0000, 0.0000,  ..., 0.3697, 0.2860, 0.3159],
          [0.0000, 0.0000, 0.0000,  ..., 0.4051, 0.3836, 0.4274],
          [0.0000, 0.0000, 0.0000,  ..., 0.4732, 0.5311, 0.6120],
          ...,
          [0.3315, 0.2916, 0.2550,  ..., 0.2480, 0.1837, 0.1911],
          [0.2646, 0.2746, 0.2504,  ..., 0.2729, 0.1733, 0.0865],
          [0.2399, 0.1785, 0.1543,  ..., 0.4887, 0.3729, 0.1246]],
 
         [[0.0000, 0.0000, 0.0000,  ..., 0.2494, 0.3146, 0.2992],
          [0.0000, 0.0000, 0.0000,  ..., 0.3173, 0.4177, 0.4482],
          [0.0000, 0.0000, 0.0000,  ..., 0.4257, 0.5152, 0.5343],
          ...,
          [0.4353, 0.2979, 0.2925,  ..., 0.1632, 0.1958, 0.2245],
          [0.2591, 0.2390, 0.1742,  ..., 0.4401, 0.1934, 0.2454],
          [0.2574, 0.1692, 0.1294,  ..., 0.4937, 0.3558, 0.2810]],
 
         [[0.0000, 0.0000, 0.0000,  ..., 0.2207, 0.2100, 0.1957],
          [0.0000, 0.0000, 0.0000,  ..., 0.2794, 0.4223, 0.2720],
          [0.0000, 0.0000, 0.0000,  ...,

In [8]:
import itertools

from model import MaiaNet
from train import Trainer

# Candidate values for a batch-size / learning-rate sweep (only referenced by
# the currently commented-out loop in the run cell).
batch_sizes = [32, 16, 12, 8, 4]
lrs = [1e-4, 1e-5, 2e-5]
num_epochs = 35  # passed to Trainer by run_experiment
num_classes = 5  # passed to MaiaNet by run_experiment; the label column holds five classes (0-4)


def run_experiment(batch_size, lr):
    """Train and then test a fresh MaiaNet for one (batch_size, lr) pair.

    Reads the module-level `train_dataset`, `val_dataset`, `test_dataset`,
    `num_classes` and `num_epochs`. Only the training loader is shuffled.

    Args:
        batch_size: batch size used by all three loaders and passed to Trainer.
        lr: learning rate passed to Trainer.

    Returns:
        None. `trainer.train()` and `trainer.test()` are called for their effects.
    """

    def make_loader(split, shuffle):
        # All loaders share the batch size; only the shuffle flag differs.
        return DataLoader(split, batch_size=batch_size, shuffle=shuffle)

    loaders = (
        make_loader(train_dataset, True),
        make_loader(val_dataset, False),
        make_loader(test_dataset, False),
    )

    trainer = Trainer(
        MaiaNet(num_classes),
        *loaders,
        lr,
        num_epochs,
        batch_size=batch_size,
    )

    trainer.train()
    trainer.test()

In [None]:
# Single run with one hand-picked configuration. The full sweep over
# itertools.product(batch_sizes, lrs) is currently disabled.
batch_size, lr = 16, 1e-4
run_experiment(batch_size, lr)


Running experiment with batch_size=32, lr=0.0001

Running experiment with batch_size=32, lr=1e-05

Running experiment with batch_size=32, lr=2e-05

Running experiment with batch_size=16, lr=0.0001

Running experiment with batch_size=16, lr=1e-05

Running experiment with batch_size=16, lr=2e-05

Running experiment with batch_size=12, lr=0.0001

Running experiment with batch_size=12, lr=1e-05

Running experiment with batch_size=12, lr=2e-05

Running experiment with batch_size=8, lr=0.0001

Running experiment with batch_size=8, lr=1e-05

Running experiment with batch_size=8, lr=2e-05

Running experiment with batch_size=4, lr=0.0001

Running experiment with batch_size=4, lr=1e-05

Running experiment with batch_size=4, lr=2e-05


Epoch 1/35: 100%|██████████| 126/126 [05:26<00:00,  2.59s/it, loss=1.5914]



Train Metrics:
--------------------------------------------------
Epoch: 0
Train Loss: 1.6117
Test Loss: 1.6072
Accuracy: 0.2143
Precision: 0.1145
Recall: 0.2143
F1: 0.0989
--------------------------------------------------


Epoch 2/35: 100%|██████████| 126/126 [05:17<00:00,  2.52s/it, loss=1.5751]



Train Metrics:
--------------------------------------------------
Running experiment with batch_size=16, lr=0.0001
Epoch: 1
Train Loss: 1.6061
Test Loss: 1.6065
Accuracy: 0.2024
Precision: 0.0410
Recall: 0.2024
F1: 0.0681
--------------------------------------------------


Epoch 3/35: 100%|██████████| 126/126 [05:15<00:00,  2.50s/it, loss=1.5901]



Train Metrics:
--------------------------------------------------
Epoch: 2
Train Loss: 1.6039
Test Loss: 1.6001
Accuracy: 0.2817
Precision: 0.1540
Recall: 0.2817
F1: 0.1773
--------------------------------------------------


Epoch 4/35: 100%|██████████| 126/126 [05:19<00:00,  2.54s/it, loss=1.5883]



Train Metrics:
--------------------------------------------------
Epoch: 3
Train Loss: 1.6000
Test Loss: 1.5995
Accuracy: 0.2659
Precision: 0.1663
Recall: 0.2659
F1: 0.1912
--------------------------------------------------


Epoch 5/35: 100%|██████████| 126/126 [05:13<00:00,  2.49s/it, loss=1.6275]



Train Metrics:
--------------------------------------------------
Epoch: 4
Train Loss: 1.5957
Test Loss: 1.5931
Accuracy: 0.2698
Precision: 0.2012
Recall: 0.2698
F1: 0.1896
--------------------------------------------------


Epoch 6/35: 100%|██████████| 126/126 [05:15<00:00,  2.50s/it, loss=1.5633]



Train Metrics:
--------------------------------------------------
Epoch: 5
Train Loss: 1.5907
Test Loss: 1.5902
Accuracy: 0.2659
Precision: 0.1250
Recall: 0.2659
F1: 0.1529
--------------------------------------------------


Epoch 7/35: 100%|██████████| 126/126 [05:22<00:00,  2.56s/it, loss=1.6039]



Train Metrics:
--------------------------------------------------
Epoch: 6
Train Loss: 1.5866
Test Loss: 1.5882
Accuracy: 0.2421
Precision: 0.1107
Recall: 0.2421
F1: 0.1228
--------------------------------------------------


Epoch 8/35: 100%|██████████| 126/126 [05:08<00:00,  2.45s/it, loss=1.5677]



Train Metrics:
--------------------------------------------------
Epoch: 7
Train Loss: 1.5799
Test Loss: 1.5850
Accuracy: 0.3175
Precision: 0.2859
Recall: 0.3175
F1: 0.2274
--------------------------------------------------


Epoch 9/35: 100%|██████████| 126/126 [05:27<00:00,  2.60s/it, loss=1.5877]



Train Metrics:
--------------------------------------------------
Epoch: 8
Train Loss: 1.5749
Test Loss: 1.5801
Accuracy: 0.3056
Precision: 0.2805
Recall: 0.3056
F1: 0.2386
--------------------------------------------------


Epoch 10/35: 100%|██████████| 126/126 [05:16<00:00,  2.51s/it, loss=1.5941]



Train Metrics:
--------------------------------------------------
Epoch: 9
Train Loss: 1.5702
Test Loss: 1.5776
Accuracy: 0.2778
Precision: 0.2210
Recall: 0.2778
F1: 0.1767
--------------------------------------------------


Epoch 11/35: 100%|██████████| 126/126 [05:14<00:00,  2.49s/it, loss=1.5377]



Train Metrics:
--------------------------------------------------
Epoch: 10
Train Loss: 1.5673
Test Loss: 1.5725
Accuracy: 0.3611
Precision: 0.5608
Recall: 0.3611
F1: 0.3101
--------------------------------------------------


Epoch 12/35: 100%|██████████| 126/126 [05:14<00:00,  2.49s/it, loss=1.5574]



Train Metrics:
--------------------------------------------------
Epoch: 11
Train Loss: 1.5607
Test Loss: 1.5678
Accuracy: 0.3214
Precision: 0.3967
Recall: 0.3214
F1: 0.2831
--------------------------------------------------


Epoch 13/35: 100%|██████████| 126/126 [05:13<00:00,  2.49s/it, loss=1.5500]



Train Metrics:
--------------------------------------------------
Epoch: 12
Train Loss: 1.5561
Test Loss: 1.5641
Accuracy: 0.3056
Precision: 0.3887
Recall: 0.3056
F1: 0.2617
--------------------------------------------------


Epoch 14/35: 100%|██████████| 126/126 [05:15<00:00,  2.50s/it, loss=1.5810]



Train Metrics:
--------------------------------------------------
Epoch: 13
Train Loss: 1.5510
Test Loss: 1.5624
Accuracy: 0.3571
Precision: 0.5094
Recall: 0.3571
F1: 0.3084
--------------------------------------------------


Epoch 15/35:  83%|████████▎ | 105/126 [04:21<00:49,  2.37s/it, loss=1.5309]


Train Metrics:
--------------------------------------------------
Epoch: 14
Train Loss: 1.5467
Test Loss: 1.5566
Accuracy: 0.3690
Precision: 0.3776
Recall: 0.3690
F1: 0.3529
--------------------------------------------------


Epoch 16/35: 100%|██████████| 126/126 [05:18<00:00,  2.53s/it, loss=1.5444]



Train Metrics:
--------------------------------------------------
Epoch: 15
Train Loss: 1.5417
Test Loss: 1.5554
Accuracy: 0.3175
Precision: 0.4628
Recall: 0.3175
F1: 0.2850
--------------------------------------------------


Epoch 17/35: 100%|██████████| 126/126 [05:19<00:00,  2.54s/it, loss=1.5374]



Train Metrics:
--------------------------------------------------
Epoch: 16
Train Loss: 1.5376
Test Loss: 1.5499
Accuracy: 0.3095
Precision: 0.2946
Recall: 0.3095
F1: 0.2525
--------------------------------------------------


Epoch 18/35: 100%|██████████| 126/126 [05:13<00:00,  2.49s/it, loss=1.5299]



Train Metrics:
--------------------------------------------------
Epoch: 17
Train Loss: 1.5326
Test Loss: 1.5440
Accuracy: 0.3492
Precision: 0.4130
Recall: 0.3492
F1: 0.2975
--------------------------------------------------


Epoch 19/35: 100%|██████████| 126/126 [06:40<00:00,  3.18s/it, loss=1.5336]



Train Metrics:
--------------------------------------------------
Epoch: 18
Train Loss: 1.5276
Test Loss: 1.5431
Accuracy: 0.4127
Precision: 0.4166
Recall: 0.4127
F1: 0.3917
--------------------------------------------------


Epoch 20/35: 100%|██████████| 126/126 [05:46<00:00,  2.75s/it, loss=1.5278]



Train Metrics:
--------------------------------------------------
Epoch: 19
Train Loss: 1.5247
Test Loss: 1.5419
Accuracy: 0.3333
Precision: 0.4717
Recall: 0.3333
F1: 0.2962
--------------------------------------------------


Epoch 21/35: 100%|██████████| 126/126 [05:49<00:00,  2.77s/it, loss=1.4796]



Train Metrics:
--------------------------------------------------
Epoch: 20
Train Loss: 1.5192
Test Loss: 1.5401
Accuracy: 0.3492
Precision: 0.3810
Recall: 0.3492
F1: 0.3200
--------------------------------------------------


Epoch 22/35: 100%|██████████| 126/126 [05:47<00:00,  2.76s/it, loss=1.4907]



Train Metrics:
--------------------------------------------------
Epoch: 21
Train Loss: 1.5159
Test Loss: 1.5351
Accuracy: 0.3611
Precision: 0.4847
Recall: 0.3611
F1: 0.3461
--------------------------------------------------


Epoch 23/35:   6%|▋         | 8/126 [00:22<04:58,  2.53s/it, loss=1.4846]