In [4]:
!pip install torch pandas torchvision scikit-learn tqdm kaggle -q

In [2]:
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [5]:
!kaggle datasets download nirmalsankalana/sugarcane-leaf-disease-dataset
!unzip -q sugarcane-leaf-disease-dataset.zip -d data

Downloading cassava-leaf-disease-classification.zip to /home/ubuntu
100%|██████████████████████████████████████▉| 5.76G/5.76G [00:40<00:00, 230MB/s]
100%|███████████████████████████████████████| 5.76G/5.76G [00:40<00:00, 153MB/s]


In [3]:
import json
import os
import shutil

import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Flatten the extracted dataset: every file under data/<class>/ is moved into
# data/images/, and data/dataset.csv records one (image_id, encoded label) row
# per moved file. The class-name -> integer mapping is saved next to the CSV so
# that re-running this cell after the move reloads the table instead of failing.

# Define paths
data_root = "data"
images_dir = os.path.join(data_root, "images")
dataset_csv_path = os.path.join(data_root, "dataset.csv")
label_mapping_path = os.path.join(data_root, "label_mapping.json")

# Create images directory if it doesn't exist
os.makedirs(images_dir, exist_ok=True)

# One record per moved image: its new path and the class folder it came from.
dataset = []

# sorted(): os.listdir returns entries in arbitrary order, and the row order of
# the resulting table feeds the seeded train/val/test split further down.
for subfolder in sorted(os.listdir(data_root)):
    subfolder_path = os.path.join(data_root, subfolder)

    # Only class folders are processed; loose files and the target folder are skipped.
    if not os.path.isdir(subfolder_path) or subfolder == "images":
        continue

    for image in sorted(os.listdir(subfolder_path)):
        old_image_path = os.path.join(subfolder_path, image)

        # Nested directories are left where they are.
        if not os.path.isfile(old_image_path):
            continue

        # Different classes may reuse a file name; append _1, _2, ... until the
        # target name is free so nothing in data/images is overwritten.
        new_image_path = os.path.join(images_dir, image)
        base, ext = os.path.splitext(image)
        counter = 1
        while os.path.exists(new_image_path):
            new_image_path = os.path.join(images_dir, f"{base}_{counter}{ext}")
            counter += 1

        shutil.move(old_image_path, new_image_path)
        dataset.append({"image_path": new_image_path, "label": subfolder})

    # Remove the class folder only when it is really empty; os.rmdir raises on
    # a non-empty directory (e.g. one that still holds a nested folder).
    if not os.listdir(subfolder_path):
        os.rmdir(subfolder_path)

if dataset:
    # Fresh extraction: build the table from what was just moved.
    # basename() instead of stripping a hard-coded "data/images/" prefix, so the
    # result does not depend on the platform's path separator.
    df = pd.DataFrame(
        {
            "image_id": [os.path.basename(record["image_path"]) for record in dataset],
            "label": [record["label"] for record in dataset],
        }
    )

    label_encoder = LabelEncoder()
    df["label"] = label_encoder.fit_transform(df["label"])
    df.to_csv(dataset_csv_path, index=False)

    # classes_ is sorted and class i is encoded as i, so enumerate() yields the
    # same pairs as transform(classes_), with plain Python types for JSON.
    label_mapping = {str(name): code for code, name in enumerate(label_encoder.classes_)}
    with open(label_mapping_path, "w") as mapping_file:
        json.dump(label_mapping, mapping_file, indent=2)
elif os.path.exists(dataset_csv_path) and os.path.exists(label_mapping_path):
    # Nothing left to move: the images were flattened by an earlier run, so
    # restore the table and the encoder from what that run saved.
    df = pd.read_csv(dataset_csv_path, dtype={"image_id": str})
    with open(label_mapping_path) as mapping_file:
        label_mapping = json.load(mapping_file)
    label_encoder = LabelEncoder().fit(list(label_mapping))
    # The refitted encoder must reproduce the saved codes exactly.
    assert all(
        label_mapping[name] == code for code, name in enumerate(label_encoder.classes_)
    ), "saved label mapping is inconsistent with the refitted encoder"
else:
    raise FileNotFoundError(
        f"No class folders with images found under '{data_root}' and no saved "
        f"'{dataset_csv_path}' / '{label_mapping_path}' to reload. "
        "Extract the dataset archive into the data directory first."
    )

In [None]:
import os

import pandas as pd
import torch
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


# df = pd.read_csv("train.csv")
# df = pd.read_csv("cassava_data/train.csv")

# print(df.label.value_counts())
# balanced_df = pd.DataFrame()

# for label in df["label"].unique():
#     label_df = df[df["label"] == label]
#     if len(label_df) > 1000:
#         _, sampled_df = train_test_split(label_df, test_size=500, random_state=42, stratify=label_df["label"])
#     balanced_df = pd.concat([balanced_df, sampled_df])



In [None]:
# Split and loader settings are named once so that an experiment (for example
# a different batch size) is a one-line change instead of three.
SPLIT_SEED = 42
HOLDOUT_FRACTION = 0.2       # share of all rows kept out of the training split
TEST_SHARE_OF_HOLDOUT = 0.5  # the held-out rows are halved into validation and test
LOADER_BATCH_SIZE = 32

# Stratified split: 80% train, then the remaining 20% is split evenly into
# validation and test, keeping the label proportions in every part.
train_df, temp_df = train_test_split(
    df,
    test_size=HOLDOUT_FRACTION,
    random_state=SPLIT_SEED,
    stratify=df["label"],
)
val_df, test_df = train_test_split(
    temp_df,
    test_size=TEST_SHARE_OF_HOLDOUT,
    random_state=SPLIT_SEED,
    stratify=temp_df["label"],
)

# The three splits together must account for every row of the table.
assert len(train_df) + len(val_df) + len(test_df) == len(df)

# NOTE(review): the only `Dataset` visible in this notebook is the import of
# torch.utils.data.Dataset, which does not accept a DataFrame. Presumably a
# project-specific Dataset class is defined in a cell that is not shown —
# confirm it is defined before this cell on a fresh kernel.
train_dataset = Dataset(train_df)
test_dataset = Dataset(test_df)
val_dataset = Dataset(val_df)

# Only the training data is shuffled; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=LOADER_BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=LOADER_BATCH_SIZE, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=LOADER_BATCH_SIZE, shuffle=False)

# TODO: try a batch size of 12 and compare the results.
# TODO: evaluate with 7-fold cross-validation.

In [None]:
# Build the trainer. The loaders are passed positionally in the order
# train, test, validation.
# NOTE(review): `Trainer` and `model` are not defined in the visible cells.
# The last two arguments are presumably the learning rate (1e-4) and the
# number of epochs (40, matching the "Epoch 1/40" progress output) — confirm
# against Trainer's signature, including the test/validation loader order.
trainer = Trainer(
    model,
    train_loader,
    test_loader,
    val_loader,
    1e-4,
    40,
)
# TODO: try 35 epochs.
# TODO: grid-search the learning rate, weight decay and momentum.

In [None]:
# Run the training loop on the trainer built above.
# NOTE(review): in the recorded run the train/val loss only moves from about
# 1.61 to about 1.59 over 11 epochs. That is close to ln(5) ≈ 1.609, the loss
# of a uniform guess over five classes (presumably the number of classes
# here — confirm). Worth checking the learning rate and whether the model's
# weights are actually being updated.
trainer.train()

  with amp.autocast():
Epoch 1/40: 100%|██████████| 63/63 [01:25<00:00,  1.36s/it, loss=1.6206]
  with amp.autocast():



Epoch 1: Train Loss = 1.6118 | Val Loss = 1.6081

Validation Metrics:
--------------------------------------------------
Accuracy: 0.2055
Precision: 0.0422
Recall: 0.2055
F1: 0.0701
--------------------------------------------------
GPU Memory allocated: 2.62 GB


Epoch 2/40: 100%|██████████| 63/63 [01:16<00:00,  1.21s/it, loss=1.6312]



Epoch 2: Train Loss = 1.6081 | Val Loss = 1.6062

Validation Metrics:
--------------------------------------------------
Accuracy: 0.2095
Precision: 0.2400
Recall: 0.2095
F1: 0.0781
--------------------------------------------------
GPU Memory allocated: 2.62 GB


Epoch 3/40: 100%|██████████| 63/63 [01:16<00:00,  1.21s/it, loss=1.5919]



Epoch 3: Train Loss = 1.6066 | Val Loss = 1.6049

Validation Metrics:
--------------------------------------------------
Accuracy: 0.2055
Precision: 0.0422
Recall: 0.2055
F1: 0.0701
--------------------------------------------------
GPU Memory allocated: 2.62 GB


Epoch 4/40: 100%|██████████| 63/63 [01:16<00:00,  1.21s/it, loss=1.6083]



Epoch 4: Train Loss = 1.6069 | Val Loss = 1.6035

Validation Metrics:
--------------------------------------------------
Accuracy: 0.2055
Precision: 0.0422
Recall: 0.2055
F1: 0.0701
--------------------------------------------------
GPU Memory allocated: 2.62 GB


Epoch 5/40: 100%|██████████| 63/63 [01:16<00:00,  1.21s/it, loss=1.6052]



Epoch 5: Train Loss = 1.6029 | Val Loss = 1.6021

Validation Metrics:
--------------------------------------------------
Accuracy: 0.2451
Precision: 0.2003
Recall: 0.2451
F1: 0.1434
--------------------------------------------------
GPU Memory allocated: 2.62 GB


Epoch 6/40: 100%|██████████| 63/63 [01:16<00:00,  1.22s/it, loss=1.6167]



Epoch 6: Train Loss = 1.6032 | Val Loss = 1.6012

Validation Metrics:
--------------------------------------------------
Accuracy: 0.2095
Precision: 0.0439
Recall: 0.2095
F1: 0.0726
--------------------------------------------------
GPU Memory allocated: 2.62 GB


Epoch 7/40: 100%|██████████| 63/63 [01:16<00:00,  1.21s/it, loss=1.5872]



Epoch 7: Train Loss = 1.6019 | Val Loss = 1.5979

Validation Metrics:
--------------------------------------------------
Accuracy: 0.2925
Precision: 0.3357
Recall: 0.2925
F1: 0.2296
--------------------------------------------------
GPU Memory allocated: 2.62 GB


Epoch 8/40: 100%|██████████| 63/63 [01:16<00:00,  1.22s/it, loss=1.5970]



Epoch 8: Train Loss = 1.5988 | Val Loss = 1.5962

Validation Metrics:
--------------------------------------------------
Accuracy: 0.2885
Precision: 0.4138
Recall: 0.2885
F1: 0.1982
--------------------------------------------------
GPU Memory allocated: 2.62 GB


Epoch 9/40: 100%|██████████| 63/63 [01:17<00:00,  1.22s/it, loss=1.6189]



Epoch 9: Train Loss = 1.5970 | Val Loss = 1.5957

Validation Metrics:
--------------------------------------------------
Accuracy: 0.3083
Precision: 0.4274
Recall: 0.3083
F1: 0.2093
--------------------------------------------------
GPU Memory allocated: 2.62 GB


Epoch 10/40: 100%|██████████| 63/63 [01:16<00:00,  1.22s/it, loss=1.6052]



Epoch 10: Train Loss = 1.5958 | Val Loss = 1.5930

Validation Metrics:
--------------------------------------------------
Accuracy: 0.3123
Precision: 0.2284
Recall: 0.3123
F1: 0.2381
--------------------------------------------------
GPU Memory allocated: 2.62 GB


Epoch 11/40: 100%|██████████| 63/63 [01:16<00:00,  1.21s/it, loss=1.5938]



Epoch 11: Train Loss = 1.5935 | Val Loss = 1.5924

Validation Metrics:
--------------------------------------------------
Accuracy: 0.3597
Precision: 0.3786
Recall: 0.3597
F1: 0.3160
--------------------------------------------------
GPU Memory allocated: 2.62 GB


Epoch 12/40:  25%|██▌       | 16/63 [00:19<00:55,  1.19s/it, loss=1.5981]

In [None]:
# trainer.train_epoch(1)