In [None]:
import os, shutil, random
from glob import glob

src_root = "../data/PlantVillage"
dst_root = "../data/plantvillage_split"

train_dir = os.path.join(dst_root, "train")
val_dir = os.path.join(dst_root, "val")

os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

In [None]:
classes = os.listdir(src_root)
split_ratio = 0.8  # 80% train / 20% val

for cls in classes:
    src_cls = os.path.join(src_root, cls)
    images = glob(os.path.join(src_cls, "*.jpg"))

    random.shuffle(images)
    split = int(len(images) * split_ratio)

    train_imgs = images[:split]
    val_imgs = images[split:]

    os.makedirs(os.path.join(train_dir, cls), exist_ok=True)
    os.makedirs(os.path.join(val_dir, cls), exist_ok=True)

    for img in train_imgs:
        shutil.copy(img, os.path.join(train_dir, cls))

    for img in val_imgs:
        shutil.copy(img, os.path.join(val_dir, cls))

print("✔ Train/val split completed successfully!")


In [None]:
import os
import shutil

def delete_empty_plantvillage_folders(root_dir):
    """
    Deletes folders named 'PlantVillage' inside root_dir 
    ONLY if they are empty.
    """
    os.rmdir(root_dir)

    print("Done!")
delete_empty_plantvillage_folders("../data/plantvillage_split/train/PlantVillage")
delete_empty_plantvillage_folders("../data/plantvillage_split/val/PlantVillage")


In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

base = os.path.abspath("..\\data\\plantvillage_split")
train_dir = os.path.join(base, "train")
val_dir = os.path.join(base, "val")

train_tfms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor()
])

val_tfms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])


train_data = datasets.ImageFolder(train_dir, 
                                  transform=train_tfms)
val_data = datasets.ImageFolder(val_dir, transform=val_tfms)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False)

num_classes = len(train_data.classes)
print("Number of disease classes:", num_classes)

In [None]:
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [None]:
import os

root = "../data/plantvillage_split/train"

for folder in os.listdir(root):
    old = os.path.join(root, folder)
    new = os.path.join(root, folder.lower())
    if old != new:
        os.rename(old, new)

root = "../data/plantvillage_split/val"

for folder in os.listdir(root):
    old = os.path.join(root, folder)
    new = os.path.join(root, folder.lower())
    if old != new:
        os.rename(old, new)

In [None]:
import os

train_path = "../data/plantvillage_split/train"
val_path = "../data/plantvillage_split/val"

def clean_names(path):
    for folder in os.listdir(path):
        old = os.path.join(path, folder)
        clean = folder.replace("__", "_")  # fix double underscores
        clean = clean.lower().strip()      # enforce lowercase
        new = os.path.join(path, clean)

        if old != new:
            print("RENAMING:", old, "→", new)
            os.rename(old, new)

clean_names(train_path)
clean_names(val_path)

In [None]:
import os
import re

root_paths = [
    "../data/plantvillage_split/train",
    "../data/plantvillage_split/val"
]

def clean_name(name):
    name = name.lower()                     # lowercase
    name = re.sub(r'_+', '_', name)         # replace multiple underscores with 1
    name = name.strip('_')                  # remove leading/trailing underscore
    return name

for root in root_paths:
    for folder in os.listdir(root):
        old = os.path.join(root, folder)
        new_name = clean_name(folder)
        new = os.path.join(root, new_name)

        if old != new:
            print("RENAMING:", folder, "--->", new_name)
            os.rename(old, new)

In [22]:
import splitfolders

splitfolders.ratio(
    "../data/PlantVillage",
    output="../data/plantvillage_split",
    seed=42,
    ratio=(.8, .2)
)

In [24]:
import os
import shutil

def delete_empty_plantvillage_folders(root_dir):
    """
    Deletes folders named 'PlantVillage' inside root_dir 
    ONLY if they are empty.
    """
    os.rmdir(root_dir)

    print("Done!")
delete_empty_plantvillage_folders("../data/plantvillage_split/train/PlantVillage")
delete_empty_plantvillage_folders("../data/plantvillage_split/val/PlantVillage")


Done!
Done!


In [25]:
train_dir = "../data/plantvillage_split/train"
val_dir   = "../data/plantvillage_split/val"

train_data = datasets.ImageFolder(train_dir, transform=train_tfms)
val_data   = datasets.ImageFolder(val_dir, transform=val_tfms)

print("Train images:", len(train_data))
print("Val images:", len(val_data))
print("Classes:", train_data.classes)

Train images: 25976
Val images: 7845
Classes: ['Pepper__bell___Bacterial_spot', 'Pepper__bell___healthy', 'Potato___Early_blight', 'Potato___Late_blight', 'Potato___healthy', 'Tomato__Target_Spot', 'Tomato__Tomato_YellowLeaf__Curl_Virus', 'Tomato__Tomato_mosaic_virus', 'pepper_bell_bacterial_spot', 'pepper_bell_healthy', 'potato_early_blight', 'potato_healthy', 'potato_late_blight', 'tomato_bacterial_spot', 'tomato_early_blight', 'tomato_healthy', 'tomato_late_blight', 'tomato_leaf_mold', 'tomato_septoria_leaf_spot', 'tomato_spider_mites_two_spotted_spider_mite', 'tomato_target_spot', 'tomato_tomato_mosaic_virus', 'tomato_tomato_yellowleaf_curl_virus']


In [26]:
from torch.utils.data import DataLoader

batch_size = 32

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(val_data, batch_size=batch_size)

len(train_loader), len(val_loader)

(812, 246)