In [1]:
# Upload your kaggle.json first
# files.upload() opens Colab's file picker; the selected file is written to the
# current working directory (the cell output reports "Saving kaggle.json to kaggle.json").
from google.colab import files
files.upload()  # Upload kaggle.json here

# Set up Kaggle API
# Copy the uploaded token into ~/.kaggle/ and restrict it to owner read/write
# (mode 600) before the kaggle CLI is invoked in the next cell.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json


Saving kaggle.json to kaggle.json


In [2]:
# Download and unzip PlantVillage dataset
# Uses the kaggle CLI, which reads the token installed under ~/.kaggle/.
!kaggle datasets download -d emmarex/plantdisease
# -q silences unzip's per-file listing; the archive is extracted under data/.
!unzip -q plantdisease.zip -d data/

# Check structure
# The listing shows a nested PlantVillage/ folder, which is why the image root
# used later is "data/plantvillage/PlantVillage".
!ls data/plantvillage


Dataset URL: https://www.kaggle.com/datasets/emmarex/plantdisease
License(s): unknown
Downloading plantdisease.zip to /content
 94% 615M/658M [00:03<00:00, 235MB/s]
100% 658M/658M [00:03<00:00, 177MB/s]
PlantVillage


In [3]:
import os
import torch
import torchvision
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from tqdm import tqdm


In [4]:
# Hyperparameters
BATCH_SIZE = 32  # mini-batch size handed to create_dataloaders
NUM_EPOCHS = 10  # number of train/validate passes in the training loop
LEARNING_RATE = 1e-4  # learning rate for the Adam optimizer
FREEZE_FEATURES = True  # forwarded to create_model; True disables grads on pretrained weights
# Save location for the trained weights. The directory sits under /content/drive,
# so Google Drive must be mounted for the file to persist beyond the Colab runtime.
MODEL_DIR = "/content/drive/MyDrive/ML"
MODEL_NAME = "resnet50_plant_disease.pth"  # file name of the saved state_dict

# Data paths
# NOTE(review): train_dir / val_dir are not referenced by any other cell visible
# here; the loaders are built from a random split of a single image root instead.
# Confirm these folders actually exist before relying on them.
train_dir = "data/plantvillage/PlantVillage/Train"
val_dir = "data/plantvillage/PlantVillage/Validation"

# Device setup
# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [None]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

def create_dataloaders(data_dir, batch_size=32, val_split=0.2, seed=42):
    """Build train/validation loaders from a single ``ImageFolder`` directory.

    Images are resized to 224x224, converted to tensors and normalised with
    the mean/std values hard-coded below. The full dataset is then divided
    into a training and a validation subset with ``random_split``.

    Args:
        data_dir: Root directory passed to ``datasets.ImageFolder``.
        batch_size: Batch size used by both loaders.
        val_split: Fraction of the dataset reserved for validation; must
            satisfy ``0 <= val_split < 1``.
        seed: Seed for the generator that drives the split, so the same
            images land in the validation set on every run. Pass ``None``
            to fall back to the global RNG (a different split each run).

    Returns:
        ``(train_loader, val_loader, num_classes)`` where ``num_classes`` is
        the number of classes discovered by ``ImageFolder``.

    Raises:
        ValueError: If ``val_split`` is outside ``[0, 1)``.
    """
    if not 0.0 <= val_split < 1.0:
        raise ValueError(f"val_split must be in [0, 1), got {val_split!r}")

    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

    # Load full dataset
    full_dataset = datasets.ImageFolder(root=data_dir, transform=transform)
    class_names = full_dataset.classes
    total_size = len(full_dataset)
    val_size = int(val_split * total_size)
    train_size = total_size - val_size

    # A dedicated, seeded generator keeps the partition reproducible without
    # touching the global RNG state used elsewhere (e.g. for shuffling).
    split_kwargs = {}
    if seed is not None:
        split_kwargs["generator"] = torch.Generator().manual_seed(seed)
    train_dataset, val_dataset = random_split(
        full_dataset, [train_size, val_size], **split_kwargs
    )

    # Pinned host memory only helps host-to-GPU copies; requesting it on a
    # CPU-only runtime just triggers a warning.
    pin_memory = torch.cuda.is_available()

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              num_workers=2, pin_memory=pin_memory)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                            num_workers=2, pin_memory=pin_memory)

    return train_loader, val_loader, len(class_names)


In [7]:
# Image root: the nested PlantVillage/ folder that holds the class directories.
data_dir = "data/plantvillage/PlantVillage"

# Split that folder into train/validation loaders and record the class count.
train_loader, val_loader, num_classes = create_dataloaders(
    data_dir,
    batch_size=BATCH_SIZE,
)

In [8]:
def create_model(num_classes, freeze_features=True):
    """Build an ImageNet-pretrained ResNet-50 with a fresh classification head.

    Args:
        num_classes: Output size of the replacement ``fc`` layer.
        freeze_features: When true, ``requires_grad`` is disabled on every
            pretrained parameter before the head is swapped, so only the new
            ``fc`` layer remains trainable.

    Returns:
        The model, moved to the notebook-level ``device``.
    """
    # torchvision >= 0.13 deprecates ``pretrained=True`` in favour of explicit
    # weight enums. IMAGENET1K_V1 is the checkpoint ``pretrained=True``
    # resolves to, so the loaded weights are unchanged. Older releases have
    # no enum, hence the fallback to the legacy flag.
    weights_enum = getattr(models, "ResNet50_Weights", None)
    if weights_enum is not None:
        model = models.resnet50(weights=weights_enum.IMAGENET1K_V1)
    else:
        model = models.resnet50(pretrained=True)

    if freeze_features:
        for param in model.parameters():
            param.requires_grad = False

    # The new head is created after freezing, so its parameters keep the
    # default requires_grad=True.
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model.to(device)

# Instantiate the network once; FREEZE_FEATURES decides whether the pretrained
# weights stay fixed.
model = create_model(num_classes, freeze_features=FREEZE_FEATURES)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 194MB/s]


In [9]:
def calculate_accuracy(y_pred, y_true):
    """Return the fraction of rows whose arg-max along dim 1 equals the label.

    Args:
        y_pred: Tensor of per-class scores; the predicted class is taken as
            the arg-max over dimension 1.
        y_true: Tensor of target class indices, one per row of ``y_pred``.

    Returns:
        Number of matching predictions divided by ``y_true.size(0)``.
    """
    predicted_labels = y_pred.argmax(dim=1)
    num_hits = predicted_labels.eq(y_true).sum().item()
    batch_len = y_true.size(0)
    return num_hits / batch_len

def save_model(model, save_dir, filename):
    """Write ``model``'s state_dict to ``save_dir/filename``.

    The target directory is created if it does not exist yet, and the final
    path is printed once the file has been written.
    """
    os.makedirs(save_dir, exist_ok=True)
    destination = os.path.join(save_dir, filename)
    weights = model.state_dict()
    torch.save(weights, destination)
    print(f"✅ Model saved at: {destination}")


In [10]:
def train_one_epoch(model, dataloader, loss_fn, optimizer, device):
    """Run one optimisation pass over ``dataloader``.

    The model is switched to training mode, and for every batch the loss is
    computed, back-propagated and applied through ``optimizer``.

    Returns:
        The sum of the per-batch loss values divided by ``len(dataloader)``
        (i.e. the mean over batches, not over samples).
    """
    model.train()
    batch_losses = []

    for batch_x, batch_y in tqdm(dataloader, desc="Training"):
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        loss = loss_fn(model(batch_x), batch_y)

        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    return sum(batch_losses, 0.0) / len(dataloader)

def eval_model(model, dataloader, loss_fn, device):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    Returns:
        A ``(loss, accuracy)`` tuple. The loss is the sum of per-batch loss
        values divided by ``len(dataloader)``. The accuracy weights each
        batch's accuracy by its size and divides by the total sample count.
    """
    model.eval()
    loss_sum = 0.0
    weighted_acc_sum = 0.0
    seen = 0

    with torch.no_grad():
        for batch_x, batch_y in tqdm(dataloader, desc="Evaluating"):
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            batch_len = batch_x.size(0)

            logits = model(batch_x)
            loss_sum += loss_fn(logits, batch_y).item()
            # Scale by the batch size so a short final batch is not over-weighted.
            weighted_acc_sum += calculate_accuracy(logits, batch_y) * batch_len
            seen += batch_len

    return loss_sum / len(dataloader), weighted_acc_sum / seen


In [11]:
loss_fn = nn.CrossEntropyLoss()

# Optimise every parameter that still requires gradients instead of hard-wiring
# the head. With FREEZE_FEATURES = True this is exactly the fc layer; with
# FREEZE_FEATURES = False the un-frozen backbone is trained too, rather than
# being silently left out of the optimizer.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=LEARNING_RATE)

# One training pass followed by one validation pass per epoch.
for epoch in range(NUM_EPOCHS):
    print(f"\n🔁 Epoch {epoch+1}/{NUM_EPOCHS}")
    train_loss = train_one_epoch(model, train_loader, loss_fn, optimizer, device)
    val_loss, val_acc = eval_model(model, val_loader, loss_fn, device)

    print(f"📊 Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2%}")



🔁 Epoch 1/10


Training: 100%|██████████| 516/516 [01:01<00:00,  8.41it/s]
Evaluating: 100%|██████████| 129/129 [00:15<00:00,  8.10it/s]


📊 Train Loss: 1.6594 | Val Loss: 1.0414 | Val Acc: 78.41%

🔁 Epoch 2/10


Training: 100%|██████████| 516/516 [01:01<00:00,  8.43it/s]
Evaluating: 100%|██████████| 129/129 [00:14<00:00,  8.87it/s]


📊 Train Loss: 0.9081 | Val Loss: 0.6961 | Val Acc: 85.39%

🔁 Epoch 3/10


Training: 100%|██████████| 516/516 [01:00<00:00,  8.48it/s]
Evaluating: 100%|██████████| 129/129 [00:14<00:00,  8.78it/s]


📊 Train Loss: 0.6737 | Val Loss: 0.5456 | Val Acc: 87.01%

🔁 Epoch 4/10


Training: 100%|██████████| 516/516 [01:02<00:00,  8.32it/s]
Evaluating: 100%|██████████| 129/129 [00:14<00:00,  8.75it/s]


📊 Train Loss: 0.5627 | Val Loss: 0.4699 | Val Acc: 88.81%

🔁 Epoch 5/10


Training: 100%|██████████| 516/516 [01:00<00:00,  8.48it/s]
Evaluating: 100%|██████████| 129/129 [00:14<00:00,  8.66it/s]


📊 Train Loss: 0.4910 | Val Loss: 0.4156 | Val Acc: 89.85%

🔁 Epoch 6/10


Training: 100%|██████████| 516/516 [01:01<00:00,  8.45it/s]
Evaluating: 100%|██████████| 129/129 [00:14<00:00,  8.71it/s]


📊 Train Loss: 0.4425 | Val Loss: 0.3790 | Val Acc: 90.62%

🔁 Epoch 7/10


Training: 100%|██████████| 516/516 [01:00<00:00,  8.48it/s]
Evaluating: 100%|██████████| 129/129 [00:14<00:00,  8.81it/s]


📊 Train Loss: 0.4072 | Val Loss: 0.3522 | Val Acc: 90.99%

🔁 Epoch 8/10


Training: 100%|██████████| 516/516 [01:00<00:00,  8.51it/s]
Evaluating: 100%|██████████| 129/129 [00:14<00:00,  8.80it/s]


📊 Train Loss: 0.3779 | Val Loss: 0.3291 | Val Acc: 90.74%

🔁 Epoch 9/10


Training: 100%|██████████| 516/516 [01:02<00:00,  8.30it/s]
Evaluating: 100%|██████████| 129/129 [00:14<00:00,  8.76it/s]


📊 Train Loss: 0.3541 | Val Loss: 0.3119 | Val Acc: 91.96%

🔁 Epoch 10/10


Training: 100%|██████████| 516/516 [01:00<00:00,  8.50it/s]
Evaluating: 100%|██████████| 129/129 [00:14<00:00,  8.82it/s]

📊 Train Loss: 0.3342 | Val Loss: 0.2988 | Val Acc: 91.54%





In [12]:
# MODEL_DIR lives under /content/drive. Unless Google Drive is mounted there,
# os.makedirs inside save_model would just create a local folder on the
# ephemeral Colab disk and the weights would be lost with the runtime.
# Mounting is a no-op when Drive is already attached.
from google.colab import drive

drive.mount("/content/drive")
save_model(model, MODEL_DIR, MODEL_NAME)

✅ Model saved at: /content/drive/MyDrive/ML/resnet50_plant_disease.pth
