In [None]:
%cd /content/drive/MyDrive/CropLeafDiseaseDetectionApp/crop-leaf-disease-detection

# Plant Disease dataset download from Kaggle

In [None]:
!pip install kaggle

In [None]:
# 1) Upload kaggle.json once per session
from google.colab import files
files.upload()  # choose kaggle.json

# 2) Configure
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# 3) Download + unzip into data/
!kaggle datasets download -d emmarex/plantdisease -p data/
!unzip data/plantdisease.zip -d data/
!rm -rf data/plantdisease.zip

# Crop Leaf Disease Detection — Starter Notebook

This Colab notebook initializes your environment and sets up the project structure for a fastai/PyTorch workflow.

**Sections:**
1. Setup & Dependencies  
2. Dataset Download & Organization  
3. Data Exploration & Visualization  
4. Baseline Model (Fastai CNN)  
5. Evaluation & Metrics  
6. Improvements (Transfer Learning, Augmentations)  


In [None]:
# Attach Google Drive to the Colab filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [3]:
# ===============================
# 1) Setup & Dependencies
# ===============================
import torch
import torchvision
from fastai.vision.all import *
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Report the installed Torch build and whether a CUDA device is usable.
torch_version = torch.__version__
cuda_ready = torch.cuda.is_available()
print('✅ Setup complete. Torch:', torch_version, '| CUDA available:', cuda_ready)


✅ Setup complete. Torch: 2.8.0+cu126 | CUDA available: True


In [None]:
# ===============================
# 2) GPU check (optional)
# ===============================
# Print the nvidia-smi status report; if that command fails (non-zero exit),
# the `||` fallback prints a short message instead.
!nvidia-smi || echo 'No NVIDIA GPU available in this runtime.'


In [5]:
# ===============================
# 3) (Optional) Mount Google Drive if your data is stored there
# ===============================
# Opt-in switch: leave False to skip mounting, flip to True to mount Drive here.
USE_DRIVE = False

if USE_DRIVE:
    # Imported lazily so the cell is a no-op when the switch is off.
    from google.colab import drive

    mount_point = '/content/drive'
    drive.mount(mount_point)
    print('Drive mounted.')


## Next Steps
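- **Dataset Download & Organization**: Put your dataset under `data/` with subfolders per class (e.g., `data/train/<class_name>/...`, `data/valid/<class_name>/...`). Note that the Kaggle download in this notebook is expected under `data/PlantVillage/<class_name>/...` (class folders only), so either create the `train/` and `valid/` folders yourself or use a random validation split.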
- **Exploration**: Visualize a few images and check class counts.
- **Baseline**: Create a `DataBlock` and a `Learner` with a simple CNN (or use transfer learning with ResNet-34/50).
- **Metrics**: Track accuracy, confusion matrix.
- **Improvements**: Augmentations, LR finder, unfreezing, Grad-CAM.


In [5]:
!pip -q install fastai==2.*  # safe version pin

from fastai.vision.all import *
from pathlib import Path

In [None]:
# Sanity check of the dataset layout data/PlantVillage/<class>/*.ext:
# how many class folders there are and how many images each one holds.
data_path = Path('data/PlantVillage')  # adjust if needed
assert data_path.exists(), f"Path not found: {data_path.resolve()}"

# Every immediate sub-directory is treated as one class.
class_dirs = [entry for entry in data_path.iterdir() if entry.is_dir()]
print(f"Found {len(class_dirs)} classes")

# Map class name -> image count, using fastai's get_image_files to find the
# image files below each class folder.
per_class_counts = {folder.name: len(get_image_files(folder)) for folder in class_dirs}

# Show the first 10 classes in alphabetical order.
preview_names = sorted(per_class_counts)[:10]
for name in preview_names:
    print(f" - {name} → {per_class_counts[name]}")

# Grand total over all classes.
total_imgs = sum(per_class_counts.values())
print(f"\nTotal images across all classes: {total_imgs}")

# (Optional) a handful of file paths, relative to the dataset root, as a spot check.
some_imgs = get_image_files(data_path)[:5]
print("\nSample files:")
for img_file in some_imgs:
    print(" •", img_file.relative_to(data_path))


In [None]:
# ===============================
# 4) Fastai DataBlock (template)
# ===============================
from fastai.vision.all import *
data_path = Path('data')  # adjust if your dataset lives elsewhere
VALID_PCT = 0.2  # share of images held out when there is no valid/ folder
SEED = 42        # fixes the random split so reruns use the same validation set

if (data_path/'train').is_dir() and (data_path/'valid').is_dir():
    # Pre-split ImageFolder layout: data/train/<class>/..., data/valid/<class>/...
    dls = ImageDataLoaders.from_folder(
        data_path,
        train='train',
        valid='valid',
        item_tfms=Resize(224)
    )
else:
    # No train/valid folders: fall back to the class-folder layout counted
    # earlier in this notebook (data/PlantVillage/<class>/...) and hold out
    # a random validation split instead.
    dls = ImageDataLoaders.from_folder(
        data_path/'PlantVillage',
        valid_pct=VALID_PCT,
        seed=SEED,
        item_tfms=Resize(224)
    )
dls.show_batch(max_n=8)

# Baseline learner with a pretrained model (ResNet-34)
learn = vision_learner(dls, resnet34, metrics=accuracy)
learn.fine_tune(1)


In [None]:
# ===============================
# 5) Evaluation
# ===============================
# Build the interpretation object from the learner created in the previous cell.
interp = ClassificationInterpretation.from_learner(learn)

# Draw the confusion matrix on a 6x6 figure.
cm_figsize = (6, 6)
interp.plot_confusion_matrix(figsize=cm_figsize)

# Last expression of the cell, so its result is shown as the cell output.
interp.most_confused(min_val=2)


In [11]:
# Show the repository state (current branch, modified and staged files).
!git status

Refresh index:  87% (7/8)Refresh index: 100% (8/8)Refresh index: 100% (8/8), done.
On branch main
Your branch is up to date with 'origin/main'.

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	[31mmodified:   notebooks/starter.ipynb[m

no changes added to commit (use "git add" and/or "git commit -a")


In [12]:
!git add notebooks/starter.ipynb
!git commit -m "Sanity checks and quick preview."
!git push

Author identity unknown

*** Please tell me who you are.

Run

  git config --global user.email "you@example.com"
  git config --global user.name "Your Name"

to set your account's default identity.
Omit --global to set the identity only in this repository.

fatal: unable to auto-detect email address (got 'root@6dc6c9744f75.(none)')
Everything up-to-date
