# 🍅 Tomato Leaf Disease Detection - YOLOv11 Training (30 Epochs)

This notebook trains a YOLOv11 model on the PlantVillage tomato disease dataset.

**Target Classes:**
1. Tomato_Bacterial_spot
2. Tomato_Early_blight
3. Tomato_Late_blight
4. Tomato_Septoria_leaf_spot
5. Tomato_Tomato_mosaic_virus
6. Tomato_healthy

**Training Configuration:**
- Epochs: 30
- Image Size: 640
- Batch Size: 16
- Model: YOLOv11n (nano)

## 📋 Step 1: Setup Environment

In [None]:
# Check GPU availability before training; the training cell below passes device=0.
# NOTE(review): if this command reports no GPU, switch the Colab runtime type
# to a GPU runtime before continuing - confirm against the training cell.
!nvidia-smi

In [None]:
# Install required packages (-q keeps the pip output short).
# ultralytics provides the YOLO class; kaggle provides the CLI used to
# download the dataset in a later cell.
!pip install -q ultralytics kaggle

# Import libraries shared by the later cells
import os
import shutil
from pathlib import Path
from google.colab import files  # used later for files.upload() and files.download()
import yaml
from ultralytics import YOLO

print("‚úÖ Environment setup complete!")

## 🔑 Step 2: Setup Kaggle API Credentials

In [None]:
# Upload your kaggle.json file
print("üì§ Please upload your kaggle.json file")
print("   Get it from: https://www.kaggle.com/account")
uploaded = files.upload()

# Setup Kaggle credentials
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

print("‚úÖ Kaggle API configured!")

## 📥 Step 3: Download PlantVillage Dataset

In [None]:
# Download PlantVillage dataset from Kaggle.
# The archive is unzipped into /content/plantvillage, the directory the
# filtering cell later uses as its dataset root.
# NOTE(review): the message below is printed unconditionally, so it does not
# prove the download succeeded - check the command output above it.
!kaggle datasets download -d arjuntejaswi/plant-village --unzip -p /content/plantvillage

print("‚úÖ Dataset downloaded!")

## 🍅 Step 4: Filter Tomato Classes

In [None]:
# Define target tomato classes. The list order matters: later cells derive the
# numeric YOLO class ids from each name's position in this list.
TOMATO_CLASSES = [
    'Tomato_Bacterial_spot',
    'Tomato_Early_blight',
    'Tomato_Late_blight',
    'Tomato_Septoria_leaf_spot',
    'Tomato_Tomato_mosaic_virus',
    'Tomato_healthy'
]

# Find dataset root: try the known archive layouts first, in priority order.
dataset_root = Path('/content/plantvillage')
possible_roots = [
    dataset_root / 'PlantVillage',
    dataset_root / 'New Plant Diseases Dataset(Augmented)' / 'New Plant Diseases Dataset(Augmented)',
    dataset_root
]

source_dir = None
for root in possible_roots:
    # is_dir() (rather than exists()) keeps iterdir() from raising on a plain file
    if root.is_dir() and any(f.is_dir() and 'Tomato' in f.name for f in root.iterdir()):
        source_dir = root
        break

if source_dir is None:
    # Search recursively: use the parent of the first 'Tomato*' directory found
    for item in dataset_root.rglob('*'):
        if item.is_dir() and 'Tomato' in item.name:
            source_dir = item.parent
            break

# Fail with an actionable message instead of a TypeError on `None / class_name`
if source_dir is None:
    raise FileNotFoundError(
        f"No 'Tomato*' class folders found under {dataset_root}; "
        "check that the dataset download cell completed successfully."
    )

print(f"üìÅ Dataset found at: {source_dir}")

# Filter and copy tomato classes
filtered_dir = Path('/content/tomato_filtered')
filtered_dir.mkdir(parents=True, exist_ok=True)

stats = {}
missing_classes = []
for class_name in TOMATO_CLASSES:
    source_class = source_dir / class_name
    if not source_class.is_dir():
        # Remember the gap so it can be reported instead of silently skipped
        missing_classes.append(class_name)
        continue

    dest_class = filtered_dir / class_name
    shutil.copytree(source_class, dest_class, dirs_exist_ok=True)

    # Count both extension spellings, matching what the conversion cell picks up
    image_count = len(list(dest_class.glob('*.jpg'))) + len(list(dest_class.glob('*.JPG')))
    stats[class_name] = image_count
    print(f"‚úÖ {class_name}: {image_count} images")

if missing_classes:
    # A missing class still gets a class id later but would have no samples,
    # so show the folder names that do exist to make a naming mismatch obvious.
    available = sorted(
        d.name for d in source_dir.iterdir() if d.is_dir() and 'Tomato' in d.name
    )
    print(f"WARNING: class folders not found in {source_dir}: {missing_classes}")
    print(f"         Available tomato folders: {available}")

print(f"\nüìä Total: {sum(stats.values())} images across {len(stats)} classes")

## 📦 Step 5: Convert to YOLO Format

In [None]:
import random
from PIL import Image  # not referenced in this cell; import kept unchanged

# Create YOLO directory structure
yolo_dir = Path('/content/tomato_yolo')
for split in ['train', 'val', 'test']:
    split_dir = yolo_dir / split
    # Remove output of a previous run first: otherwise files from an earlier
    # split stay behind and the same image can end up in several splits.
    if split_dir.exists():
        shutil.rmtree(split_dir)
    (split_dir / 'images').mkdir(parents=True, exist_ok=True)
    (split_dir / 'labels').mkdir(parents=True, exist_ok=True)

# Split ratios
TRAIN_RATIO = 0.7
VAL_RATIO = 0.2
TEST_RATIO = 0.1  # informational: test receives whatever remains after train and val
SPLIT_SEED = 42   # fixed seed so re-running the cell reproduces the same split

if abs(TRAIN_RATIO + VAL_RATIO + TEST_RATIO - 1.0) > 1e-9:
    raise ValueError("TRAIN_RATIO + VAL_RATIO + TEST_RATIO must sum to 1.0")

# Dedicated generator: reproducible without touching the global random state
split_rng = random.Random(SPLIT_SEED)

# Process each class; the class id is the position of the name in TOMATO_CLASSES
class_mapping = {name: idx for idx, name in enumerate(TOMATO_CLASSES)}
split_counts = {'train': 0, 'val': 0, 'test': 0}

for class_name, class_id in class_mapping.items():
    class_dir = filtered_dir / class_name
    # Sort for a stable starting order (glob order is filesystem dependent);
    # the set union avoids duplicates on case-insensitive filesystems.
    images = sorted(set(class_dir.glob('*.jpg')) | set(class_dir.glob('*.JPG')))
    if not images:
        print(f"WARNING: no images found for class '{class_name}' in {class_dir}")
        continue
    split_rng.shuffle(images)

    n_train = int(len(images) * TRAIN_RATIO)
    n_val = int(len(images) * VAL_RATIO)

    splits = {
        'train': images[:n_train],
        'val': images[n_train:n_train+n_val],
        'test': images[n_train+n_val:]
    }

    for split_name, split_images in splits.items():
        for img_path in split_images:
            # Copy image; the class name prefix keeps file names unique across classes
            img_name = f"{class_name}_{img_path.stem}{img_path.suffix}"
            dest_img = yolo_dir / split_name / 'images' / img_name
            shutil.copy2(img_path, dest_img)

            # Create label (full image bounding box):
            # "<class> <x_center> <y_center> <width> <height>", all normalized
            label_name = f"{class_name}_{img_path.stem}.txt"
            label_path = yolo_dir / split_name / 'labels' / label_name
            with open(label_path, 'w') as f:
                f.write(f"{class_id} 0.5 0.5 1.0 1.0\n")

            split_counts[split_name] += 1

print("üìä Dataset Split:")
print(f"   Train: {split_counts['train']} images")
print(f"   Val: {split_counts['val']} images")
print(f"   Test: {split_counts['test']} images")
print("‚úÖ YOLO format conversion complete!")

## 📝 Step 6: Create Dataset YAML

In [None]:
# Build the dataset description: root folder, per-split image folders
# (relative to the root), number of classes and the class names.
dataset_yaml = dict(
    path=str(yolo_dir),
    train='train/images',
    val='val/images',
    test='test/images',
    nc=len(TOMATO_CLASSES),
    names=TOMATO_CLASSES,
)

# Serialize once and reuse the same text for the file and the preview below
yaml_text = yaml.dump(dataset_yaml, default_flow_style=False)

yaml_path = yolo_dir / 'dataset.yaml'
with open(yaml_path, 'w') as f:
    f.write(yaml_text)

print(f"‚úÖ Dataset YAML created at: {yaml_path}")
print("\nüìÑ Content:")
print(yaml_text)

## 🚀 Step 7: Train YOLOv11 Model (30 Epochs)

In [None]:
# Create the YOLOv11n model from the 'yolo11n.pt' weights file
model = YOLO('yolo11n.pt')

# Run-level settings: dataset, schedule, input size, device and run name
run_settings = {
    'data': str(yaml_path),
    'epochs': 30,
    'imgsz': 640,
    'batch': 16,
    'device': 0,
    'name': 'tomato_disease_yolo11n',
}

# Augmentation settings: geometric transforms first, then HSV colour jitter
augmentation_settings = {
    'augment': True,
    'degrees': 15,
    'scale': 0.2,
    'translate': 0.1,
    'flipud': 0.2,
    'fliplr': 0.5,
    'hsv_h': 0.015,
    'hsv_s': 0.7,
    'hsv_v': 0.4,
}

# Train the model with both groups of settings
results = model.train(**run_settings, **augmentation_settings)

print("‚úÖ Training complete!")

## 📊 Step 8: Validate Model

In [None]:
# Evaluate the model on the 'test' split declared in the dataset YAML
metrics = model.val(data=str(yaml_path), split='test')

# Report the box metrics, one per line, with four decimals
box_metrics = metrics.box
print("\nüìä Validation Metrics:")
for metric_label, metric_value in (
    ('mAP50', box_metrics.map50),
    ('mAP50-95', box_metrics.map),
    ('Precision', box_metrics.mp),
    ('Recall', box_metrics.mr),
):
    print(f"   {metric_label}: {metric_value:.4f}")

## 💾 Step 9: Export Model

In [None]:
# Export to TFLite with int8 enabled; the dataset YAML is passed along with
# the export request.
export_options = {
    'format': 'tflite',
    'int8': True,
    'data': str(yaml_path),
}
model.export(**export_options)

print("‚úÖ Model exported to TFLite format!")

## 📥 Step 10: Download Trained Model

In [None]:
# Find the run directory. Prefer the directory recorded by the trainer: when
# training is repeated the run name gets a numeric suffix, so a hard-coded
# path could point at an older run. Fall back to the default location when
# no trainer is available in this session.
_trainer = getattr(globals().get('model'), 'trainer', None)
run_dir = Path(
    getattr(_trainer, 'save_dir', None)
    or '/content/runs/detect/tomato_disease_yolo11n'
)

# Artifacts to download: PyTorch weights, quantized TFLite model, training curves
best_model_path = run_dir / 'weights' / 'best.pt'
tflite_model_path = run_dir / 'weights' / 'best_saved_model' / 'best_int8.tflite'
results_img = run_dir / 'results.png'

for artifact_path in (best_model_path, tflite_model_path, results_img):
    if artifact_path.exists():
        files.download(str(artifact_path))
        print(f"‚úÖ Downloaded: {artifact_path.name}")
    else:
        # Report missing artifacts instead of skipping them silently
        print(f"WARNING: not found, skipping download: {artifact_path}")

## 🎯 Step 11: Test Inference

In [None]:
# Test on a few sample images. The conversion cell copies both '.jpg' and
# '.JPG' files, so match the extension case-insensitively; sorting makes the
# chosen sample stable between runs.
test_image_dir = yolo_dir / 'test' / 'images'
test_images = sorted(
    p for p in test_image_dir.glob('*')
    if p.is_file() and p.suffix.lower() == '.jpg'
)[:5]

if not test_images:
    print(f"WARNING: no .jpg/.JPG images found in {test_image_dir}")

results = []
for test_img in test_images:
    results = model.predict(source=str(test_img), save=True, conf=0.5)
    print(f"‚úÖ Processed: {test_img.name}")

# Report the directory the predictions were written to when the result
# exposes it (the folder name gets a suffix if 'predict' already exists);
# otherwise fall back to the default location.
predict_dir = '/content/runs/detect/predict'
if results:
    predict_dir = getattr(results[0], 'save_dir', None) or predict_dir

print(f"\nüìÅ Results saved to: {predict_dir}")

## 📊 Step 12: Display Training Results

In [None]:
from IPython.display import Image as IPImage, display

# Resolve the run directory from the trainer when available: repeated training
# runs get a numeric suffix on the run name, so a hard-coded path may show
# plots from an older run. Fall back to the default location otherwise.
_trainer = getattr(globals().get('model'), 'trainer', None)
_run_dir = Path(
    getattr(_trainer, 'save_dir', None)
    or '/content/runs/detect/tomato_disease_yolo11n'
)

# Training curves and confusion matrix written by the training run
results_path = _run_dir / 'results.png'
confusion_matrix_path = _run_dir / 'confusion_matrix.png'

for plot_path in (results_path, confusion_matrix_path):
    if plot_path.exists():
        display(IPImage(filename=str(plot_path)))
    else:
        # Make a missing plot visible instead of showing nothing
        print(f"WARNING: plot not found: {plot_path}")

## ✅ Training Complete!

Your YOLOv11 model has been trained for 30 epochs on the 6 tomato disease classes.

**Files Generated:**
- `best.pt` - Best PyTorch model
- `best_int8.tflite` - Quantized TFLite model for mobile deployment
- `results.png` - Training metrics visualization
- `confusion_matrix.png` - Model performance analysis

**Next Steps:**
1. Download the models using the cells above
2. Integrate into your Android app
3. Test with real tomato leaf images