# 🍅 Tomato Disease Detection - Professional Training Notebook
This notebook trains a ResNet50 model on 20,000 images (5,000 per class).

**Classes:** Healthy, Others, Leaf Blight, Bacterial Spot

In [None]:
# 1. Install dependencies
!pip install -q opencv-python tensorflow tqdm

In [None]:
# 2. Imports
import os
import numpy as np
import tensorflow as tf
import cv2
from google.colab import drive, files
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import layers, models
import shutil
import json

In [None]:
# 3. Connect Drive & Extract
drive.mount('/content/drive')

# Install 7-Zip for fast extraction
!apt-get install -y p7zip-full

print("Copying tomato.7z from Drive... (This might take a minute)")
!cp /content/drive/MyDrive/tomato.7z /content/tomato.7z

print("Extracting dataset... (20k images)")
!7z x /content/tomato.7z -o/content/ -y

print("\n✅ Folder extracted successfully!")

In [None]:
# 4. Configuration
DATA_DIR = '/content/tomato'
MODEL_SAVE_PATH = '/content/tomato_disease_model.h5'
MAPPING_SAVE_PATH = '/content/tomato_class_indices.json'
IMG_SIZE = (224, 224)
BATCH_SIZE = 32 # Balanced for 16GB RAM

# Reorganize folders to flatten Unhealthy children into main classes.
# The flattened tree is built in a staging directory and renamed into place only
# after every copy succeeded. Without this, an interrupted run would leave a
# partially filled FINAL_DATA_DIR that the existence check below would treat as
# complete on the next run.
FINAL_DATA_DIR = '/content/tomato_final'
if not os.path.exists(FINAL_DATA_DIR):
    staging_dir = FINAL_DATA_DIR + '_staging'
    # Discard leftovers from a previously interrupted attempt.
    shutil.rmtree(staging_dir, ignore_errors=True)
    os.makedirs(staging_dir)
    # Copy Healthy and Others (the originals under DATA_DIR are left untouched)
    shutil.copytree(f'{DATA_DIR}/Healthy', f'{staging_dir}/Healthy')
    shutil.copytree(f'{DATA_DIR}/Others', f'{staging_dir}/Others')
    # Copy each sub-folder of Unhealthy so it becomes its own top-level class
    unhealthy_path = f'{DATA_DIR}/Unhealthy'
    folders = sorted(
        name for name in os.listdir(unhealthy_path)
        if os.path.isdir(os.path.join(unhealthy_path, name))
    )
    for folder_name in folders:
        shutil.copytree(
            os.path.join(unhealthy_path, folder_name),
            os.path.join(staging_dir, folder_name),
        )
    # Publish the finished tree in one step.
    os.rename(staging_dir, FINAL_DATA_DIR)

print(f"Dataset ready at: {FINAL_DATA_DIR}")
# Sorted so the listing is deterministic between runs.
print("Classes found:", sorted(os.listdir(FINAL_DATA_DIR)))

In [None]:
# 5. Data Generators (Memory Efficient)
# Images are streamed from disk in batches rather than loaded into memory at once.
VALIDATION_SPLIT = 0.2

# Training generator: ResNet preprocessing plus random augmentation.
datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet.preprocess_input,
    validation_split=VALIDATION_SPLIT,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True
)

# Validation generator: same preprocessing and the same split fraction, but NO
# augmentation. Augmenting validation images makes val_accuracy / val_loss noisy,
# and those metrics drive early stopping, checkpointing and LR scheduling.
val_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet.preprocess_input,
    validation_split=VALIDATION_SPLIT
)

train_generator = datagen.flow_from_directory(
    FINAL_DATA_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'
)

val_generator = val_datagen.flow_from_directory(
    FINAL_DATA_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    # Fixed order keeps batches aligned with val_generator.classes / .filenames
    # for any later evaluation; it does not affect the metrics computed by fit().
    shuffle=False
)

# Class names in the order of the label indices assigned by the generator.
class_names = list(train_generator.class_indices.keys())
print(f"\n✅ Success! Dataset contains {train_generator.samples + val_generator.samples} images across {len(class_names)} classes.")

In [None]:
# 6. Build the ResNet50 Model
# Derive the input shape from IMG_SIZE so the model always matches the
# target_size used by the data generators (3 = RGB channels).
INPUT_SHAPE = IMG_SIZE + (3,)

# ImageNet-pretrained backbone without its classification head, frozen for phase 1.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=INPUT_SHAPE)
base_model.trainable = False

inputs = tf.keras.Input(shape=INPUT_SHAPE)
# training=False runs the backbone in inference mode even while the outer model
# is being trained, so its BatchNormalization statistics are not updated.
x = base_model(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.4)(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.4)(x)
# One softmax output per class discovered by the training generator.
outputs = layers.Dense(len(class_names), activation='softmax')(x)

model = models.Model(inputs, outputs)
model.compile(optimizer=tf.keras.optimizers.Adam(1e-3), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# 7. Phase 1: Train Head Layers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# Stop when validation accuracy has not improved for 5 epochs and roll the
# model back to its best weights.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
)
# Write the model to MODEL_SAVE_PATH only when validation accuracy improves.
best_checkpoint = ModelCheckpoint(
    MODEL_SAVE_PATH,
    monitor='val_accuracy',
    save_best_only=True,
)
# Multiply the learning rate by 0.2 after 3 epochs without val_loss improvement.
lr_on_plateau = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
)

# The same callback list is reused by the fine-tuning cell.
callbacks = [early_stopping, best_checkpoint, lr_on_plateau]

print("Starting PHASE 1...")
history1 = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=15,
    callbacks=callbacks,
)

In [None]:
# 8. Phase 2: Fine-Tuning
print("Starting PHASE 2 (Fine-Tuning Top 30 Layers)...")

# Unfreeze the whole backbone, then re-freeze everything except its last 30 layers.
base_model.trainable = True
layers_to_freeze = base_model.layers[:-30]
for frozen_layer in layers_to_freeze:
    frozen_layer.trainable = False

# Recompile so the new trainable flags take effect; the learning rate is much
# lower than in phase 1 (1e-5 vs 1e-3).
fine_tune_optimizer = tf.keras.optimizers.Adam(1e-5)
model.compile(
    optimizer=fine_tune_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

history2 = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=20,
    callbacks=callbacks,
)

In [None]:
# 9. Save and Download
from google.colab import files

# Convert the generator's label indices to plain ints so the mapping is
# JSON-serialisable, then persist it next to the model.
class_mapping = {}
for label, index in train_generator.class_indices.items():
    class_mapping[label] = int(index)

with open(MAPPING_SAVE_PATH, 'w') as mapping_file:
    json.dump(class_mapping, mapping_file)

print(f"✅ Class Mapping: {class_mapping}")
print("📥 Downloading files...")
# Download the checkpointed model first, then the class-index mapping.
for artifact_path in (MODEL_SAVE_PATH, MAPPING_SAVE_PATH):
    files.download(artifact_path)