# Model 2 Training: Corn & Blackgram Disease Detection

**Goal:** Train a robust disease classification model for Corn and Blackgram crops

**Key Improvements:**
- ✅ Documented preprocessing (must match prediction code!)
- ✅ Support for negative samples (non-crop images)
- ✅ Data augmentation
- ✅ Validation split
- ✅ Early stopping
- ✅ Model checkpointing

## 1. Setup & Imports

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# Report the TensorFlow build and the GPUs it can see, so a CPU-only
# runtime is obvious before any training starts.
gpu_devices = tf.config.list_physical_devices("GPU")
print(f'TensorFlow version: {tf.__version__}')
print(f'GPU Available: {gpu_devices}')

## 2. Configuration

**IMPORTANT:** These settings must match your prediction code!

In [None]:
# ---------------------------------------------------------------
# Settings shared with inference (model_manager.py).
# Any change here requires the same change in the prediction code.
# ---------------------------------------------------------------

IMG_SIZE = 224    # square input edge in pixels (224x224)
BATCH_SIZE = 32   # images per batch
EPOCHS = 50       # number of epochs requested from model.fit

# Pixel preprocessing: pick exactly ONE scheme and record which one is used.
#   Scheme A (active): scale raw pixel values into [0, 1].
RESCALE = 1.0 / 255.0
#   Scheme B: ResNet preprocessing through preprocess_input, e.g.
#       from tensorflow.keras.applications.resnet50 import preprocess_input

# ---------------------------------------------------------------

# Root folder of the image dataset. Update this path!
DATA_DIR = 'datasets/corn_blackgram'

# One entry per class; every name must equal a folder name in the dataset.
CLASS_NAMES = [
    # Blackgram classes
    'Blackgram_Healthy',
    'Blackgram_Anthracnose',
    'Blackgram_Yellow_Mosaic',
    'Blackgram_Leaf_Crinkle',
    'Blackgram_Powdery_Mildew',
    # Corn classes
    'Corn_Healthy',
    'Corn_Common_Rust',
    'Corn_Gray_Leaf_Spot',
    'Corn_Blight',
    # 'Not_A_Plant'  # Add this if you have negative samples!
]

# Derived from the list above so the two can never drift apart.
NUM_CLASSES = len(CLASS_NAMES)
print(f'Number of classes: {NUM_CLASSES}')
print(f'Class names: {CLASS_NAMES}')

## 3. Dataset Structure Check

Expected folder structure:
```
datasets/corn_blackgram/
├── train/
│   ├── Blackgram_Healthy/
│   ├── Blackgram_Anthracnose/
│   ├── Corn_Healthy/
│   └── ...
└── validation/  (optional, will split from train if not present)
    ├── Blackgram_Healthy/
    └── ...
```

In [None]:
# Sanity-check the dataset layout before any generator is built.
#
# Reports whether DATA_DIR, 'train' and 'validation' exist, and prints an
# image count for every class in CLASS_NAMES. A missing class folder is
# reported explicitly instead of being skipped silently.

# File suffixes counted as images (compared case-insensitively).
# NOTE(review): intended to mirror the formats Keras' directory loader accepts;
# confirm against the installed Keras version.
IMAGE_SUFFIXES = {'.png', '.jpg', '.jpeg', '.bmp', '.ppm', '.tif', '.tiff'}

data_path = Path(DATA_DIR)
if not data_path.exists():
    print(f"‚ùå Dataset not found at: {DATA_DIR}")
    print("Please create the dataset folder and organize images by class!")
else:
    print(f"‚úÖ Dataset found at: {DATA_DIR}")

    # Check for train/validation split
    train_path = data_path / 'train'
    val_path = data_path / 'validation'

    if train_path.exists():
        print(f"‚úÖ Train folder found")
        class_root = train_path
    else:
        print("‚ö†Ô∏è No 'train' folder - will use entire dataset")
        # Without a 'train' folder the class folders sit directly under DATA_DIR.
        class_root = data_path

    # Count images per class (direct children only, any letter case of suffix)
    for class_name in CLASS_NAMES:
        class_path = class_root / class_name
        if not class_path.is_dir():
            print(f"   - {class_name}: folder missing!")
            continue
        num_images = sum(
            1
            for entry in class_path.iterdir()
            if entry.is_file() and entry.suffix.lower() in IMAGE_SUFFIXES
        )
        print(f"   - {class_name}: {num_images} images")

    if val_path.exists():
        print(f"‚úÖ Validation folder found")
    else:
        print("‚ö†Ô∏è No 'validation' folder - will split from training data (80/20)")

## 4. Data Generators with Augmentation

In [None]:
# Build the training and validation generators.
#
# Directory resolution:
#   * training images come from DATA_DIR/train when it exists, else DATA_DIR;
#   * validation images come from DATA_DIR/validation when that folder holds
#     at least one class folder, otherwise 20% of the training folder is
#     held out (80/20 split).
# `classes=CLASS_NAMES` pins the class -> index mapping to the order of
# CLASS_NAMES and guarantees the generators yield NUM_CLASSES outputs,
# instead of depending on whichever folders exist (alphabetical order).

dataset_root = Path(DATA_DIR)
train_dir = dataset_root / 'train'
if not train_dir.exists():
    train_dir = dataset_root
val_dir = dataset_root / 'validation'

# Only trust the validation folder when it contains a known class folder;
# an empty or unrelated folder would give an empty validation set.
use_val_dir = any((val_dir / name).is_dir() for name in CLASS_NAMES)
holdout_fraction = 0.0 if use_val_dir else 0.2  # 80% train, 20% validation

# Training data augmentation
train_datagen = ImageDataGenerator(
    rescale=RESCALE,  # CRITICAL: Must match prediction!
    validation_split=holdout_fraction,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation data (no augmentation, only rescaling)
val_datagen = ImageDataGenerator(
    rescale=RESCALE,  # CRITICAL: Must match prediction!
    validation_split=holdout_fraction
)

# Arguments both generators must agree on.
shared_flow_args = dict(
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    classes=list(CLASS_NAMES),
    seed=42
)

# Create train generator
train_generator = train_datagen.flow_from_directory(
    str(train_dir),
    subset=None if use_val_dir else 'training',
    shuffle=True,
    **shared_flow_args
)

# Create validation generator (never shuffled, so evaluation order is stable)
val_generator = val_datagen.flow_from_directory(
    str(val_dir if use_val_dir else train_dir),
    subset=None if use_val_dir else 'validation',
    shuffle=False,
    **shared_flow_args
)

print(f"\n‚úÖ Data generators created:")
print(f"   Training samples: {train_generator.samples}")
print(f"   Validation samples: {val_generator.samples}")
print(f"   Class indices: {train_generator.class_indices}")

## 5. Visualize Sample Images

In [None]:
# Show up to nine augmented training images with their class names.

# Get a batch of images
sample_images, sample_labels = next(train_generator)

# Class names ordered by output index, built once instead of per image.
class_indices = train_generator.class_indices
index_to_class = sorted(class_indices, key=class_indices.get)

# A batch can hold fewer than 9 images (small dataset or small BATCH_SIZE),
# so slice instead of indexing a fixed range.
plt.figure(figsize=(12, 12))
for position, (img, label) in enumerate(zip(sample_images[:9], sample_labels[:9]), start=1):
    plt.subplot(3, 3, position)
    # Images come out of the generator already rescaled, so they are shown as-is.
    plt.imshow(img)

    # Labels are one-hot; argmax recovers the class index.
    plt.title(index_to_class[np.argmax(label)])
    plt.axis('off')
plt.tight_layout()
plt.show()

## 6. Build Model Architecture

Using Transfer Learning with ResNet50 (pre-trained on ImageNet)

In [None]:
from tensorflow.keras.applications import ResNet50

# Feature extractor: ResNet50 with ImageNet weights and no classification head.
# NOTE(review): Keras documents that ResNet50 expects inputs processed with
# keras.applications.resnet50.preprocess_input; this notebook feeds pixels
# scaled by RESCALE instead. Confirm that is intended.
input_shape = (IMG_SIZE, IMG_SIZE, 3)
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)

# Keep the backbone frozen so only the new head is trained
# (it can be unfrozen later for fine-tuning).
base_model.trainable = False

# Classification head stacked on top of the backbone.
head_layers = [
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.5),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(NUM_CLASSES, activation='softmax'),
]
model = keras.Sequential([base_model, *head_layers])

# Compile: Adam optimizer, categorical cross-entropy loss, and both
# top-1 and top-3 accuracy as tracked metrics.
optimizer = keras.optimizers.Adam(learning_rate=0.001)
top3_metric = keras.metrics.TopKCategoricalAccuracy(k=3, name='top_3_accuracy')
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy', top3_metric],
)

model.summary()

## 7. Setup Callbacks

In [None]:
# The best-model snapshot is written under ./checkpoints.
os.makedirs('checkpoints', exist_ok=True)

# Keep only the weights with the highest validation accuracy seen so far.
checkpoint_cb = ModelCheckpoint(
    'checkpoints/model2_best.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Stop after 10 epochs without a val_loss improvement and restore the best weights.
early_stop_cb = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 5 stagnant epochs, never going below 1e-7.
lr_plateau_cb = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-7,
    verbose=1,
)

callbacks = [checkpoint_cb, early_stop_cb, lr_plateau_cb]

print("‚úÖ Callbacks configured")

## 8. Train Model

In [None]:
print("üöÄ Starting training...\n")

# Fit on the training generator and evaluate on the validation generator
# after each epoch; the configured callbacks are attached.
fit_options = {
    'epochs': EPOCHS,
    'validation_data': val_generator,
    'callbacks': callbacks,
    'verbose': 1,
}
history = model.fit(train_generator, **fit_options)

print("\n‚úÖ Training complete!")

## 9. Visualize Training History

In [None]:
# Draw accuracy and loss curves side by side, then print the last-epoch values.
metrics_log = history.history
panels = [('accuracy', 'Accuracy'), ('loss', 'Loss')]  # (history key, display label)

fig, axes = plt.subplots(1, 2, figsize=(15, 5))
for ax, (key, label) in zip(axes, panels):
    # One panel per metric: training curve and validation curve together.
    ax.plot(metrics_log[key], label=f'Train {label}')
    ax.plot(metrics_log[f'val_{key}'], label=f'Val {label}')
    ax.set_title(f'Model {label}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(label)
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.savefig('training_history.png')
plt.show()

# Values recorded for the final epoch that ran.
print("\nFinal Metrics:")
for key, label in panels:
    print(f"Train {label}: {metrics_log[key][-1]:.4f}")
    print(f"Val {label}: {metrics_log[f'val_{key}'][-1]:.4f}")

## 10. Save Final Model & Preprocessing Info

In [None]:
# Save the trained model plus a JSON sidecar that records the preprocessing
# and class order the prediction code needs to reproduce.

# Save the final model
model.save('Model2(Corn and Blackgram).h5')
print("‚úÖ Model saved as 'Model2(Corn and Blackgram).h5'")

# CRITICAL: Save preprocessing info
import json

# Order class names explicitly by output index rather than relying on
# the dict's iteration order.
class_indices = train_generator.class_indices
ordered_classes = sorted(class_indices, key=class_indices.get)

# Describe the preprocessing actually configured, so the text cannot
# contradict the numeric "rescale" value if RESCALE is changed.
if RESCALE == 1. / 255.0:
    preprocessing_note = "Normalize to [0, 1] by dividing by 255.0"
else:
    preprocessing_note = f"Multiply raw pixel values by {RESCALE}"

preprocessing_info = {
    "model_name": "Model2(Corn and Blackgram)",
    "img_size": IMG_SIZE,
    "rescale": RESCALE,
    "preprocessing": preprocessing_note,
    "classes": ordered_classes,
    # Derived from the saved class list so the two fields always agree.
    "num_classes": len(ordered_classes),
    # Metrics of the last epoch that ran.
    "training_accuracy": float(history.history['accuracy'][-1]),
    "validation_accuracy": float(history.history['val_accuracy'][-1])
}

# Explicit encoding keeps the file identical across platforms.
with open('model2_info.json', 'w', encoding='utf-8') as f:
    json.dump(preprocessing_info, f, indent=2)

print("‚úÖ Preprocessing info saved to 'model2_info.json'")
print("\n‚ö†Ô∏è IMPORTANT: Use this EXACT preprocessing in prediction code!")
print(f"   Rescale: {RESCALE}")
print(f"   Image size: {IMG_SIZE}x{IMG_SIZE}")

## 11. Test Prediction (Sample)

Test the model on a sample image to verify it works

In [None]:
from tensorflow.keras.preprocessing import image as keras_image

# Smoke-test the trained model on one image from disk, applying the same
# resize and rescale steps used during training.
test_image_path = 'path/to/test/image.jpg'  # CHANGE THIS!

if not os.path.exists(test_image_path):
    print(f"Test image not found: {test_image_path}")
    print("Update the path to test prediction!")
else:
    # Class names in index order, looked up once for every use below.
    class_labels = list(train_generator.class_indices.keys())

    # Load, rescale, and add the leading batch axis the model expects.
    img = keras_image.load_img(test_image_path, target_size=(IMG_SIZE, IMG_SIZE))
    pixels = keras_image.img_to_array(img) * RESCALE  # Apply same preprocessing!
    img_array = np.expand_dims(pixels, axis=0)

    # Predict; the batch holds a single image, so keep its row of scores.
    predictions = model.predict(img_array)
    scores = predictions[0]

    # Best-scoring class
    class_idx = np.argmax(scores)
    confidence = scores[class_idx]
    class_name = class_labels[class_idx]

    # Display the image with the top prediction as its title
    plt.figure(figsize=(8, 6))
    plt.imshow(img)
    plt.title(f"Prediction: {class_name}\nConfidence: {confidence*100:.2f}%")
    plt.axis('off')
    plt.show()

    # Three highest scores, best first
    top_3_idx = np.argsort(scores)[-3:][::-1]
    print("\nTop 3 Predictions:")
    for idx in top_3_idx:
        print(f"  {class_labels[idx]}: {scores[idx] * 100:.2f}%")

## 12. Next Steps

1. ✅ **Copy model to backend** (quote the file name: unquoted parentheses are a shell syntax error):
   ```bash
   cp "Model2(Corn and Blackgram).h5" ../backend/models/
   ```

2. ✅ **Update prediction code** in `model_manager.py`:
   - Set `RESCALE = 1./255.0` (or match your choice)
   - Set `IMG_SIZE = 224`

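3. ✅ **Update class labels** in `class_labels.json` (copy the exact order of `classes` from `model2_info.json`, since that is the model's output index order):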
   ```json
   "corn_blackgram": [
     "Blackgram_Healthy",
     "Blackgram_Anthracnose",
     ...
   ]
   ```

4. ✅ **Test with real images** to verify predictions!

---

**Training Complete! 🎉**