<a href="https://colab.research.google.com/github/shuknolonka/Assignment-1-Python-Sem-1/blob/main/Crop_Disease_Detector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
pip install tensorflow keras matplotlib opencv-python scikit-learn



Content

In [11]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download the PlantVillage dataset (full dataset)
# You might need to find the specific Kaggle dataset ID for PlantVillage.
# A common one is 'vipoooool/new-plant-diseases-dataset' or similar.
# Let's assume you're using 'https://www.kaggle.com/datasets/vipoooool/new-plant-diseases-dataset'
!kaggle datasets download -d vipoooool/new-plant-diseases-dataset -p /content/

Dataset URL: https://www.kaggle.com/datasets/vipoooool/new-plant-diseases-dataset
License(s): copyright-authors
Downloading new-plant-diseases-dataset.zip to /content
100% 2.69G/2.70G [00:30<00:00, 197MB/s]
100% 2.70G/2.70G [00:30<00:00, 96.2MB/s]


In [12]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [13]:
# After setting up Kaggle API key (kaggle.json) as described before:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download the dataset using its Kaggle ID
# The ID is 'emmarex/plantdisease'
!kaggle datasets download -d emmarex/plantdisease -p /content/

Dataset URL: https://www.kaggle.com/datasets/emmarex/plantdisease
License(s): unknown
Downloading plantdisease.zip to /content
 97% 636M/658M [00:02<00:00, 224MB/s]
100% 658M/658M [00:02<00:00, 305MB/s]


In [14]:
!unzip -q /content/plantdisease.zip -d /content/plant_disease_data_emmarex

In [15]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import numpy as np
import os

# Define paths to your dataset
# IMPORTANT: Adjust these paths based on the actual structure after unzipping
# Based on typical 'emmarex/plantdisease' structure:
train_dir = '/content/plant_disease_data_emmarex/PlantVillage/train'
valid_dir = '/content/plant_disease_data_emmarex/PlantVillage/validation'

# Check if directories exist
if not os.path.exists(train_dir):
    print(f"Error: Training directory not found at {train_dir}")
    print("Please inspect the unzipped dataset structure using '!ls -R /content/plant_disease_data_emmarex' and adjust paths.")
if not os.path.exists(valid_dir):
    print(f"Error: Validation directory not found at {valid_dir}")
    print("Please inspect the unzipped dataset structure using '!ls -R /content/plant_disease_data_emmarex' and adjust paths.")


# --- Image Preprocessing and Augmentation ---
# Rescale all images by 1./255 to normalize pixel values
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

validation_datagen = ImageDataGenerator(rescale=1./255) # Only rescale for validation

print("Setting up data generators...")
# Flow training images in batches of 32 using train_datagen generator
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150), # All images will be resized to 150x150
    batch_size=32,
    class_mode='categorical' # For multi-class classification
)

# Flow validation images in batches of 32 using validation_datagen generator
validation_generator = validation_datagen.flow_from_directory(
    valid_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical'
)

# Get the number of classes and their mapping
num_classes = len(train_generator.class_indices)
class_labels_mapping = train_generator.class_indices
# Invert the mapping to get class names from indices for later use
idx_to_class = {v: k for k, v in class_labels_mapping.items()}
sorted_class_names = [idx_to_class[i] for i in range(num_classes)]


print(f"Number of classes detected: {num_classes}")
print(f"Class labels mapping (name: index): {class_labels_mapping}")
print(f"Sorted Class Names (for app.py): {sorted_class_names}") # This is what you'll use in app.py

# --- Build the CNN Model ---
print("Building CNN model...")
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dropout(0.5), # Dropout for regularization
    Dense(512, activation='relu'),
    Dense(num_classes, activation='softmax') # Output layer with num_classes neurons
])

# --- Compile the Model ---
model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(learning_rate=0.001),
                  metrics=['accuracy'])

# --- Model Summary ---
model.summary()

# --- Train the Model ---
print("Starting model training...")
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    epochs=20, # Start with 20 epochs, adjust as needed
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // validation_generator.batch_size
)
print("Model training complete.")

# --- Plot Training History ---
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(len(acc))

plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, 'r', label='Training accuracy')
plt.plot(epochs_range, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, 'r', label='Training Loss')
plt.plot(epochs_range, val_loss, 'b', label='Validation Loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

# --- Save the Trained Model ---
# Ensure the directory exists in Google Drive
model_save_dir = '/content/drive/My Drive/Crop_Disease_Detector' # Or any other path in your Drive
os.makedirs(model_save_dir, exist_ok=True)

model_save_path = os.path.join(model_save_dir, 'plant_disease_detector_model')
tf.saved_model.save(model, model_save_path)
print(f"Model saved to {model_save_path}")

Error: Training directory not found at /content/plant_disease_data_emmarex/PlantVillage/train
Please inspect the unzipped dataset structure using '!ls -R /content/plant_disease_data_emmarex' and adjust paths.
Error: Validation directory not found at /content/plant_disease_data_emmarex/PlantVillage/validation
Please inspect the unzipped dataset structure using '!ls -R /content/plant_disease_data_emmarex' and adjust paths.
Setting up data generators...


FileNotFoundError: [Errno 2] No such file or directory: '/content/plant_disease_data_emmarex/PlantVillage/train'