# Importing Dependencies

In [23]:
import os
import numpy as np
import cv2
import joblib
import pandas as pd
from skimage.feature import hog
from skimage.color import rgb2gray
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

print(cv2.__version__)
# Skimage version
import skimage
print("Scikit-Image Version:", skimage.__version__)

# Scikit-Learn version
import sklearn
print("Scikit-Learn Version:", sklearn.__version__)

4.9.0
Scikit-Image Version: 0.22.0
Scikit-Learn Version: 1.4.1.post1


# Importing Dataset

In [2]:
# Set dataset path
dataset_path = "PlantVillage"

# Define labels (assumes folder names are class names)
class_labels = os.listdir(dataset_path)
class_mapping = {label: idx for idx, label in enumerate(class_labels)}

# Feature Extraction

In [3]:
# Load dataset
X, y = [], []
for label in class_labels:
    label_path = os.path.join(dataset_path, label)
    for image_name in os.listdir(label_path):
        image_path = os.path.join(label_path, image_name)
        image = cv2.imread(image_path)
        image = cv2.resize(image, (128, 128))  # Resize for uniformity
        gray_image = rgb2gray(image)
        
        # Extract HOG features
        features = hog(gray_image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), feature_vector=True)
        X.append(features)
        y.append(class_mapping[label])

In [4]:
print("Training feature shape:", np.array(X[0]).shape)

Training feature shape: (8100,)


# Data Preprocessing

In [5]:
# Convert to numpy arrays
X = np.array(X)
y = np.array(y)

In [6]:
X

array([[0.21914252, 0.09194738, 0.09200222, ..., 0.05635325, 0.17036981,
        0.0647693 ],
       [0.08789893, 0.11020072, 0.25716682, ..., 0.06868001, 0.02156584,
        0.09055157],
       [0.04898647, 0.009554  , 0.04676138, ..., 0.12077287, 0.09226068,
        0.04502144],
       ...,
       [0.22794594, 0.0759268 , 0.22081584, ..., 0.1948833 , 0.16589092,
        0.15211519],
       [0.07804403, 0.08874369, 0.06068256, ..., 0.1288943 , 0.10333153,
        0.02777336],
       [0.23915725, 0.16436975, 0.0828637 , ..., 0.11682877, 0.06591926,
        0.16462864]])

In [7]:
unique_classes = pd.Series(y).unique()
print("Unique Classes:", unique_classes)

Unique Classes: [0 1 2]


# Splitting the data

In [8]:
# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Training models

In [16]:
# Train Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

In [17]:
# Train SVM model
svm_model = SVC(kernel='linear', probability=True)
svm_model.fit(X_train, y_train)

In [11]:
gb_model = GradientBoostingClassifier(n_estimators=100, random_state=42)
gb_model.fit(X_train, y_train)

# Model Evaluation

In [18]:
# List of trained models and their names
models = [
    ("GradientBoostingClassifier", gb_model),
    ("SVM", svm_model),
    ("Random Forest", rf_model)
]

# Loop through each model and print evaluation metrics
for name, model in models:
    y_pred = model.predict(X_test)
    
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=1)
    rec = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    print(f"Model: {name}")
    print(f"Accuracy: {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall: {rec:.4f}")
    print(f"F1-score: {f1:.4f}")
    print("="*40)

Model: GradientBoostingClassifier
Accuracy: 0.7657
Precision: 0.7212
Recall: 0.7657
F1-score: 0.7427
Model: SVM
Accuracy: 0.7912
Precision: 0.7848
Recall: 0.7912
F1-score: 0.7854
Model: Random Forest
Accuracy: 0.7262
Precision: 0.7445
Recall: 0.7262
F1-score: 0.7033


# Predictive System

In [13]:
def predict_image(image_path, model):
    # Read and preprocess the image
    image = cv2.imread(image_path)
    image = cv2.resize(image, (128, 128))  # Ensure the same size as in training
    gray_image = rgb2gray(image)

    # Extract HOG features with the SAME settings as training
    features = hog(
        gray_image, 
        pixels_per_cell=(8, 8),  # Ensure it matches training
        cells_per_block=(2, 2),  # Ensure it matches training
        feature_vector=True
    )
    features = np.array(features).reshape(1, -1)

    # Debug: Check feature shape before prediction
    print(f"Extracted feature shape: {features.shape}, Model expects: {model.n_features_in_}")

    # Ensure that the extracted feature size matches model input size
    if features.shape[1] != model.n_features_in_:
        raise ValueError(f"Feature size mismatch! Extracted: {features.shape[1]}, Model expects: {model.n_features_in_}")

    # Make prediction using the passed model
    prediction = model.predict(features)[0]

    # Convert prediction to label
    predicted_label = list(class_mapping.keys())[list(class_mapping.values()).index(prediction)]
    
    return predicted_label

In [20]:
test_image_path = "PlantVillage/Potato___healthy/046641c1-f837-49eb-b5f2-4109910a027c___RS_HL 1878.JPG"  # Change this to the actual image path
predicted_disease = predict_image(test_image_path, gb_model)  # Pass gb_model directly
print(f"Predicted Disease: {predicted_disease}")

Extracted feature shape: (1, 8100), Model expects: 8100
Predicted Disease: Potato___healthy


# Saving the best model

In [21]:
# Save the best model
joblib.dump(svm_model, "potato_leaf_modelSVM.pkl")

['potato_leaf_modelSVM.pkl']