# Imports and Constants

In [1]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops

DATA_DIR = "/Users/oneaboveall/Downloads/MangoLeafBD"
CATEGORIES = []
used_features = {
    'hsv': True,  # Enable HSV features
    'lbp': True,  # Enable LBP features
    'glcm': True  # Enable GLCM features
}

# Extract Features

In [2]:
def extract_features(img_path, img_size=(256, 256)):
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    img = cv2.resize(img, img_size)
    features = []

    if used_features['hsv']:
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        hist_h = cv2.calcHist([hsv], [0], None, [32], [0, 180]).flatten()
        hist_s = cv2.calcHist([hsv], [1], None, [32], [0, 256]).flatten()
        hist_v = cv2.calcHist([hsv], [2], None, [32], [0, 256]).flatten()
        hist_h /= (hist_h.sum() + 1e-7)
        hist_s /= (hist_s.sum() + 1e-7)
        hist_v /= (hist_v.sum() + 1e-7)
        features.extend([hist_h, hist_s, hist_v])

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    if used_features['lbp']:
        lbp = local_binary_pattern(gray, P=8, R=1, method='uniform')
        lbp_hist, _ = np.histogram(lbp, bins=np.arange(0, 11), range=(0, 10))
        lbp_hist = lbp_hist / (lbp_hist.sum() + 1e-7)
        features.append(lbp_hist)

    if used_features['glcm']:
        glcm = graycomatrix(gray, distances=[1], angles=[0], symmetric=True, normed=True)
        contrast = graycoprops(glcm, prop='contrast').flatten()
        correlation = graycoprops(glcm, prop='correlation').flatten()
        energy = graycoprops(glcm, prop='energy').flatten()
        homogeneity = graycoprops(glcm, prop='homogeneity').flatten()
        features.extend([contrast, correlation, energy, homogeneity])

    return np.concatenate(features)

# Load Data

In [3]:
x, y = [], []
CATEGORIES = os.listdir(DATA_DIR)
CATEGORIES.remove('.DS_Store')
for idx, category in enumerate(CATEGORIES):
    folder = os.path.join(DATA_DIR, category)
    for file in os.listdir(folder):
        filepath = os.path.join(folder, file)
        if file.lower().endswith(('.png', '.jpg', '.jpeg')):
            x.append(filepath)
            y.append(idx)
x = np.array(x)
y = np.array(y)

extracted_features = []
for img_path in x:
    features = extract_features(img_path)
    extracted_features.append(features)
extracted_features = np.array(extracted_features)

x_train, x_test, y_train, y_test = train_test_split(extracted_features, y, test_size=0.2, random_state=42, stratify=y)

# MLP Classifier

In [11]:
mlp_param_grid = {
    'hidden_layer_sizes': [(32, ), (64,), (128, ), (100,), (100, 50), (100, 100, 50)],
    'activation': ['relu', 'tanh', 'logistic'],
    'solver': ['adam', 'sgd'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate': ['constant', 'invscaling', 'adaptive'],
    'max_iter': [200, 500, 1000]
}
mlp = MLPClassifier(random_state=42)
mlp_grid_search = GridSearchCV(mlp, mlp_param_grid, cv=5, n_jobs=-1, verbose=2)
mlp_grid_search.fit(x_train, y_train)
print("MLP Best parameters found:", mlp_grid_search.best_params_)

Fitting 5 folds for each of 972 candidates, totalling 4860 fits


MLP Best parameters found: {'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': (32,), 'learning_rate': 'constant', 'max_iter': 1000, 'solver': 'adam'}


# Gaussian Naive Bayes

In [6]:
gnb_param_grid = {
    'var_smoothing': np.logspace(-9, -6, 4)
}
gnb = GaussianNB()
gnb_grid_search = GridSearchCV(gnb, {'var_smoothing': gnb_param_grid['var_smoothing']}, cv=5, n_jobs=-1, verbose=2)
gnb_grid_search.fit(x_train, y_train)
print("GNB Best parameters found:", gnb_grid_search.best_params_)

Fitting 5 folds for each of 4 candidates, totalling 20 fits
GNB Best parameters found: {'var_smoothing': 1e-09}


# SVM Classifier

In [7]:
svm_param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'gamma': ['scale', 'auto']
}
svm = SVC(probability=True)
svm_grid_search = GridSearchCV(svm, svm_param_grid, cv=5, n_jobs=-1, verbose=2)
svm_grid_search.fit(x_train, y_train)
print("SVM Best parameters found:", svm_grid_search.best_params_)

Fitting 5 folds for each of 32 candidates, totalling 160 fits
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=500, solver=adam; total time=  25.5s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=  23.8s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=invscaling, max_iter=200, solver=sgd; total time=  14.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=invscaling, max_iter=1000, solver=sgd; total time=  16.1s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=200, solver=adam; total time=  22.7s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=500, solver=sgd; total time=  53.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 100, 50), learning_rate=constan

# KNN Classifier

In [8]:
knn_param_grid = {
    'n_neighbors': [3, 5, 7, 9],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan', 'minkowski']
}
knn = KNeighborsClassifier()
knn_grid_search = GridSearchCV(knn, knn_param_grid, cv=5, n_jobs=-1, verbose=2)
knn_grid_search.fit(x_train, y_train)
print("KNN Best parameters found:", knn_grid_search.best_params_)

Fitting 5 folds for each of 24 candidates, totalling 120 fits
KNN Best parameters found: {'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}


# Evaluate models

In [12]:
models = {
    'MLP': mlp_grid_search.best_estimator_,
    'KNN': knn_grid_search.best_estimator_,
    'SVM': svm_grid_search.best_estimator_,
    'GNB': gnb_grid_search.best_estimator_
}

for model_name, model in models.items():
    y_pred = model.predict(x_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"{model_name} Accuracy: {acc}")
    print(f"{model_name} Classification Report:\n", classification_report(y_test, y_pred, target_names=CATEGORIES))
    print(f"{model_name} Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

MLP Accuracy: 0.98
MLP Classification Report:
                   precision    recall  f1-score   support

Bacterial Canker       1.00      0.99      0.99       100
     Anthracnose       0.99      0.97      0.98       100
         Healthy       0.98      0.98      0.98       100
  Powdery Mildew       0.97      0.99      0.98       100
  Cutting Weevil       1.00      1.00      1.00       100
        Die Back       0.98      1.00      0.99       100
     Sooty Mould       0.96      0.95      0.95       100
      Gall Midge       0.96      0.96      0.96       100

        accuracy                           0.98       800
       macro avg       0.98      0.98      0.98       800
    weighted avg       0.98      0.98      0.98       800

MLP Confusion Matrix:
 [[ 99   0   0   0   0   0   1   0]
 [  0  97   1   0   0   0   0   2]
 [  0   0  98   0   0   1   1   0]
 [  0   0   0  99   0   0   1   0]
 [  0   0   0   0 100   0   0   0]
 [  0   0   0   0   0 100   0   0]
 [  0   0   1   2   0