In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_validate, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler, label_binarize
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, auc, precision_recall_curve
from imblearn.over_sampling import SMOTE
from skimage.feature import local_binary_pattern, hog
from skimage.filters import gabor
from scipy.spatial.distance import cdist
import joblib

# Dataset path: root directory that is scanned for one sub-directory per
# species. Set the IPLANTSPEC_DATASET_DIR environment variable to point the
# script at a different location; the fallback is the original local path.
base_path = os.environ.get(
    "IPLANTSPEC_DATASET_DIR",
    r"C:\Users\91974\Downloads\Working projects\IPlantSpec\G7",
)

# Enhanced Feature Extraction Function
def extract_features_enhanced(image):
    """Build a fixed-length colour/texture/shape descriptor for one image.

    The image is resized to 128x128 and four feature groups are concatenated,
    in this order:

    1. 3-D colour histogram, 8 bins per channel (8 * 8 * 8 = 512 values),
       normalised with ``cv2.normalize`` defaults.
    2. Uniform LBP histogram (P=24, R=3), 26 bins, normalised to sum to ~1.
    3. Gabor responses at 4 orientations (0, pi/4, pi/2, 3*pi/4): mean and
       standard deviation of the real and imaginary parts (16 values).
    4. HOG descriptor (9 orientations, 8x8 cells, 2x2 blocks, L2-Hys).

    Args:
        image: Image array as returned by ``cv2.imread`` (BGR channel order).
            A single-channel (grayscale) array is also accepted and is
            promoted to BGR first.

    Returns:
        1-D NumPy array holding the concatenated features.

    Raises:
        ValueError: If ``image`` is ``None`` or empty.
    """
    if image is None or getattr(image, "size", 0) == 0:
        raise ValueError("extract_features_enhanced() requires a non-empty image array")

    # calcHist over channels 0-2 and BGR2GRAY both need colour input, so
    # promote grayscale arrays instead of failing inside OpenCV.
    if image.ndim == 2 or (image.ndim == 3 and image.shape[2] == 1):
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)

    image = cv2.resize(image, (128, 128))

    # Color histogram
    hist = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    hist = cv2.normalize(hist, hist).flatten()

    # Grayscale conversion
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Local Binary Pattern (LBP)
    lbp_points, lbp_radius = 24, 3
    lbp = local_binary_pattern(gray, P=lbp_points, R=lbp_radius, method="uniform")
    # Uniform LBP produces P + 2 distinct codes (0 .. P + 1), hence P + 3 bin
    # edges. np.histogram ignores `range` when explicit edges are given, so
    # it is not passed.
    lbp_hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, lbp_points + 3))
    lbp_hist = lbp_hist.astype("float") / (lbp_hist.sum() + 1e-6)

    # Gabor filters
    # Filter a float copy: gabor() does not promote integer images and the
    # underlying ndimage convolution returns the input dtype, so on uint8 the
    # signed filter responses would be cast back to uint8 before the
    # statistics below are computed.
    gray_float = gray.astype(np.float64)
    gabor_features = []
    for step in range(4):
        theta = step / 4.0 * np.pi
        filt_real, filt_imag = gabor(gray_float, frequency=0.6, theta=theta)
        gabor_features.extend(
            [filt_real.mean(), filt_real.std(), filt_imag.mean(), filt_imag.std()]
        )

    # HOG features
    hog_features = hog(
        gray,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        block_norm='L2-Hys',
    )

    # Combine features
    return np.hstack([hist, lbp_hist, gabor_features, hog_features])

# imblearn's Pipeline (unlike sklearn's) accepts samplers such as SMOTE and
# applies them only while fitting, never when predicting/scoring.
from imblearn.pipeline import Pipeline as ImbPipeline

# Load dataset: every sub-directory of base_path is treated as one species
# (class label) and every readable image inside it becomes one sample.
data, labels = [], []
# sorted() keeps the sample order -- and with it the seeded CV splits and
# SMOTE output -- independent of the filesystem's listing order.
for species in sorted(os.listdir(base_path)):
    species_path = os.path.join(base_path, species)
    if not os.path.isdir(species_path):
        continue
    for img_file in sorted(os.listdir(species_path)):
        img_path = os.path.join(species_path, img_file)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None for files it cannot decode; skip them.
            continue
        features = extract_features_enhanced(img)
        data.append(features)
        labels.append(species)

if not data:
    raise ValueError(f"No readable images found under {base_path!r}")

data = np.array(data)
labels = np.array(labels)

# Encode labels
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)
# NOTE: describes the original (pre-SMOTE) samples only.
labels_binarized = label_binarize(labels, classes=np.unique(labels))

# Keep the untouched samples for cross-validation; `data`/`labels` are
# overwritten with the resampled, scaled versions below.
raw_data, raw_labels = data, labels

# Handle class imbalance (full dataset -- used for the final ensemble only)
smote = SMOTE(random_state=42)
data, labels = smote.fit_resample(data, labels)

# Scale features
scaler = StandardScaler()
data = scaler.fit_transform(data)

# Define models
models = {
    'SVM': SVC(kernel='rbf', probability=True, class_weight='balanced', C=10, gamma=0.1),
    'RandomForest': RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced'),
    'KNN': KNeighborsClassifier(n_neighbors=5),
    'LogisticRegression': LogisticRegression(max_iter=1000, class_weight='balanced'),
    'NaiveBayes': GaussianNB(),
    'DecisionTree': DecisionTreeClassifier(random_state=42, class_weight='balanced'),
    'GradientBoosting': GradientBoostingClassifier(random_state=42),
    'AdaBoost': AdaBoostClassifier(random_state=42),
}

# Cross-validation setup
cv_results = {}
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# SMOTE needs more samples per class than k_neighbors. A training fold holds
# fewer samples of each class than the full dataset, so shrink k_neighbors
# when the rarest class would otherwise be too small (default stays 5).
n_splits = skf.get_n_splits()
class_counts = np.bincount(raw_labels)
smallest_train_class = int((class_counts - np.ceil(class_counts / n_splits)).min())
cv_k_neighbors = max(1, min(5, smallest_train_class - 1))

# Evaluate models
# Resampling and scaling are fitted inside each training fold on the original
# samples. Doing them once on the whole dataset before splitting would put
# synthetic points interpolated from held-out images (and held-out scaling
# statistics) into the training folds and inflate the reported scores.
for name, model in models.items():
    cv_pipeline = ImbPipeline([
        ('smote', SMOTE(random_state=42, k_neighbors=cv_k_neighbors)),
        ('scaler', StandardScaler()),
        ('model', model),
    ])
    # cross_validate clones the pipeline per fold, so `model` stays unfitted.
    scores = cross_validate(
        cv_pipeline,
        raw_data,
        raw_labels,
        cv=skf,
        scoring=['accuracy', 'precision_macro', 'recall_macro', 'f1_macro'],
        return_train_score=True,
    )
    cv_results[name] = {
        'Train Accuracy': np.mean(scores['train_accuracy']),
        'Test Accuracy': np.mean(scores['test_accuracy']),
        'Precision': np.mean(scores['test_precision_macro']),
        'Recall': np.mean(scores['test_recall_macro']),
        'F1 Score': np.mean(scores['test_f1_macro'])
    }

# Train ensemble model on the full resampled, scaled dataset. Inputs must be
# passed through the saved `scaler` before calling the saved ensemble.
ensemble = VotingClassifier(estimators=list(models.items()), voting='soft')
ensemble.fit(data, labels)

# Save results
output_dir = "model_results"
os.makedirs(output_dir, exist_ok=True)

# Save cross-validation results
with open(os.path.join(output_dir, "cv_results.txt"), "w", encoding="utf-8") as f:
    for model_name, metrics in cv_results.items():
        f.write(f"Model: {model_name}\n")
        for metric_name, value in metrics.items():
            f.write(f"  {metric_name}: {value:.4f}\n")
        f.write("\n")

# Save ensemble model
joblib.dump(ensemble, os.path.join(output_dir, "ensemble_model.pkl"))

# Save label encoder and scaler
joblib.dump(label_encoder, os.path.join(output_dir, "label_encoder.pkl"))
joblib.dump(scaler, os.path.join(output_dir, "scaler.pkl"))

# Inter-class similarity matrix: cosine similarity between the per-class mean
# feature vectors of the resampled, scaled data (rows/columns follow the
# sorted encoded class ids).
class_means = np.array([
    np.mean(data[labels == class_id], axis=0)
    for class_id in np.unique(labels)
])
similarity_matrix = 1 - cdist(class_means, class_means, metric='cosine')
np.savetxt(os.path.join(output_dir, "inter_class_similarity.csv"), similarity_matrix, delimiter=",", fmt="%.4f")

print("Cross-validation completed. Results saved to 'model_results' directory.")


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
