In [3]:
import os
import cv2
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


# Root directory of the dataset. Set the VEGETABLE_IMAGES_DIR environment
# variable to point somewhere else; when it is unset the original default
# location is used, so existing setups keep working unchanged.
base_path = os.environ.get('VEGETABLE_IMAGES_DIR', r'D:\Vegetable_Images')


# Subdirectories holding the train, validation, and test splits
train_dir = os.path.join(base_path, 'train')
validation_dir = os.path.join(base_path, 'validation')
test_dir = os.path.join(base_path, 'test')


# Module-level lists for data and labels.
# NOTE(review): the loader below builds its own local lists and nothing in the
# code visible here reads these two; kept in case another cell relies on them.
data = []
labels = []


#feature extraction
def preprocess_and_extract_features(image, image_size=(64, 64)):
    """Resize an image and return its flattened, normalized color histogram.

    Args:
        image: Image array, e.g. as returned by ``cv2.imread``. The histogram
            is taken over channels 0, 1 and 2, so a 3-channel image is
            presumably expected -- TODO confirm for grayscale inputs.
        image_size: Target size handed to ``cv2.resize`` (default ``(64, 64)``).

    Returns:
        A 1-D array with 8 * 8 * 8 = 512 entries: the joint histogram with
        8 bins per channel over the value range [0, 256), normalized with
        ``cv2.normalize`` using its default parameters, then flattened.

    Raises:
        ValueError: If ``image`` is ``None`` (which is what ``cv2.imread``
            returns for a file it cannot decode).
    """
    # Fail early with a readable message instead of an opaque OpenCV assertion
    # from cv2.resize.
    if image is None:
        raise ValueError(
            "image is None; cv2.imread returns None when a file cannot be read"
        )

    # Resize the image to a fixed size so every histogram covers the same
    # number of pixels
    resized = cv2.resize(image, image_size)

    # Joint 3-D color histogram: 8 bins per channel over [0, 256)
    hist = cv2.calcHist([resized], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])

    # Normalize (written back into hist) and flatten to a feature vector
    hist = cv2.normalize(hist, hist).flatten()
    return hist


# Function to load data from a specific directory
def load_data_from_directory(data_dir):
    """Load one dataset split laid out as ``data_dir/<class_name>/<image files>``.

    Each subdirectory name of ``data_dir`` is used as the class label for
    every image inside it. Entries are visited in sorted order so the returned
    sample order is the same on every run and platform (``os.listdir`` itself
    returns names in arbitrary order).

    Args:
        data_dir: Path of the split directory containing one folder per class.

    Returns:
        A ``(data, labels)`` tuple of two parallel lists: the feature vector
        of each readable image and the name of the class folder it came from.

    Notes:
        Non-directory entries directly under ``data_dir`` are ignored, and
        files that ``cv2.imread`` cannot decode (it returns ``None``) are
        skipped; a one-line summary is printed when anything was skipped.
    """
    data = []
    labels = []
    skipped = []
    for vegetable_class in sorted(os.listdir(data_dir)):
        class_path = os.path.join(data_dir, vegetable_class)
        # Stray files next to the class folders (e.g. OS metadata files) are
        # not classes; listing them as directories would raise.
        if not os.path.isdir(class_path):
            continue
        for image_file in sorted(os.listdir(class_path)):
            image_path = os.path.join(class_path, image_file)
            image = cv2.imread(image_path)
            # cv2.imread signals failure by returning None rather than raising
            if image is None:
                skipped.append(image_path)
                continue
            data.append(preprocess_and_extract_features(image))
            labels.append(vegetable_class)
    if skipped:
        print(f"Skipped {len(skipped)} unreadable file(s) under {data_dir}")
    return data, labels


# Load every split and turn its features and labels into NumPy arrays
train_data, train_labels = map(np.array, load_data_from_directory(train_dir))
validation_data, validation_labels = map(np.array, load_data_from_directory(validation_dir))
test_data, test_labels = map(np.array, load_data_from_directory(test_dir))


# Fit a 100-tree Random Forest (fixed seed) on the training split
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    train_data, train_labels
)


# Predict on the held-out splits
validation_predictions = rf_classifier.predict(validation_data)
test_predictions = rf_classifier.predict(test_data)


# Accuracy, confusion matrix and per-class report for the validation split
validation_accuracy, validation_confusion, validation_report = (
    metric(validation_labels, validation_predictions)
    for metric in (accuracy_score, confusion_matrix, classification_report)
)


# The same three metrics for the test split
test_accuracy, test_confusion, test_report = (
    metric(test_labels, test_predictions)
    for metric in (accuracy_score, confusion_matrix, classification_report)
)


# Report the validation results (heading and value on consecutive lines)
print("Validation Accuracy:", validation_accuracy)
print("Validation Confusion Matrix:", validation_confusion, sep="\n")
print("Validation Classification Report:", validation_report, sep="\n")


# Report the test results
print("Test Accuracy:", test_accuracy)
print("Test Confusion Matrix:", test_confusion, sep="\n")
print("Test Classification Report:", test_report, sep="\n")


Validation Accuracy: 0.9816666666666667
Validation Confusion Matrix:
[[194   0   3   0   0   1   0   0   0   0   1   0   0   0   1]
 [  0 200   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0 199   0   0   0   0   0   0   0   1   0   0   0   0]
 [  0   0   0 195   2   2   0   0   1   0   0   0   0   0   0]
 [  1   0   1   0 195   0   1   0   0   0   0   0   0   0   2]
 [  0   0   0   0   1 197   0   0   0   0   0   0   2   0   0]
 [  0   0   0   0   0   0 200   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0 200   0   0   0   0   0   0   0]
 [  0   0   0   0   0   1   0   0 197   0   0   0   2   0   0]
 [  0   0   0   0   0   0   0   0   0 199   0   0   0   0   1]
 [  1   1   1   1   0   0   0   0   0   0 196   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0 200   0   0   0]
 [  0   0   1   1   4   0   2   0   2   0   0   0 186   0   4]
 [  0   0   0   0   0   0   0   0   1   0   0   1   0 198   0]
 [  1   0   0   0   4   1   0   3   0   0   1   0