In [None]:
import cv2
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [None]:
def extract_color_histogram(image, bins=(32, 32, 32)):
    """Build a flattened, normalized HSV color histogram for an image.

    Args:
        image: Image array in BGR channel order; it is handed straight to
            ``cv2.cvtColor`` with ``cv2.COLOR_BGR2HSV``.
        bins: Number of histogram bins for the H, S and V channels.

    Returns:
        The 3-D histogram flattened to one dimension (32 * 32 * 32 values
        with the default ``bins``).
    """
    # Value ranges for H, S and V, given as consecutive (low, high) pairs.
    hsv_ranges = [0, 180, 0, 256, 0, 256]
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    histogram = cv2.calcHist([hsv_image], [0, 1, 2], None, bins, hsv_ranges)
    # Normalize in place (OpenCV default settings) so the feature vector
    # does not scale with the number of pixels in the image.
    cv2.normalize(histogram, histogram)
    return histogram.flatten()

In [None]:
# Mount Google Drive inside the Colab runtime so the dataset folders under
# /content/drive can be read (this cell only works when running on Colab).
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Root folders of the fruit dataset on the mounted Drive.
# Training/ is expected to hold one sub-directory per class, while Test/ holds
# loose images (e.g. apple_test.jpg) used only for the final predictions.
# The two paths used to be swapped, which made the loading loop call
# os.listdir() on a .jpg file and fail with NotADirectoryError.
train_data_path = '/content/drive/MyDrive/dataset/FruitData/Training'
test_data_path = '/content/drive/MyDrive/dataset/FruitData/Test'
# Class names are the sub-directories of the training folder. Stray files are
# ignored and the list is sorted so the order is the same on every run.
data_dir_list = sorted(
    entry for entry in os.listdir(train_data_path)
    if os.path.isdir(os.path.join(train_data_path, entry))
)
print(data_dir_list)

['apple_test.jpg', 'banana_test.jpg']


In [None]:
# Accumulators for the training data: `features` collects one histogram per
# image and `classLabels` collects the class name of that same image.
features, classLabels = [], []

In [None]:
# Build the training set: every sub-directory of train_data_path is one class,
# and every readable image inside it contributes one histogram and one label.
for dataset in data_dir_list:
    class_dir = os.path.join(train_data_path, dataset)
    # os.listdir() raises NotADirectoryError on plain files, so skip any entry
    # that is not a class directory instead of crashing the whole cell.
    if not os.path.isdir(class_dir):
        print(f"Skipping {class_dir}: not a class directory")
        continue
    # Sorted so the sample order does not depend on the filesystem.
    img_list = sorted(os.listdir(class_dir))
    print(f'Loaded the images of dataset-{dataset}\n')
    for img in img_list:
        image_path = os.path.join(class_dir, img)
        # cv2.imread returns None (no exception) when a file cannot be decoded.
        image = cv2.imread(image_path)
        if image is None:
            print(f"Could not read image {image_path}")
            continue
        # The directory name doubles as the class label.
        label = dataset
        hist = extract_color_histogram(image)
        features.append(hist)
        classLabels.append(label)

NotADirectoryError: [Errno 20] Not a directory: '/content/drive/MyDrive/dataset/FruitData/Test/apple_test.jpg'

In [None]:
# Turn the accumulated Python lists into numpy arrays before splitting the
# data and fitting the models.
features, classLabels = np.array(features), np.array(classLabels)

In [None]:
# Hold out 20% of the samples for evaluation and train on the remaining 80%.
# random_state pins the shuffle so the split (and every accuracy figure
# computed from it) is identical after Restart & Run All.
trainFeat, testFeat, trainLabels, testLabels = train_test_split(
    features, classLabels, test_size=0.20, random_state=42
)


In [None]:
 K-Nearest Neighbors (k-NN) Classifier
print("\n[INFO] evaluating k-NN...")
k = 9
# Initialize the k-NN classifier with k neighbors
knn_model = KNeighborsClassifier(n_neighbors=k)
# Train the k-NN classifier on the training data
knn_model.fit(trainFeat, trainLabels)
# Evaluate the classifier on the testing data
knn_acc = knn_model.score(testFeat, testLabels)
print(f"[INFO] k-NN classifier: k = {k}")
print(f"[INFO] accuracy: {knn_acc * 100:.2f}%")

In [None]:
# k-NN evaluation on the held-out split: print the confusion matrix first,
# then the classification report.
knn_predLabels = knn_model.predict(testFeat)
for summarize in (confusion_matrix, classification_report):
    print(summarize(testLabels, knn_predLabels))


In [None]:
 Support Vector Classifier (SVC)
print("\n[INFO] evaluating SVC...")
# Initialize the SVC with balanced class weights and a maximum of 1000 iterations
svc_model = SVC(max_iter=1000, class_weight='balanced')
# Train the SVC on the training data
svc_model.fit(trainFeat, trainLabels)
# Evaluate the classifier on the testing data
svc_acc = svc_model.score(testFeat, testLabels)
print("[INFO] SVC classifier")
print(f"[INFO] accuracy: {svc_acc * 100:.2f}%")

In [None]:
# SVC evaluation on the held-out split: print the confusion matrix first,
# then the classification report.
svc_predLabels = svc_model.predict(testFeat)
for summarize in (confusion_matrix, classification_report):
    print(summarize(testLabels, svc_predLabels))


In [None]:
# Artificial Neural Network (ANN)
print("\n[INFO] evaluating ANN...")
# Two hidden layers of 10 units each, trained with plain SGD.
# random_state fixes the weight initialisation and batch shuffling so the
# reported accuracy is reproducible between runs.
nn_model = MLPClassifier(
    hidden_layer_sizes=(10, 10),
    max_iter=1000,
    solver='sgd',
    learning_rate_init=.1,
    random_state=42,
)
nn_model.fit(trainFeat, trainLabels)
# score() reports mean accuracy on the held-out split.
nn_acc = nn_model.score(testFeat, testLabels)
print(f"[INFO] Neural Network accuracy: {nn_acc * 100:.2f}%")

In [None]:
# ANN evaluation on the held-out split: print the confusion matrix first,
# then the classification report.
nn_predLabels = nn_model.predict(testFeat)
for summarize in (confusion_matrix, classification_report):
    print(summarize(testLabels, nn_predLabels))

In [None]:
 Test Predictions for each model
test_img_list = os.listdir(test_data_path)
print("\n[INFO] Test Predictions:")
for img in test_img_list:
    # Construct the full path to the test image
    image_path = os.path.join(test_data_path, img)
    # Read the test image
    image = cv2.imread(image_path)
    if image is not None:
        # Extract the color histogram from the test image
        hist = extract_color_histogram(image)

        # Predict the class label using each trained model
        knn_prediction = knn_model.predict([hist])
        svc_prediction = svc_model.predict([hist])
        nn_prediction = nn_model.predict([hist])
         # Print the predictions from each model
        print(f"Image: {img}")
        print(f"  k-NN Predicted Class Label: {knn_prediction[0]}")
        print(f"  SVC Predicted Class Label: {svc_prediction[0]}")
        print(f"  ANN Predicted Class Label: {nn_prediction[0]}")
    else:
        print(f"Could not read image {image_path}")