# Naive Bayes Image Classifier

## Import Libraries

In [1]:
import os
import numpy as np
from PIL import Image
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from skimage.color import rgb2gray
from skimage.filters import sobel
from skimage import exposure
from skimage.feature import graycomatrix, graycoprops
import matplotlib.pyplot as plt
from scipy.ndimage import shift

def pad_array(arr, n):
    #if len(arr) >= n:
        #return arr[:n]  # Trim the array if it's longer than n
    #else:
    pad_width = (0, n - len(arr))  # Calculate the padding width
    return np.pad(arr, pad_width, mode='constant', constant_values=0)

def extract_features(image, output_folder, file_name):
    # Convert the image to grayscale
    gray_image = rgb2gray(image)
    
    # Convert the image to unsigned integer type
    gray_image = (gray_image * 255).astype(np.uint8)
    
    # Save the grayscale image
    grayscale_path = os.path.join(output_folder, file_name+'grayscale.png')
    plt.imsave(grayscale_path, gray_image, cmap='gray')
    
    # Extract edge features using Sobel filter
    edge_features = sobel(gray_image)
    
    # Save the edge features image
    edge_path = os.path.join(output_folder, file_name+'edge_features.png')
    plt.imsave(edge_path, edge_features, cmap='gray')
    
    # Calculate histogram features
    hist, _ = exposure.histogram(gray_image, nbins=256)
    
    # Calculate texture features using GLCM (Grey-Level Co-occurrence Matrix)
    glcm = graycomatrix(gray_image, [5], [0], 256, symmetric=True, normed=True)
    contrast = graycoprops(glcm, 'contrast')[0, 0]
    energy = graycoprops(glcm, 'energy')[0, 0]
    correlation = graycoprops(glcm, 'correlation')[0, 0]

    # Save the histogram features plot
    hist_path = os.path.join(output_folder, file_name+'histogram.png')
    plt.plot(hist)
    plt.xlabel('Intensity')
    plt.ylabel('Frequency')
    plt.savefig(hist_path)
    plt.close()
    
    # Extract shift features
    shifted_image_1 = shift(gray_image, (10, 10))
    shifted_image_2 = shift(gray_image, (-10, -10))

    # Save the shifted images
#     shifted_image_1_path = os.path.join(output_folder, file_name+'shifted_image_1.png')
#     shifted_image_2_path = os.path.join(output_folder, file_name+'shifted_image_2.png')
#     plt.imsave(shifted_image_1_path, shifted_image_1, cmap='gray')
#     plt.imsave(shifted_image_2_path, shifted_image_2, cmap='gray')
    
    # Concatenate all features into a single feature vector
    features = np.concatenate([edge_features.flatten(), hist, [contrast, energy, correlation]])
    
    return features

## Data Preprocessing

In [2]:
# Set the path to the image folders
folder_path = '../deeplearning-model/data/known_images/'
output_folder = '../deeplearning-model/data/known_images_out/'

# Set the target image size
target_size = (256, 256)  # Adjust this as needed

# Load the image data and labels
X = []
y = []
class_labels = ['cassava', 'maize', 'banana', 'weed']
#class_labels=['x']
for label in class_labels:
    folder = os.path.join(folder_path, label)
    images = os.listdir(folder)
    for image_file in images:
        img_path = os.path.join(folder, image_file)
        img = Image.open(img_path)
        img = img.convert('RGB')  # Convert to RGB if needed
        img = img.resize(target_size)  # Resize the image
        
        # Extract the features from the image
        img_features = extract_features(np.array(img), output_folder, label+image_file)
        flattened_img_features = pad_array(img_features.flatten(), 70000)
        
        #print(len(flattened_img_features))
        
        #img_array = np.array(img)
        #X.append(img_array.flatten())  # Flatten the image to a 1D vector
        X.append(flattened_img_features)
        y.append(label)
        #break
    #break

# Convert data to NumPy arrays
X = np.array(X)
y = np.array(y)

In [5]:
print(X.shape)

(133, 70000)


In [203]:

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Naive Bayes classifier
model = GaussianNB()
model.fit(X_train.reshape(X_train.shape[0], -1), y_train)
#model.fit(X_train, y_train)

# Predict the labels for the test set
y_pred = model.predict(X_test.reshape(X_test.shape[0], -1))
#y_pred = model.predict(X_test)



## Evaluate Model

In [204]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_report = classification_report(y_test, y_pred, zero_division=1)

# Print the results
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report)

Accuracy: 0.7037037037037037
Classification Report:
              precision    recall  f1-score   support

      banana       1.00      0.00      0.00         2
     cassava       0.70      0.93      0.80        15
       maize       0.80      0.57      0.67         7
        weed       0.50      0.33      0.40         3

    accuracy                           0.70        27
   macro avg       0.75      0.46      0.47        27
weighted avg       0.73      0.70      0.66        27

