#  color_histogram with no resize 1/2 image



In [2]:
import cv2
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import os
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
# Locations of the image folder and of the CSV annotation file.
folder_path = './images/'
csv_file_path = 'pollen_data.csv'

# Two columns of the CSV are used below: 'filename' and 'pollen_carrying'.
data = pd.read_csv(csv_file_path)

# One path and one integer label per CSV row, kept in the same row order so
# that image_paths[i] and labels[i] describe the same image.
image_paths = [os.path.join(folder_path, filename) for filename in data['filename']]
labels = data['pollen_carrying'].values.astype(int)

def load_images():
    """Load every listed image, keep its bottom half, and split by label.

    Reads the module-level ``image_paths`` and ``labels`` sequences in
    lockstep.

    Returns:
        A ``(honeybee_images, non_honeybee_images)`` tuple of lists. Cropped
        images whose label equals 1 go into the first list, all others into
        the second.

    Raises:
        FileNotFoundError: If OpenCV cannot read one of the paths.
    """
    honeybee_images = []
    non_honeybee_images = []

    for path, label in zip(image_paths, labels):
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports a missing or undecodable file by returning
            # None instead of raising; fail here with the offending path
            # rather than with an AttributeError on img.shape below.
            raise FileNotFoundError(f"Could not read image: {path}")

        # Crop the image to take only the bottom half
        height = img.shape[0]
        img_cropped = img[height // 2:, :]

        if label == 1:
            honeybee_images.append(img_cropped)
        else:
            non_honeybee_images.append(img_cropped)

    return honeybee_images, non_honeybee_images

def resize_images(images, width, height):
    """Return a new list holding each image resized with ``cv2.resize``.

    Args:
        images: Iterable of images accepted by ``cv2.resize``.
        width: Target width passed to ``cv2.resize``.
        height: Target height passed to ``cv2.resize``.

    Returns:
        A list of resized images, in the same order as ``images``.
    """
    # cv2.resize takes the target size as (width, height).
    target_size = (width, height)
    return [cv2.resize(image, target_size) for image in images]

def extract_color_histogram_local(images, num_grid_x, num_grid_y):
    """Compute grid-local HSV colour histograms for each image.

    Every image is converted from BGR to HSV and divided into
    ``num_grid_x`` columns by ``num_grid_y`` rows of equally sized cells.
    The cell size is the integer quotient of the image size by the grid
    count, so any remainder pixels at the right and bottom edges are not
    covered by a cell. For each cell, visited row by row, three histograms
    are computed and appended in this order: hue (16 bins over [0, 180)),
    saturation (8 bins over [0, 256)) and value (8 bins over [0, 256)).
    Each histogram is divided by the number of pixels in a cell.

    Args:
        images: Iterable of BGR images (arrays with a ``shape`` attribute).
        num_grid_x: Number of grid columns; must be positive.
        num_grid_y: Number of grid rows; must be positive.

    Returns:
        A list with one feature list per image; each feature list has
        ``num_grid_x * num_grid_y * 32`` values.

    Raises:
        ValueError: If a grid count is not positive, or if an image is
            smaller than the grid so that a cell would contain no pixels.
    """
    if num_grid_x <= 0 or num_grid_y <= 0:
        raise ValueError(
            f"num_grid_x and num_grid_y must be positive, "
            f"got {num_grid_x} and {num_grid_y}"
        )

    # (channel index, number of bins, exclusive upper bound) for H, S and V.
    channel_specs = ((0, 16, 180), (1, 8, 256), (2, 8, 256))
    histograms = []

    for img in images:
        # Split the image into grid cells and compute histograms per cell.
        grid_size_x = img.shape[1] // num_grid_x
        grid_size_y = img.shape[0] // num_grid_y
        if grid_size_x == 0 or grid_size_y == 0:
            # An empty cell would make the normalisation divide by zero.
            raise ValueError(
                f"image of shape {img.shape[:2]} is too small for a "
                f"{num_grid_x}x{num_grid_y} grid"
            )
        cell_pixels = grid_size_x * grid_size_y

        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        hist_features = []

        for y in range(num_grid_y):
            for x in range(num_grid_x):
                grid = hsv[y*grid_size_y:(y+1)*grid_size_y, x*grid_size_x:(x+1)*grid_size_x, :]
                for channel, bins, upper in channel_specs:
                    hist = cv2.calcHist([grid], [channel], None, [bins], [0, upper])
                    # Normalise by the number of pixels in the cell.
                    hist /= cell_pixels
                    # Concatenate the histograms into one feature vector.
                    hist_features.extend(hist.flatten())

        histograms.append(hist_features)

    return histograms

def find_optimal_grid_size(images, labels):
    """Return the grid dimensions used for the local colour histograms.

    The grid search is currently disabled, so ``images`` and ``labels`` are
    accepted only to keep the call signature and are not read. The function
    always returns the fixed pair ``(14, 7)``.

    The disabled search swept ``num_grid_x`` over ``range(10, 15)`` and
    ``num_grid_y`` over ``range(5, 8)``, scoring an RBF ``SVC`` by accuracy
    on an 80/20 ``train_test_split`` with ``random_state=42``.
    NOTE(review): the fixed values presumably came from that search; confirm
    before relying on them for a different dataset.

    Args:
        images: Unused.
        labels: Unused.

    Returns:
        A ``(num_grid_x, num_grid_y)`` tuple, currently ``(14, 7)``.
    """
    best_num_grid_x = 14
    best_num_grid_y = 7

    return best_num_grid_x, best_num_grid_y

def prepare_data(honeybee_images, non_honeybee_images):
    """Turn the two image lists into train/test feature and label splits.

    The images are concatenated (first list, then second), labelled 1.0 and
    0.0 respectively, resized to 150x90 (width x height), converted to
    grid-local colour histograms, and split 80/20 with ``random_state=42``.
    The chosen grid size is printed.

    Args:
        honeybee_images: List of images that receive label 1.
        non_honeybee_images: List of images that receive label 0.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``.
    """
    # Images and labels are assembled in the same order: ones, then zeros.
    images = honeybee_images + non_honeybee_images
    labels = np.concatenate([
        np.ones(len(honeybee_images)),
        np.zeros(len(non_honeybee_images)),
    ])

    # Bring every image to a common size before gridding.
    resized_images = resize_images(images, 150, 90)

    # Grid dimensions for the histogram extraction.
    num_grid_x, num_grid_y = find_optimal_grid_size(resized_images, labels)
    print(f"Optimal num_grid_x: {num_grid_x}")
    print(f"Optimal num_grid_y: {num_grid_y}")

    features = extract_color_histogram_local(resized_images, num_grid_x, num_grid_y)

    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )
    return train_features, test_features, train_labels, test_labels

def train_svm(X_train, y_train):
    """Fit and return an ``SVC`` with an RBF kernel, C=1.0, gamma='scale'.

    Args:
        X_train: Training feature vectors.
        y_train: Training labels.

    Returns:
        The fitted ``SVC`` instance.
    """
    classifier = SVC(kernel='rbf', C=1.0, gamma='scale')
    # fit() returns the estimator itself, so the fitted model can be
    # returned straight from the call.
    return classifier.fit(X_train, y_train)

def evaluate_model(model, X_test, y_test):
    """Score a fitted binary classifier on held-out data.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Test feature vectors.
        y_test: True test labels, expected to be 0 or 1.

    Returns:
        The tuple ``(accuracy, TP, FP, TN, FN)``, where class 1 is the
        positive class.
    """
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Pin the label order so the matrix is always 2x2. Without `labels`,
    # a test split or prediction set containing a single class yields a
    # 1x1 matrix and the unpacking below would fail.
    confusion_mat = confusion_matrix(y_test, y_pred, labels=[0, 1])

    # Rows are true labels and columns are predictions, so row-major
    # flattening gives TN, FP, FN, TP.
    tn, fp, fn, tp = confusion_mat.ravel()
    return accuracy, tp, fp, tn, fn


# Load the bottom-half crops, split into the label-1 and label-0 lists.
honeybee_images, non_honeybee_images = load_images()

# Prepare data: build the histogram features and the train/test split that
# every classifier below shares.
X_train, X_test, y_train, y_test = prepare_data(honeybee_images, non_honeybee_images)

# Train and evaluate SVM
svm_model = train_svm(X_train, y_train)
svm_accuracy, svm_TP, svm_FP, svm_TN, svm_FN = evaluate_model(svm_model, X_test, y_test)

def train_knn(X_train, y_train, n_neighbors=5):
    """Fit and return a ``KNeighborsClassifier``.

    Args:
        X_train: Training feature vectors.
        y_train: Training labels.
        n_neighbors: Number of neighbours passed to the classifier.

    Returns:
        The fitted ``KNeighborsClassifier`` instance.
    """
    classifier = KNeighborsClassifier(n_neighbors=n_neighbors)
    # fit() returns the estimator itself.
    return classifier.fit(X_train, y_train)

def train_nb(X_train, y_train):
    """Fit and return a ``GaussianNB`` classifier with default settings.

    Args:
        X_train: Training feature vectors.
        y_train: Training labels.

    Returns:
        The fitted ``GaussianNB`` instance.
    """
    classifier = GaussianNB()
    # fit() returns the estimator itself.
    return classifier.fit(X_train, y_train)

# Train and evaluate KNN (default n_neighbors=5) on the same split as the SVM.
knn_model = train_knn(X_train, y_train)
knn_accuracy, knn_TP, knn_FP, knn_TN, knn_FN = evaluate_model(knn_model, X_test, y_test)

# Train and evaluate NB (Gaussian naive Bayes) on the same split.
nb_model = train_nb(X_train, y_train)
nb_accuracy, nb_TP, nb_FP, nb_TN, nb_FN = evaluate_model(nb_model, X_test, y_test)


# Print the results: accuracy and confusion-matrix counts per classifier,
# in the order SVM, KNN, Naive Bayes. Headings after the first begin with a
# newline so that sections are separated by a blank line.
print("Results:")
for heading, (accuracy, tp, fp, tn, fn) in (
    ("SVM:", (svm_accuracy, svm_TP, svm_FP, svm_TN, svm_FN)),
    ("\nKNN:", (knn_accuracy, knn_TP, knn_FP, knn_TN, knn_FN)),
    ("\nNaive Bayes:", (nb_accuracy, nb_TP, nb_FP, nb_TN, nb_FN)),
):
    print(heading)
    print(f"Accuracy: {accuracy}")
    print(f"True Positive: {tp}")
    print(f"False Positive: {fp}")
    print(f"True Negative: {tn}")
    print(f"False Negative: {fn}")


Optimal num_grid_x: 14
Optimal num_grid_y: 7
Results:
SVM:
Accuracy: 0.8881118881118881
True Positive: 75
False Positive: 8
True Negative: 52
False Negative: 8

KNN:
Accuracy: 0.7902097902097902
True Positive: 61
False Positive: 8
True Negative: 52
False Negative: 22

Naive Bayes:
Accuracy: 0.6783216783216783
True Positive: 41
False Positive: 4
True Negative: 56
False Negative: 42


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
