# Import necessary libraries

In [2]:
import os
import cv2
import numpy as np
import pandas as pd 
from sklearn.svm import SVC
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from skimage.feature import hog
import lightgbm as lgb

# Get paths

In [3]:
# Directories of the train / validation / test splits, relative to the
# notebook's working directory. load_images() joins each of these with a
# flower-type name, so every split is read as one sub-folder per class.
train_dir = 'Data/train'
val_dir = 'Data/val'
test_dir = 'Data/test'

# Data processing

In [4]:
def compute_color_histogram(image, bins=(8, 8, 8)):
    """Return a flattened, normalised 3-D HSV colour histogram of an image.

    Args:
        image: Image array; it is converted with ``cv2.COLOR_BGR2HSV``, so it
            is treated as being in BGR channel order.
        bins: Number of histogram bins for the H, S and V channels.

    Returns:
        The normalised histogram flattened to one dimension.
    """
    channels = [0, 1, 2]
    # Value ranges handed to calcHist for the H, S and V channels in turn.
    channel_ranges = [0, 180, 0, 256, 0, 256]

    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    histogram = cv2.calcHist([hsv], channels, None, bins, channel_ranges)
    histogram = cv2.normalize(histogram, histogram)
    return histogram.flatten()

def load_images(folder_path, flower_types, image_size=(64, 64)):
    """Load every readable image of each flower type and extract its features.

    For each image the feature vector is the HOG descriptor of the resized
    greyscale image concatenated with the HSV colour histogram returned by
    ``compute_color_histogram``.

    Args:
        folder_path: Directory that contains one sub-folder per flower type.
        flower_types: Iterable of sub-folder names; each name is also used as
            the label of the images found inside it.
        image_size: ``(width, height)`` every image is resized to before
            feature extraction. Defaults to ``(64, 64)``.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays with one entry per image
        that could be read.
    """
    features = []
    labels = []
    for flower in flower_types:
        flower_folder = os.path.join(folder_path, flower)
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # sample order (and everything trained on it) reproducible.
        for file_name in sorted(os.listdir(flower_folder)):
            img_path = os.path.join(flower_folder, file_name)
            image = cv2.imread(img_path)
            if image is None:
                # imread returns None when it cannot decode the entry; skip it.
                continue
            image = cv2.resize(image, image_size)
            grey_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            # Extract HOG features
            hog_features = hog(grey_image, orientations=9, pixels_per_cell=(8, 8),
                               cells_per_block=(2, 2), transform_sqrt=True, block_norm="L2")

            # Extract color histogram features
            color_hist = compute_color_histogram(image)

            # Concatenate HOG and color histogram features
            features.append(np.hstack((hog_features, color_hist)))
            labels.append(flower)

    return np.array(features), np.array(labels)

In [5]:
# Class names are the sub-directories of the training split. Keep directories
# only (a stray file such as .DS_Store would make load_images fail when it
# tries to list it) and sort them so the order does not depend on the
# filesystem.
flower_types = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)

X_train, y_train = load_images(train_dir, flower_types)
X_val, y_val = load_images(val_dir, flower_types)
X_test, y_test = load_images(test_dir, flower_types)

# Check the data after loading
print(f"Number of training samples: {len(X_train)}")
print(f"Number of validation samples: {len(X_val)}")
print(f"Number of testing samples: {len(X_test)}")

Number of training samples: 5981
Number of validation samples: 1709
Number of testing samples: 857


In [6]:
# Encode labels: the encoder is fitted on the training labels only and then
# reused unchanged for the validation and test labels.
encoder = LabelEncoder()
y_train_encoded = encoder.fit_transform(y_train)
y_val_encoded = encoder.transform(y_val)
y_test_encoded = encoder.transform(y_test)

print("Label to Encoded Mapping:")
# The position of a class in encoder.classes_ is its encoded value.
for encoded_label, original_label in enumerate(encoder.classes_):
    print(f"{original_label} -> {encoded_label}")

# Standardize data: statistics come from the training split only, so the
# validation and test splits are scaled with the same parameters.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

Label to Encoded Mapping:
daisy -> 0
lily -> 1
orchid -> 2
sunflower -> 3
tulip -> 4


In [7]:
# Apply PCA. A float n_components asks scikit-learn to keep as many components
# as needed to explain that fraction (here 95%) of the variance. The projection
# is fitted on the training split and reused for validation and test.
pca = PCA(n_components=0.95)
X_train_pca = pca.fit_transform(X_train_scaled)
X_val_pca = pca.transform(X_val_scaled)
X_test_pca = pca.transform(X_test_scaled)

# Feature dimensionality before and after the projection
print(X_train.shape)
print(X_train_pca.shape)

(5981, 2276)
(5981, 782)


# Define helper functions

In [8]:
def evaluate_model(model, X_train, X_val, y_train, y_val):
    """Fit ``model`` on the training split and return its validation accuracy.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``; it is
            fitted in place.
        X_train: Training features.
        X_val: Validation features.
        y_train: Training labels.
        y_val: Validation labels.

    Returns:
        The value of ``accuracy_score(y_val, predictions)``.
    """
    # Train the model, then score its predictions on the validation set
    model.fit(X_train, y_train)
    return accuracy_score(y_val, model.predict(X_val))

: 

# SVM

In [9]:
# Grid over SVM kernel and regularisation strength C; one validation accuracy
# per (C, kernel) pair is collected in df_svm (rows = C, columns = kernel).
kernels = ['linear', 'rbf', 'poly']
C_values = [0.01, 0.1, 1, 5, 10, 15, 20]
df_svm = pd.DataFrame(index=C_values, columns=kernels)

# NOTE: despite its name, output_dir is the path of the CSV file itself.
# os.path.join keeps the path valid on every OS (a literal backslash is only a
# separator on Windows), and the folder is created up front so a missing
# directory is not discovered only after the whole grid has been trained.
output_dir = os.path.join('finetunning_result', 'svm.csv')
os.makedirs(os.path.dirname(output_dir), exist_ok=True)

for kernel in kernels:
    for C in C_values:
        model = SVC(kernel=kernel, C=C)
        accuracy = evaluate_model(model=model, X_train=X_train_pca, X_val=X_val_pca,
                                  y_train=y_train_encoded, y_val=y_val_encoded)
        df_svm.at[C, kernel] = accuracy
        print(f'Accuracy with kernel = {kernel}, C = {C}: {accuracy}')

# Save the DataFrame to a CSV file (the index holds the C values, so keep it)
df_svm.to_csv(output_dir, index=True)

Accuracy with kernel = linear, C = 0.01: 0.7899356348741955
Accuracy with kernel = linear, C = 0.1: 0.7893504973668812
Accuracy with kernel = linear, C = 1: 0.7893504973668812
Accuracy with kernel = linear, C = 5: 0.7893504973668812
Accuracy with kernel = linear, C = 10: 0.7893504973668812
Accuracy with kernel = linear, C = 15: 0.7893504973668812
Accuracy with kernel = linear, C = 20: 0.7893504973668812
Accuracy with kernel = rbf, C = 0.01: 0.21650087770626097
Accuracy with kernel = rbf, C = 0.1: 0.7612638970157987
Accuracy with kernel = rbf, C = 1: 0.8624926857811586
Accuracy with kernel = rbf, C = 5: 0.8671737858396723
Accuracy with kernel = rbf, C = 10: 0.8671737858396723
Accuracy with kernel = rbf, C = 15: 0.8671737858396723
Accuracy with kernel = rbf, C = 20: 0.8671737858396723
Accuracy with kernel = poly, C = 0.01: 0.21650087770626097
Accuracy with kernel = poly, C = 0.1: 0.21825629022820361
Accuracy with kernel = poly, C = 1: 0.7027501462843768
Accuracy with kernel = poly, C = 5

: 

# LightGBM

In [9]:
# Sweep the LightGBM learning rate; one validation accuracy per value is stored
# in a single-row frame whose columns are the learning rates.
learning_rates = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]

# NOTE: despite its name, output_dir is the path of the CSV file itself.
# os.path.join keeps the path valid on every OS (a literal backslash is only a
# separator on Windows); the folder is created before the long training loop.
output_dir = os.path.join('finetunning_result', 'lightGBM.csv')
os.makedirs(os.path.dirname(output_dir), exist_ok=True)
df_lightGBM = pd.DataFrame(columns=learning_rates)

for lr in learning_rates:
    model = lgb.LGBMClassifier(objective='multiclass',
                               # derive the class count from the fitted label
                               # encoder instead of hard-coding 5
                               num_class=len(encoder.classes_),
                               learning_rate=lr,
                               )
    accuracy = evaluate_model(model, X_train=X_train_pca, X_val=X_val_pca,
                              y_train=y_train_encoded, y_val=y_val_encoded)
    print(f'Accuracy with learning_rate = {lr}: {accuracy}')
    # .at[row, column] fills row 0 of the existing column. Plain
    # df[0, lr] = ... would instead create a new column keyed by the tuple
    # (0, lr) and leave the frame without any data rows.
    df_lightGBM.at[0, lr] = accuracy

# Row label 0 is only a placeholder, so it is not written out (same layout as
# the Random Forest results file).
df_lightGBM.to_csv(output_dir, index=False)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.056763 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 199410
[LightGBM] [Info] Number of data points in the train set: 5981, number of used features: 782
[LightGBM] [Info] Start training from score -1.580761
[LightGBM] [Info] Start training from score -1.664602
[LightGBM] [Info] Start training from score -1.530077
[LightGBM] [Info] Start training from score -1.607100
[LightGBM] [Info] Start training from score -1.671694
Accuracy with learning_rate = 0.05: 0.7864248098303102
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.027827 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 199410
[LightGBM] [Info] Number of data points in the train set: 5981, number of used features: 782
[LightGBM] [Info] Start training from score -1.580761
[LightGBM] [Info] Start training from s

# Random Forest

In [None]:
# Define the parameters
num_estimators = [300, 500, 800, 1000, 1200]

# NOTE: despite its name, output_dir is the path of the CSV file itself.
# os.path.join keeps the path valid on every OS (a literal backslash is only a
# separator on Windows); the folder is created before the long training loop.
output_dir = os.path.join('finetunning_result', 'random_forest.csv')
os.makedirs(os.path.dirname(output_dir), exist_ok=True)
df_random_forest = pd.DataFrame(columns=num_estimators)

# Iterate over the number of estimators and evaluate the model
for n in num_estimators:
    # A fixed random_state makes the reported accuracies reproducible between
    # runs; n_jobs=-1 builds the trees on all cores without changing results.
    model = RandomForestClassifier(n_estimators=n, random_state=42, n_jobs=-1)
    accuracy = evaluate_model(model, X_train=X_train_pca, X_val=X_val_pca,
                              y_train=y_train_encoded, y_val=y_val_encoded)
    print(f'Accuracy with n_estimators = {n}: {accuracy}')
    df_random_forest.at[0, n] = accuracy

# Save the DataFrame to a CSV file
df_random_forest.to_csv(output_dir, index=False)