# PRE-TRAIN

## CONVERT IMAGES TO CSV WITH HOG FEATURES

In [1]:
import os
import cv2
import pandas as pd
from skimage import feature

def convert_images_to_csv_with_hog(folder_path, csv_filename, label_mapping, target_size=(48, 48), max_images=2700):
    """Extract HOG features from the images of one class folder into a CSV.

    Every file in ``folder_path`` that OpenCV can read is loaded as
    grayscale, resized to ``target_size`` and described by a HOG feature
    vector. The CSV has one row per image: a ``label`` column followed by
    ``hog_1`` ... ``hog_N`` columns.

    Args:
        folder_path: Directory holding the images of a single class. Its
            base name, lower-cased, is the key looked up in
            ``label_mapping``.
        csv_filename: Path of the CSV file to write.
        label_mapping: Mapping from lower-cased folder name to the label
            value stored in the ``label`` column.
        target_size: Size passed to ``cv2.resize``.
        max_images: Maximum number of readable images to process.

    Raises:
        KeyError: If the folder name is not a key of ``label_mapping``.
    """
    # normpath drops a trailing separator, so "Happy/" still resolves to "happy".
    class_name = os.path.basename(os.path.normpath(folder_path)).lower()
    label = label_mapping[class_name]

    hogs = []

    # Sorted so the subset kept under max_images is reproducible;
    # os.listdir returns entries in arbitrary order.
    for filename in sorted(os.listdir(folder_path)):
        if len(hogs) >= max_images:
            break

        img = cv2.imread(os.path.join(folder_path, filename), cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread returns None for anything it cannot decode.
            continue

        img = cv2.resize(img, target_size)

        # Compute the HOG descriptor of the resized image.
        hogs.append(
            feature.hog(img, orientations=9, pixels_per_cell=(8, 8),
                        cells_per_block=(2, 2), block_norm='L2-Hys',
                        visualize=False, transform_sqrt=True)
        )

    if hogs:
        # One row per image, one column per HOG component.
        hog_columns = [f'hog_{j + 1}' for j in range(len(hogs[0]))]
        df = pd.DataFrame(hogs, columns=hog_columns)
        df.insert(0, 'label', label)
    else:
        # Without any image the HOG width is unknown; write a header-only
        # file instead of crashing on hogs[0].
        import warnings

        warnings.warn(f"No readable images found in '{folder_path}'; "
                      f"writing an empty CSV to '{csv_filename}'.")
        df = pd.DataFrame({'label': []})

    df.to_csv(csv_filename, index=False)

if __name__ == "__main__":
    # Emotion class name -> numeric label written to the CSV "label" column.
    labels_mapping = {
        "angry": 0,
        "fear": 1,
        "happy": 2,
        "neutral": 3,
        "sad": 4
    }

    images_folder = "processed_image"
    output_folder = "csv_with_hog"

    # exist_ok=True replaces the exists()/makedirs() pair, which could race
    # if the folder is created between the check and the call.
    os.makedirs(output_folder, exist_ok=True)

    # Each class is read from a capitalized sub-folder (e.g. "Angry") and
    # written to its own lower-case CSV (e.g. "angry.csv").
    for label_name in labels_mapping:
        folder_path = os.path.join(images_folder, label_name.capitalize())
        csv_filename = os.path.join(output_folder, f"{label_name.lower()}.csv")
        convert_images_to_csv_with_hog(folder_path, csv_filename, labels_mapping)

## GATHER ALL CSV FILES INTO A FILE

In [2]:
import pandas as pd
import glob

# Merge every per-class CSV in folder_path into one dataset file.
folder_path = 'csv_with_hog'

# Sorted so the row order of the merged dataset is reproducible;
# glob.glob returns matches in arbitrary order.
all_files = sorted(glob.glob(folder_path + "/*.csv"))

if not all_files:
    # pd.concat on an empty list fails with an opaque "No objects to
    # concatenate"; report the real cause instead.
    raise FileNotFoundError(f"No CSV files found in '{folder_path}'")

dfs = [pd.read_csv(file) for file in all_files]

# ignore_index=True renumbers the rows 0..n-1 across all files.
merged_df = pd.concat(dfs, ignore_index=True)

merged_df.to_csv('dataset_with_hog.csv', index=False)
