## Loading the image dataset and applying preprocessing

In [12]:
import os
from tqdm import tqdm
import cv2
import numpy as np
import pandas as pd

# Top-level class folder name -> integer class id.
# The "Tomatoe" spelling is intentional: it has to match the folder name on disk.
LABELS = dict(Apple=0, Cherry=1, Tomatoe=2)

# Target size handed to cv2.resize for every image.
IMG_SIZE = (64, 64)
def _preprocess_image(img_path, clahe):
    """Load one image file and run the preprocessing pipeline on it.

    Steps: BGR -> RGB, resize to IMG_SIZE, CLAHE on the L channel of the LAB
    representation, 5x5 Gaussian blur, then division by 255.

    Args:
        img_path: Path of the image file to read.
        clahe: A CLAHE object (from cv2.createCLAHE) reused across images.

    Returns:
        The preprocessed RGB image as a float array, or None when
        cv2.imread could not read the file.
    """
    img = cv2.imread(img_path)
    if img is None:
        return None

    # OpenCV loads images as BGR; the rest of the pipeline works in RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, IMG_SIZE)

    # Contrast enhancement: equalize only the lightness channel so the
    # colour channels (a, b) are left untouched.
    lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
    lightness, chan_a, chan_b = cv2.split(lab)
    lab_clahe = cv2.merge((clahe.apply(lightness), chan_a, chan_b))
    img = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2RGB)

    # Light smoothing; sigma=0 lets OpenCV derive it from the kernel size.
    img = cv2.GaussianBlur(img, (5, 5), 0)

    # Normalize by the 8-bit maximum.
    return img / 255.0


def process_dataset(root_path):
    """Load and preprocess every image below ``root_path``.

    Expected layout: ``root_path/<class>/<variety>/<image file>``, where
    ``<class>`` is one of the keys of LABELS. Class folders that are not in
    LABELS are reported and skipped; non-directory entries are ignored.
    Files that cv2.imread cannot read are skipped, and the number skipped is
    printed per variety.

    Directory listings are sorted, so the order of the returned samples is
    the same on every run (os.listdir alone returns entries in arbitrary
    order).

    Args:
        root_path: Directory that contains the class folders.

    Returns:
        A ``(data, labels)`` tuple of NumPy arrays. ``data`` stacks the
        preprocessed images (one per readable file) and ``labels`` holds the
        matching integer class id for each image. Both are empty arrays when
        no image was loaded.
    """
    data = []
    labels = []

    # Build the CLAHE operator once instead of once per image.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

    # Loop over the main classes (the keys of LABELS).
    for fruit in sorted(os.listdir(root_path)):
        fruit_path = os.path.join(root_path, fruit)
        if not os.path.isdir(fruit_path):
            continue

        # Convert the class folder name to its integer label.
        label = LABELS.get(fruit, None)
        if label is None:
            print("Skipping unknown folder:", fruit)
            continue

        # Loop over the varieties of this class.
        for variety in sorted(os.listdir(fruit_path)):
            variety_path = os.path.join(fruit_path, variety)
            if not os.path.isdir(variety_path):
                continue

            print(f"Processing {fruit}/{variety}")

            # Loop over the image files of this variety.
            unreadable = 0
            for img_name in tqdm(sorted(os.listdir(variety_path))):
                img = _preprocess_image(os.path.join(variety_path, img_name), clahe)
                if img is None:
                    unreadable += 1
                    continue

                data.append(img)
                labels.append(label)

            if unreadable:
                print(f"Skipped {unreadable} unreadable file(s) in {fruit}/{variety}")

    return np.array(data), np.array(labels)


## Using HOG for better feature extraction

In [4]:
from skimage.feature import hog

def extract_hog_features(images):
    """Compute one HOG descriptor per image.

    Each image is scaled back to 8-bit, converted to grayscale and passed to
    skimage's ``hog`` with 9 orientations, 8x8-pixel cells, 2x2-cell blocks
    and L2-Hys block normalization.

    Args:
        images: Iterable of RGB images whose values are scaled to [0, 1]
            (the format produced by dividing 8-bit pixels by 255).

    Returns:
        A NumPy array with one row of HOG features per input image; an empty
        array when ``images`` is empty.
    """
    hog_features = []
    for img in tqdm(images, desc="Extracting HOG"):
        # astype(np.uint8) truncates toward zero and wraps out-of-range
        # values, so a pixel that comes back as 254.99999 would become 254.
        # Round to the nearest integer and clip to the 8-bit range first.
        img_u8 = np.clip(np.rint(img * 255), 0, 255).astype(np.uint8)
        gray = cv2.cvtColor(img_u8, cv2.COLOR_RGB2GRAY)

        hog_vec = hog(
            gray,
            orientations=9,
            pixels_per_cell=(8, 8),
            cells_per_block=(2, 2),
            block_norm='L2-Hys'
        )
        hog_features.append(hog_vec)

    return np.array(hog_features)

In [None]:
# Root folder of the dataset; it must contain "train" and "test" sub-folders.
# Set the FRUITS_DATASET_ROOT environment variable to run the notebook on
# another machine. When it is unset, the original hard-coded location is used.
DATASET_ROOT = os.environ.get(
    "FRUITS_DATASET_ROOT",
    "D:\\Level 3 - 1st term\\ML\\Project\\Image_Preprocessing\\dataset\\fruits-360-3-body-problem",
)
train_path = os.path.join(DATASET_ROOT, "train")
test_path = os.path.join(DATASET_ROOT, "test")

print("\nProcessing TRAIN dataset...")
X_train, y_train = process_dataset(train_path)

# Visual separator between the two progress logs.
print()
print("#"*70)

print("\nProcessing TEST dataset...")
X_test, y_test = process_dataset(test_path)


Processing TRAIN dataset...
Processing Apple/Apple 11


100%|██████████| 572/572 [00:07<00:00, 72.15it/s]


Processing Apple/Apple 12


100%|██████████| 620/620 [00:08<00:00, 73.72it/s]


Processing Apple/Apple 13


100%|██████████| 934/934 [00:11<00:00, 81.19it/s]


Processing Apple/Apple 14


100%|██████████| 620/620 [00:07<00:00, 79.53it/s]


Processing Apple/Apple 17


100%|██████████| 811/811 [00:09<00:00, 82.73it/s]


Processing Apple/Apple 18


100%|██████████| 644/644 [00:08<00:00, 80.35it/s]


Processing Apple/Apple 5


100%|██████████| 586/586 [00:08<00:00, 71.41it/s]


Processing Apple/Apple 7


100%|██████████| 923/923 [00:11<00:00, 80.34it/s]


Processing Apple/Apple 8


100%|██████████| 915/915 [00:11<00:00, 81.63it/s]


Processing Apple/Apple 9


100%|██████████| 925/925 [00:11<00:00, 82.43it/s]


Processing Apple/Apple Braeburn 1


100%|██████████| 656/656 [00:07<00:00, 82.08it/s]


Processing Apple/Apple Crimson Snow 1


100%|██████████| 592/592 [00:07<00:00, 81.30it/s]


Processing Apple/Apple Golden 2


100%|██████████| 656/656 [00:08<00:00, 81.71it/s]


Processing Apple/Apple Golden 3


100%|██████████| 642/642 [00:07<00:00, 82.53it/s]


Processing Apple/Apple hit 1


100%|██████████| 936/936 [00:11<00:00, 80.68it/s]


Processing Apple/Apple Pink Lady 1


100%|██████████| 608/608 [00:07<00:00, 81.28it/s]


Processing Apple/Apple Red 2


100%|██████████| 656/656 [00:08<00:00, 81.92it/s]


Processing Apple/Apple Red 3


100%|██████████| 573/573 [00:06<00:00, 81.91it/s]


Processing Apple/Apple Red Delicious 1


100%|██████████| 656/656 [00:08<00:00, 81.35it/s]


Processing Apple/Apple Red Yellow 2


100%|██████████| 891/891 [00:11<00:00, 80.04it/s]


Processing Apple/Apple Rotten 1


100%|██████████| 650/650 [00:08<00:00, 80.97it/s]


Processing Apple/Apple worm 1


100%|██████████| 927/927 [00:11<00:00, 79.79it/s]


Processing Cherry/Cherry 1


100%|██████████| 656/656 [00:08<00:00, 80.27it/s]


Processing Cherry/Cherry 2


100%|██████████| 984/984 [00:12<00:00, 81.00it/s]


Processing Cherry/Cherry 4


100%|██████████| 930/930 [00:11<00:00, 80.12it/s]


Processing Cherry/Cherry 5


100%|██████████| 921/921 [00:11<00:00, 80.36it/s]


Processing Cherry/Cherry Rainier 2


100%|██████████| 927/927 [00:11<00:00, 80.35it/s]


Processing Cherry/Cherry Rainier 3


100%|██████████| 914/914 [00:11<00:00, 79.52it/s]


Processing Cherry/Cherry Sour 1


100%|██████████| 606/606 [00:07<00:00, 80.25it/s]


Processing Cherry/Cherry Wax Black 1


100%|██████████| 656/656 [00:08<00:00, 79.42it/s]


Processing Cherry/Cherry Wax not ripen 2


100%|██████████| 304/304 [00:03<00:00, 78.17it/s]


Processing Cherry/Cherry Wax Red 1


100%|██████████| 656/656 [00:08<00:00, 79.47it/s]


Processing Cherry/Cherry Wax Red 2


100%|██████████| 602/602 [00:07<00:00, 78.62it/s]


Processing Cherry/Cherry Wax Yellow 1


100%|██████████| 656/656 [00:08<00:00, 75.88it/s]


Processing Tomatoe/Tomato 10


100%|██████████| 919/919 [00:12<00:00, 72.85it/s]


Processing Tomatoe/Tomato 2


100%|██████████| 897/897 [00:11<00:00, 80.60it/s]


Processing Tomatoe/Tomato 3


100%|██████████| 984/984 [00:11<00:00, 83.74it/s]


Processing Tomatoe/Tomato 4


100%|██████████| 639/639 [00:07<00:00, 80.19it/s]


Processing Tomatoe/Tomato 5


100%|██████████| 891/891 [00:11<00:00, 80.12it/s]


Processing Tomatoe/Tomato 8


100%|██████████| 961/961 [00:11<00:00, 86.25it/s]


Processing Tomatoe/Tomato 9


100%|██████████| 959/959 [00:11<00:00, 85.82it/s]


Processing Tomatoe/Tomato Cherry Maroon 1


100%|██████████| 402/402 [00:04<00:00, 85.17it/s]


Processing Tomatoe/Tomato Cherry Orange 1


100%|██████████| 402/402 [00:04<00:00, 84.48it/s]


Processing Tomatoe/Tomato Cherry Red 2


100%|██████████| 402/402 [00:04<00:00, 84.28it/s]


Processing Tomatoe/Tomato Cherry Yellow 1


100%|██████████| 403/403 [00:04<00:00, 84.57it/s]


Processing Tomatoe/Tomato Heart 1


100%|██████████| 912/912 [00:10<00:00, 83.78it/s]


Processing Tomatoe/Tomato Maroon 2


100%|██████████| 592/592 [00:07<00:00, 84.18it/s]


Processing Tomatoe/Tomato not Ripen 1


100%|██████████| 632/632 [00:07<00:00, 85.42it/s]


##################################################

Processing TEST dataset...
Processing Apple/Apple 10


100%|██████████| 930/930 [00:13<00:00, 71.40it/s]


Processing Apple/Apple 19


100%|██████████| 970/970 [00:12<00:00, 77.26it/s]


Processing Apple/Apple 6


100%|██████████| 630/630 [00:07<00:00, 81.82it/s]


Processing Apple/Apple Golden 1


100%|██████████| 640/640 [00:07<00:00, 80.77it/s]


Processing Apple/Apple Granny Smith 1


100%|██████████| 656/656 [00:08<00:00, 79.96it/s]


Processing Apple/Apple Red 1


100%|██████████| 656/656 [00:08<00:00, 81.11it/s]


Processing Apple/Apple Red Yellow 1


100%|██████████| 656/656 [00:08<00:00, 80.50it/s]


Processing Cherry/Cherry 3


100%|██████████| 930/930 [00:11<00:00, 80.34it/s]


Processing Cherry/Cherry Rainier 1


100%|██████████| 984/984 [00:11<00:00, 84.41it/s]


Processing Cherry/Cherry Wax not ripen 1


100%|██████████| 900/900 [00:10<00:00, 82.11it/s]


Processing Cherry/Cherry Wax Red 3


100%|██████████| 604/604 [00:07<00:00, 79.02it/s]


Processing Tomatoe/Tomato 1


100%|██████████| 984/984 [00:11<00:00, 82.42it/s]


Processing Tomatoe/Tomato 7


100%|██████████| 931/931 [00:11<00:00, 82.42it/s]


Processing Tomatoe/Tomato Cherry Red 1


100%|██████████| 656/656 [00:07<00:00, 82.56it/s]


Processing Tomatoe/Tomato Maroon 1


100%|██████████| 494/494 [00:06<00:00, 80.99it/s]


Processing Tomatoe/Tomato Yellow 1


100%|██████████| 612/612 [00:07<00:00, 81.86it/s]


In [23]:
# Sanity check: image tensors as returned by process_dataset, before HOG.
print(f"Shape of X_train before HOG: {X_train.shape}")
print(f"Shape of X_test before HOG: {X_test.shape}")

Shape of X_train before HOG: (34800, 64, 64, 3)
Shape of X_test before HOG: (12233, 64, 64, 3)


In [24]:
# Turn every preprocessed image into a flat HOG descriptor (train first, then test).
X_train_hog, X_test_hog = map(extract_hog_features, (X_train, X_test))

# Both splits must end up with the same number of feature columns.
print(f"Shape of X_train HOG features: {X_train_hog.shape}")
print(f"Shape of X_test HOG features: {X_test_hog.shape}")

Extracting HOG: 100%|██████████| 34800/34800 [00:52<00:00, 656.61it/s]
Extracting HOG: 100%|██████████| 12233/12233 [00:17<00:00, 681.59it/s]


Shape of X_train HOG features: (34800, 1764)
Shape of X_test HOG features: (12233, 1764)


## Saving the data to CSV files with the label column

In [13]:
# One row per image: the HOG feature columns followed by a "label" column.
train_df = pd.DataFrame(X_train_hog).assign(label=y_train)
test_df = pd.DataFrame(X_test_hog).assign(label=y_test)

# index=False keeps the row index out of the written files.
train_df.to_csv("train_processed.csv", index=False)
test_df.to_csv("test_processed.csv", index=False)

print("\nSaved: train_processed.csv & test_processed.csv")


Saved: train_processed.csv & test_processed.csv
