## Loading Images dataset and applying preprcessing

In [4]:
import os
from tqdm import tqdm
import cv2
import numpy as np
import pandas as pd

LABELS = {
    "Apple": 0,
    "Cherry": 1,
    "Tomatoe": 2
}

IMG_SIZE = (64, 64)

def process_dataset(root_path):
    data = []
    labels = []

    # Loops over main classes (Apple, Cherry, Tomato)
    for fruit in os.listdir(root_path):
        fruit_path = os.path.join(root_path, fruit)

        if not os.path.isdir(fruit_path):
            continue
        
        # convert fruit name to label
        label = LABELS.get(fruit, None)
        if label is None:
            print("Skipping unknown folder:", fruit)
            continue

        # loops over varieties in each class
        for variety in os.listdir(fruit_path):
            variety_path = os.path.join(fruit_path, variety)

            if not os.path.isdir(variety_path):
                continue

            print(f"Processing {fruit}/{variety}")

            # loops over images in each variety
            for img_name in tqdm(os.listdir(variety_path)):
                img_path = os.path.join(variety_path, img_name)

                img = cv2.imread(img_path)
                if img is None:
                    continue

                # Read the image in color
                img = cv2.imread(img_path)

                # Convert to grayscale
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

                # Rezsizing image
                img = cv2.resize(img, IMG_SIZE)

                # Contrast enhancement using CLAHE
                clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
                img = clahe.apply(img)

                # Median filter
                img = cv2.medianBlur(img, 5)

                # Normalization Min–Max scaling
                Img_min = img.min()
                Img_max = img.max()
                img = (img - Img_min) / (Img_max - Img_min)


                data.append(img)
                labels.append(label)

    return np.array(data), np.array(labels)


## Using HOG for better feature extraction

In [6]:
from skimage.feature import hog

def extract_hog_features(images):
    hog_features = []
    for img in tqdm(images, desc="Extracting HOG"):
        gray = (img * 255).astype(np.uint8)

        hog_vec = hog(
            gray,
            orientations=9,
            pixels_per_cell=(8, 8),
            cells_per_block=(2, 2),
            block_norm='L2-Hys'
        )
        hog_features.append(hog_vec)

    return np.array(hog_features)

In [7]:
train_path = "/home/mohamed/AIProject/archive (1)/fruits-360_3-body-problem/fruits-360-3-body-problem/Training"
test_path  = "/home/mohamed/AIProject/archive (1)/fruits-360_3-body-problem/fruits-360-3-body-problem/Test"

print("\nProcessing TRAIN dataset...")
X_train, y_train = process_dataset(train_path)

print()
print("#"*70)

print("\nProcessing TEST dataset...")
X_test, y_test = process_dataset(test_path)


Processing TRAIN dataset...
Processing Tomatoe/Tomato 9


100%|██████████| 959/959 [00:00<00:00, 1952.82it/s]


Processing Tomatoe/Tomato not Ripen 1


100%|██████████| 632/632 [00:00<00:00, 2284.57it/s]


Processing Tomatoe/Tomato 5


100%|██████████| 891/891 [00:00<00:00, 1815.92it/s]


Processing Tomatoe/Tomato Heart 1


100%|██████████| 912/912 [00:00<00:00, 2031.70it/s]


Processing Tomatoe/Tomato 2


100%|██████████| 897/897 [00:00<00:00, 1651.53it/s]


Processing Tomatoe/Tomato Cherry Yellow 1


100%|██████████| 403/403 [00:00<00:00, 1391.74it/s]


Processing Tomatoe/Tomato 8


100%|██████████| 961/961 [00:00<00:00, 1558.36it/s]


Processing Tomatoe/Tomato 3


100%|██████████| 984/984 [00:00<00:00, 2008.40it/s]


Processing Tomatoe/Tomato 4


100%|██████████| 639/639 [00:00<00:00, 1945.12it/s]


Processing Tomatoe/Tomato Cherry Orange 1


100%|██████████| 402/402 [00:00<00:00, 1250.27it/s]


Processing Tomatoe/Tomato Cherry Maroon 1


100%|██████████| 402/402 [00:00<00:00, 1724.81it/s]


Processing Tomatoe/Tomato Cherry Red 2


100%|██████████| 402/402 [00:00<00:00, 1683.87it/s]


Processing Tomatoe/Tomato 10


100%|██████████| 919/919 [00:00<00:00, 1706.43it/s]


Processing Tomatoe/Tomato Maroon 2


100%|██████████| 592/592 [00:00<00:00, 1732.85it/s]


Processing Cherry/Cherry Wax not ripen 2


100%|██████████| 304/304 [00:00<00:00, 1874.72it/s]


Processing Cherry/Cherry Wax Red 2


100%|██████████| 602/602 [00:00<00:00, 1934.55it/s]


Processing Cherry/Cherry Sour 1


100%|██████████| 606/606 [00:00<00:00, 1833.04it/s]


Processing Cherry/Cherry Wax Black 1


100%|██████████| 656/656 [00:00<00:00, 1974.87it/s]


Processing Cherry/Cherry Wax Red 1


100%|██████████| 656/656 [00:00<00:00, 2031.67it/s]


Processing Cherry/Cherry 5


100%|██████████| 921/921 [00:00<00:00, 1767.44it/s]


Processing Cherry/Cherry 2


100%|██████████| 984/984 [00:00<00:00, 1637.31it/s]


Processing Cherry/Cherry 4


100%|██████████| 930/930 [00:00<00:00, 1413.91it/s]


Processing Cherry/Cherry Rainier 2


100%|██████████| 927/927 [00:00<00:00, 1508.79it/s]


Processing Cherry/Cherry 1


100%|██████████| 656/656 [00:00<00:00, 2094.47it/s]


Processing Cherry/Cherry Rainier 3


100%|██████████| 914/914 [00:00<00:00, 1571.52it/s]


Processing Cherry/Cherry Wax Yellow 1


100%|██████████| 656/656 [00:00<00:00, 2389.86it/s]


Processing Apple/Apple Golden 3


100%|██████████| 642/642 [00:00<00:00, 1715.28it/s]


Processing Apple/Apple 5


100%|██████████| 586/586 [00:00<00:00, 1515.39it/s]


Processing Apple/Apple Red Yellow 2


100%|██████████| 891/891 [00:00<00:00, 1392.12it/s]


Processing Apple/Apple Golden 2


100%|██████████| 656/656 [00:00<00:00, 1270.93it/s]


Processing Apple/Apple Pink Lady 1


100%|██████████| 608/608 [00:00<00:00, 1153.11it/s]


Processing Apple/Apple Red Delicious 1


100%|██████████| 656/656 [00:00<00:00, 1074.57it/s]


Processing Apple/Apple Braeburn 1


100%|██████████| 656/656 [00:00<00:00, 1163.85it/s]


Processing Apple/Apple Red 2


100%|██████████| 656/656 [00:00<00:00, 1186.47it/s]


Processing Apple/Apple 9


100%|██████████| 925/925 [00:00<00:00, 997.87it/s] 


Processing Apple/Apple hit 1


100%|██████████| 936/936 [00:00<00:00, 1332.86it/s]


Processing Apple/Apple 18


100%|██████████| 644/644 [00:00<00:00, 914.29it/s] 


Processing Apple/Apple 8


100%|██████████| 915/915 [00:01<00:00, 704.57it/s]


Processing Apple/Apple 7


100%|██████████| 923/923 [00:01<00:00, 851.06it/s]


Processing Apple/Apple Crimson Snow 1


100%|██████████| 592/592 [00:00<00:00, 780.50it/s] 


Processing Apple/Apple 17


100%|██████████| 811/811 [00:01<00:00, 662.52it/s]


Processing Apple/Apple 11


100%|██████████| 572/572 [00:00<00:00, 634.77it/s]


Processing Apple/Apple 14


100%|██████████| 620/620 [00:01<00:00, 517.76it/s]


Processing Apple/Apple 13


100%|██████████| 934/934 [00:01<00:00, 510.97it/s]


Processing Apple/Apple Rotten 1


100%|██████████| 650/650 [00:01<00:00, 542.85it/s]


Processing Apple/Apple worm 1


100%|██████████| 927/927 [00:01<00:00, 573.67it/s]


Processing Apple/Apple 12


100%|██████████| 620/620 [00:01<00:00, 504.71it/s]


Processing Apple/Apple Red 3


100%|██████████| 573/573 [00:01<00:00, 418.93it/s]



######################################################################

Processing TEST dataset...
Processing Tomatoe/Tomato 1


100%|██████████| 984/984 [00:01<00:00, 718.49it/s]


Processing Tomatoe/Tomato 7


100%|██████████| 931/931 [00:01<00:00, 493.41it/s]


Processing Tomatoe/Tomato Yellow 1


100%|██████████| 612/612 [00:01<00:00, 395.12it/s]


Processing Tomatoe/Tomato Maroon 1


100%|██████████| 494/494 [00:01<00:00, 456.65it/s]


Processing Tomatoe/Tomato Cherry Red 1


100%|██████████| 656/656 [00:01<00:00, 432.57it/s]


Processing Cherry/Cherry 3


100%|██████████| 930/930 [00:03<00:00, 289.09it/s]


Processing Cherry/Cherry Wax not ripen 1


100%|██████████| 900/900 [00:02<00:00, 319.93it/s]


Processing Cherry/Cherry Rainier 1


100%|██████████| 984/984 [00:02<00:00, 434.97it/s]


Processing Cherry/Cherry Wax Red 3


100%|██████████| 604/604 [00:01<00:00, 425.91it/s]


Processing Apple/Apple Red Yellow 1


100%|██████████| 656/656 [00:01<00:00, 358.21it/s]


Processing Apple/Apple Red 1


100%|██████████| 656/656 [00:01<00:00, 432.79it/s]


Processing Apple/Apple 19


100%|██████████| 970/970 [00:02<00:00, 442.23it/s]


Processing Apple/Apple Golden 1


100%|██████████| 640/640 [00:01<00:00, 497.62it/s]


Processing Apple/Apple 6


100%|██████████| 630/630 [00:01<00:00, 481.63it/s]


Processing Apple/Apple 10


100%|██████████| 930/930 [00:02<00:00, 398.67it/s]


Processing Apple/Apple Granny Smith 1


100%|██████████| 656/656 [00:01<00:00, 516.24it/s]


In [8]:
print("Shape of X_train before HOG:", X_train.shape)
print("Shape of X_test before HOG:", X_test.shape)

Shape of X_train before HOG: (34800, 64, 64)
Shape of X_test before HOG: (12233, 64, 64)


In [9]:
X_train_hog = extract_hog_features(X_train)
X_test_hog = extract_hog_features(X_test)

print("Shape of X_train HOG features:", X_train_hog.shape)
print("Shape of X_test HOG features:", X_test_hog.shape)

Extracting HOG: 100%|██████████| 34800/34800 [00:42<00:00, 812.77it/s]
Extracting HOG: 100%|██████████| 12233/12233 [00:14<00:00, 825.00it/s]


Shape of X_train HOG features: (34800, 1764)
Shape of X_test HOG features: (12233, 1764)


## saving the data in a csv file with the lapel column

In [10]:
train_df = pd.DataFrame(X_train_hog)
train_df["label"] = y_train

test_df = pd.DataFrame(X_test_hog)
test_df["label"] = y_test

train_df.to_csv("train_processed.csv", index=False)
test_df.to_csv("test_processed.csv", index=False)

print("\nSaved: train_processed.csv & test_processed.csv")


Saved: train_processed.csv & test_processed.csv
