In [3]:
import os
import cv2
import numpy as np
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pickle

In [8]:
# Root folder of the dataset; the loader reads one sub-folder per class name from here
Database_path = "dataset"

# Class names; a class's position in this list is used as its integer label
CLASSES = [
    "glass",
    "paper",
    "cardboard",
    "plastic",
    "metal",
    "trash",
]

# Target size handed to cv2.resize, which interprets it as (width, height)
img_size = (128, 64)


In [9]:
# Data Augmentation function
def augment_image(image):
    """Return the input image together with two augmented variants.

    The result is a list of three images, always in this order:
      1. the original image (the same object that was passed in, not a copy),
      2. a horizontally mirrored version,
      3. a version rotated by 15 degrees about the image centre at scale 1.

    Every input therefore yields three samples (a 200% increase).
    """
    # Mirror around the vertical axis (flip code 1) so the model does not
    # learn to depend on which way an object is facing.
    mirrored = cv2.flip(image, 1)

    # Rotate about the centre while keeping the original canvas size.
    # warpAffine runs with its default border settings here, so any area the
    # rotated content no longer covers gets OpenCV's default fill.
    height, width = image.shape[:2]
    centre = (width / 2, height / 2)
    rotation_matrix = cv2.getRotationMatrix2D(centre, 15, 1)
    tilted = cv2.warpAffine(image, rotation_matrix, (width, height))

    return [image, mirrored, tilted]

In [10]:
# Feature Extraction function
def extract_features(image):
    """Turn a raw BGR image into a single 1D feature vector.

    The vector is the concatenation of:
      * HOG features of the grayscale image (shape / edge information), and
      * a 32-bin intensity histogram for each colour channel (colour
        information), each histogram normalised on its own.

    The image is first resized to the module-level ``img_size`` so that every
    image produces a vector of the same length.
    """
    resized = cv2.resize(image, img_size)

    # 1. HOG features
    # HOG works on a single channel: drop colour and keep only how brightness
    # changes direction across the image.
    grayscale = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    shape_descriptor = hog(
        grayscale,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        block_norm='L2-Hys',
        transform_sqrt=True,
    )

    # 2. Colour histogram features
    # One histogram per channel (OpenCV stores channels in B, G, R order),
    # counting how many pixels fall into each of 32 equal intensity ranges.
    color_descriptor = []
    for plane in cv2.split(resized):
        plane_hist = cv2.calcHist([plane], [0], None, [32], [0, 256])
        # In-place normalisation with cv2.normalize's default arguments
        cv2.normalize(plane_hist, plane_hist)
        color_descriptor.extend(plane_hist.flatten())

    # Shape features first, colour features second
    return np.hstack((shape_descriptor, color_descriptor))

In [11]:
# Load Data
def load_and_preprocess_data():
    """Build the feature matrix and label vector from the image folders.

    For every name in ``CLASSES`` this reads each file found in
    ``Database_path/<class name>``, expands it with ``augment_image()`` and
    converts every variant to a feature vector with ``extract_features()``.

    Files are visited in sorted name order so the row order is the same on
    every run and every machine. All variants of one source image are stored
    as consecutive rows, in the order ``augment_image()`` returns them.

    Files that OpenCV cannot decode are skipped; the number skipped is
    reported per class so missing data does not go unnoticed.

    Returns:
        X: array with one feature vector per augmented image.
        y: array of integer class ids (indices into ``CLASSES``), aligned
           row-for-row with ``X``.

    Raises:
        OSError: if a class folder is missing or cannot be listed
            (propagated from ``os.listdir``).
    """
    features = []
    labels = []

    for class_id, class_name in enumerate(CLASSES):
        class_path = os.path.join(Database_path, class_name)

        print(f"Processing class: {class_name}")
        skipped = 0

        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # row order (and any seeded split done on it later) reproducible.
        for file_name in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, file_name)

            # cv2.imread returns None instead of raising when it cannot
            # decode the file
            img = cv2.imread(img_path)
            if img is None:
                skipped += 1
                continue

            # Apply data augmentation and featurise every variant
            for aug_img in augment_image(img):
                features.append(extract_features(aug_img))
                labels.append(class_id)  # index of class_name in CLASSES

        if skipped:
            print(f"  Skipped {skipped} unreadable file(s) in {class_path}")

    X = np.array(features)
    y = np.array(labels)
    return X, y

In [12]:
# Pipeline summary: load images from the dataset folder, augment them (flip and
# rotation), extract HOG + colour-histogram features, split into train and test
# sets without separating an image from its augmented copies, scale the
# features, and save everything to a pickle file.
X, y = load_and_preprocess_data()

# Split Data
# load_and_preprocess_data() stores all augmented variants of one source image
# as consecutive rows. A plain row-wise split would put an image in the
# training set and its mirrored/rotated copy in the test set, which leaks
# information and inflates the test score. So the split is made per source
# image and then expanded back to row indices.
split_seed = 42

# Ask augment_image() how many variants it produces instead of hard-coding it
variants_per_image = len(augment_image(np.zeros((8, 8, 3), dtype=np.uint8)))
assert len(X) % variants_per_image == 0, (
    "row count is not a multiple of the augmentation factor; "
    "the consecutive-rows grouping assumption does not hold"
)
n_images = len(X) // variants_per_image

train_image_ids, test_image_ids = train_test_split(
    np.arange(n_images), test_size=0.2, random_state=split_seed
)

# Image k owns rows k * variants_per_image ... (k + 1) * variants_per_image - 1
variant_offsets = np.arange(variants_per_image)
train_rows = (train_image_ids[:, None] * variants_per_image + variant_offsets).ravel()
test_rows = (test_image_ids[:, None] * variants_per_image + variant_offsets).ravel()

# Shuffle the training rows so variants of one image are not adjacent
# (matters for order-sensitive learners)
train_rows = np.random.default_rng(split_seed).permutation(train_rows)

x_train, y_train = X[train_rows], y[train_rows]
x_test, y_test = X[test_rows], y[test_rows]

# Feature Scaling
# Fit on the training rows only, then apply the same transform to the test rows
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Save preprocessed data (same tuple layout as before, scaler included so the
# identical transform can be applied at inference time)
with open('preprocessed_data.pkl', 'wb') as f:
    pickle.dump((x_train, x_test, y_train, y_test, scaler), f)


Processing class: glass
Processing class: paper
Processing class: paper
Processing class: cardboard
Processing class: cardboard
Processing class: plastic
Processing class: plastic
Processing class: metal
Processing class: metal
Processing class: trash
Processing class: trash
