In [33]:
import os 
import pandas as pd
import cv2

In [6]:
def get_labels() -> list[dict]:
    furniture_path = './Furniture_Data'
    labels =  []
    for root, _, files in os.walk(furniture_path):
        for file in files:
            if file.endswith('.jpg'):
                
                path = os.path.join(root, file)
                dirs = path.split(os.path.sep)

                furniture = dirs[2][:-1] #Remove trailing `s` 
                style = dirs[3].lower() #lowercase

                label = {
                    'path': path,
                    'furniture': furniture,
                    'style': style
                }

                labels.append(label)

    return labels

labels = get_labels()
df = pd.DataFrame(labels)

In [7]:
df.head()

Unnamed: 0,path,furniture,style
0,./Furniture_Data/tables/Eclectic/4029eclectic-...,table,eclectic
1,./Furniture_Data/tables/Eclectic/4107eclectic-...,table,eclectic
2,./Furniture_Data/tables/Eclectic/3885eclectic-...,table,eclectic
3,./Furniture_Data/tables/Eclectic/4040eclectic-...,table,eclectic
4,./Furniture_Data/tables/Eclectic/4171eclectic-...,table,eclectic


Try to apply following data augmentation techniques 
- Flip
- Rotate
- Histogram Equalization

In [8]:
# Function for histogram equalization
def hist_equal(img):
    img_to_yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YUV)
    img_to_yuv[:, :, 0] = cv2.equalizeHist(img_to_yuv[:, :, 0])
    hist_equalization_result = cv2.cvtColor(img_to_yuv, cv2.COLOR_YUV2BGR)
    return hist_equalization_result

In [9]:
# Function for image zooming
def zoom_image(image, scale, center):
    
    height, width = image.shape[:2]
    new_height = int(height * scale)
    new_width = int(width * scale)

    zoomed_image = cv2.resize(image, (new_width, new_height))
    x = int(center[0] - width / 2)
    y = int(center[1] - width / 2)
    roi = zoomed_image[y : y + height, x : x + width]

    return roi

In [65]:
# Function for image augmentation
def image_augmentation(image):
    flip_h = cv2.flip(image, 0) # Flip image horizontally

    # Get image dimensions
    height, width = flip_h.shape[:2]

    # Define the zoom factor (e.g., zooming in by a factor of 1.5)
    zoom_factor = 1.5

    # Calculate the new dimensions after zooming
    new_height = int(height * zoom_factor)
    new_width = int(width * zoom_factor)

    zoomed_image = cv2.resize(image, (new_width, new_height))
    histogram = hist(image)
    adjusted = cv2.convertScaleAbs(histogram, alpha = 1.0, beta = 5) # Adjust image's contrast and brightness

    return adjusted

**Resampling Problem**
- Flowers: Only have 1 layer of category. Can easily resampling by just calculate the different between each category with the amount of the category with the max number.
   - Technique: Transform the curr scanned image until the current number of category matches the maximum amount
- Furniture: Have 2 layers of categories
  - Naive resampling: Can evenly distribute across furnitures, however styles distribution can be uneven 
  - More cater resampling techniques (Proposed)
    0. Copy curr image folders to `Transformed-Furnitures`
    1. Get the amount of the style of the furniture with maximum amount. Assign to `maximum_amount`
    2. (Optional) Remove that style of the furniture if's smaller than <threshold> (suggest 5% compared to max amount)
    3. Loop thru each image of the category and apply the transformation.
      3a. Increment the `current_amount` by 1
      3b. Saved the new image to corresponding category in `Transformed Furnitures`
    5. Break if `current_amount` doesn't match `maximum_amount`
    6. Continue until end of the loop. If the `current_amount` still doesn't match the `maximum_amount`, then repeat step 3,4 and 5.

Copy raw images dir to transformed images dir

In [84]:
raw_furniture_path = './Furniture_Data'
transformed_furniture_path = './Transfomed_Furniture_Data'

In [17]:
def copy_images_dir():
    # Terminal commands to remove the 'transformed-images' folder if it exists
    if not os.path.exists(transformed_images_folder):
        remove_command = f"rm -rf {transformed_furniture_path}"
        os.system(remove_command)
        print(f"Removed '{transformed_furniture_path}' folder.")
        
        # Terminal command to copy images to the 'transformed-images' folder
        copy_command = f"cp -r {raw_furniture_path} {transformed_furniture_path}"
        os.system(copy_command)
        print("Copying completed successfully.")
        copy_images_dir()
    else:
        print("Folder already exist")

copy_images_dir()

Calculate Furniture Style's maximum amount

In [24]:
max_amount = df.groupby(['furniture', 'style']).size().max()
max_amount


5343

Get combinations of furniture and style unique values 


In [45]:
category_combinations = df[['furniture', 'style']].drop_duplicates().to_records(index=False)
category_combinations[:5]

rec.array([('table', 'eclectic'), ('table', 'midcentury'),
           ('table', 'craftsman'), ('table', 'victorian'),
           ('table', 'transitional')],
          dtype=[('furniture', 'O'), ('style', 'O')])

In [135]:
import random
from scipy import ndimage


def clear_all_transformed_images():
    # Clear images with pattern "resized" or "augmented"
    ...

def resize_image(df_category: pd.DataFrame) -> int:
    """
    Args:
    - df_category(pd.Dataframe)
    Return:
    - pd.Dataframe: New dataframe with initial images and resized images
    """
    # df_resized = df_category.copy(deep=True)
    resized_data = []
    
    for _, record in df_category.iterrows():
        # Read Image
        print('Read Image')
        img_arr = cv2.imread(record['path'])
        
        # Resize
        print('Resize')
        resized_img_arr = cv2.resize(img_arr, (150, 150))
        
        # Generate a name: {current_name}-resized
        file_path, _ = os.path.splitext(record['path'])
        file_path = file_path.replace(raw_furniture_path, transformed_furniture_path)
        
        resized_path = f"{file_path}-resized.jpg"
        
        # Write output  and get path
        print(f'Write resized image to disk {resized_path}')
        cv2.imwrite(resized_path, resized_img_arr)
        
        # Append new record to list
        print('Append to list')
        resized_data.append((
            resized_path,
            record['furniture'],
            record['style']
        ))
    
    
    df_resized = pd.DataFrame(resized_data, columns=df_category.columns)
    return pd.concat([df_category, df_resized], ignore_index=True)

def advanced_transform(df_transform):
    current_amount = len(df_transform)
    augmented_data = []
    
    while current_amount < max_amount:
        # Pick random img file
        print('Pick random img')
        random_df_index = random.choice(df_transform.index.to_list())
        random_df_record = df_transform.iloc[random_df_index]
        
        random_img_file_path = random_df_record['path'] 
        
        #  Augment the img 
        print('Start augment the img')
        random_img_arr = cv2.imread(random_img_file_path)
        augmented_img_arr = image_augmentation(random_img_arr)


        # Gen file path
        file_path, _ = os.path.splitext(random_img_file_path)
        augmented_img_file_path = f"{file_path}-augmented-{current_amount}.jpg"

        # Write augmented image to disk
        print(f'Write augmeted image to disk {augmented_img_file_path}')
        cv2.imwrite(augmented_img_file_path, augmented_img_arr)
        
        # Store new data
        augmented_data.append((
            augmented_img_file_path,
            random_df_record['furniture'],
            random_df_record['style']
        ))
        
        current_amount += 1 

    
    df_augmented = pd.DataFrame(augmented_data, columns=df_transform.columns)
    return pd.concat([df_transform, df_augmented], ignore_index=True)
        
        
def transform(df: pd.DataFrame) -> pd.DataFrame:
    df_transform = None
    for furniture, style in category_combinations:
        print(furniture, style)
        df_category = df[(df['furniture'] == furniture) & (df['style'] == style)][:2]

        # Resize  first
        df_resized =  resize_image(df_category)
        
        df_transform = df_resized if df_transform is None else pd.concat([df_transform, df_resized], ignore_index=True)
        
        # Random pick an image in transformed image, then perform aumgnetation
        df_transform = advanced_transform(df_transform)

    
    return df_transform
        

In [None]:
dummy = transform(df)

In [133]:
dummy.tail().path

6146    ./Furniture_Data/beds/Asian/7842asian-platform...
6147    ./Transfomed_Furniture_Data/beds/Asian/7873asi...
6148    ./Transfomed_Furniture_Data/beds/Asian/2537asi...
6149    ./Transfomed_Furniture_Data/beds/Asian/7577asi...
6150    ./Transfomed_Furniture_Data/beds/Asian/7842asi...
Name: path, dtype: object