# Sliding Window Notebook

### 1. Data Cleaning and Augmentation: Identify furniture category-type combinations with fewer than 1000 images, then calculate the increase factor (number of crops to generate per source image) needed to approach a target of 5000 images per combination.

### 2. Image Processing: The sliding_window function generates multiple cropped images from a single input image. It keeps the window size fixed and iteratively reduces the step size until the window grid is estimated to yield at least the desired number of windows.

### 3. File Organization: Create a structured directory hierarchy (../CrawledData/{image_category}/{image_interior_type}/) to store the cropped images, which is crucial for data organization.

### 4. Analysis Functions: get_subdirectories and get_category_info provide helpers for analyzing the directory structure of the generated dataset.

In [1]:
import os
import cv2
import math
import pandas as pd

In [2]:
# Load the furniture image index from CSV (path is relative to the notebook's
# working directory). Later cells read its 'Furniture_Category',
# 'Furniture_Type' and 'Image_Path' columns.
furniture_df = pd.read_csv("../furniture_data.csv")

In [3]:
# Build a single "<category>-<type>" label per row so each combination can be
# counted and filtered as one unit.
category_column = furniture_df['Furniture_Category']
type_column = furniture_df['Furniture_Type']
furniture_df['Furniture_Category_Type'] = category_column + '-' + type_column

In [4]:
# Count rows per combined label, then keep the labels that have fewer than 1000 rows
value_counts = furniture_df['Furniture_Category_Type'].value_counts()
has_fewer_than_1000 = value_counts < 1000
categories_below_1000 = value_counts.loc[has_fewer_than_1000].index

In [5]:
# Keep only the rows whose combined label is under-represented.
# .copy() gives an independent frame so a column can be added to it later.
in_small_category = furniture_df['Furniture_Category_Type'].isin(categories_below_1000)
furniture_below_1000_df = furniture_df.loc[in_small_category].copy()

In [6]:
# Per row: how many crops each source image should yield, computed as
# 5000 floor-divided by the number of images in that row's category-type.
images_in_category = furniture_below_1000_df['Furniture_Category_Type'].map(value_counts)
crops_per_image = 5000 // images_in_category
furniture_below_1000_df['Increase_Factor'] = crops_per_image.fillna(1).astype(int)

In [7]:
# Inspect the filtered frame, which now also carries the Increase_Factor column
print(furniture_below_1000_df)

      Furniture_Category Furniture_Type  \
0                   beds          Asian   
1                   beds          Asian   
2                   beds          Asian   
3                   beds          Asian   
4                   beds          Asian   
...                  ...            ...   
90079             tables      Victorian   
90080             tables      Victorian   
90081             tables      Victorian   
90082             tables      Victorian   
90083             tables      Victorian   

                                              Image_Path  \
0      ../Furniture_Data\beds\Asian\19726asian-daybed...   
1      ../Furniture_Data\beds\Asian\20027asian-canopy...   
2      ../Furniture_Data\beds\Asian\20109asian-panel-...   
3      ../Furniture_Data\beds\Asian\20508asian-platfo...   
4      ../Furniture_Data\beds\Asian\20750asian-comfor...   
...                                                  ...   
90079  ../Furniture_Data\tables\Victorian\5victorian-...   
900

In [8]:
# Example: pull out the rows of one combined label ("beds-Scandinavian")
is_scandinavian_bed = furniture_below_1000_df['Furniture_Category_Type'] == "beds-Scandinavian"
scandinavian_beds_df = furniture_below_1000_df.loc[is_scandinavian_bed]
print(scandinavian_beds_df)

     Furniture_Category Furniture_Type  \
3242               beds   Scandinavian   
3243               beds   Scandinavian   
3244               beds   Scandinavian   
3245               beds   Scandinavian   
3246               beds   Scandinavian   
3247               beds   Scandinavian   
3248               beds   Scandinavian   
3249               beds   Scandinavian   
3250               beds   Scandinavian   
3251               beds   Scandinavian   
3252               beds   Scandinavian   
3253               beds   Scandinavian   
3254               beds   Scandinavian   
3255               beds   Scandinavian   
3256               beds   Scandinavian   
3257               beds   Scandinavian   
3258               beds   Scandinavian   
3259               beds   Scandinavian   
3260               beds   Scandinavian   
3261               beds   Scandinavian   
3262               beds   Scandinavian   
3263               beds   Scandinavian   
3264               beds   Scandina

In [9]:
def sliding_window(image_path, saved_path, window_size=(164, 164), desired_windows=6, overlap=0.5):
    """Crop an image into a grid of fixed-size windows and save each crop as a JPEG.

    The grid starts with a step derived from ``overlap`` and the step is then
    reduced, one pixel at a time, until the grid contains at least
    ``desired_windows`` windows or the step reaches 1 pixel. More windows than
    requested may be written; fewer are written only when the image is too
    small to hold that many distinct window positions.

    Args:
        image_path: Path of the source image, read with ``cv2.imread``.
        saved_path: Directory that receives the crops; created if missing.
        window_size: ``(width, height)`` of each crop in pixels.
        desired_windows: Minimum number of crops to aim for.
        overlap: Fraction of the smaller window side shared by neighbouring
            windows; sets the initial step size.

    Returns:
        None. Crops are written as ``<source name>_<index>.jpg`` in
        ``saved_path``. Unreadable images and images smaller than the window
        are reported with a message and skipped.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Could not read image at {image_path}")
        return

    image_height, image_width = image.shape[:2]
    window_width, window_height = window_size
    image_filename = os.path.splitext(os.path.basename(image_path))[0]

    # Largest valid top-left offsets; negative means the window does not fit.
    max_x = image_width - window_width
    max_y = image_height - window_height
    if max_x < 0 or max_y < 0:
        print(
            f"Error: Image at {image_path} ({image_width}x{image_height}) "
            f"is smaller than the window ({window_width}x{window_height})"
        )
        return

    def count_windows(step):
        # Mirrors the two range() loops below exactly (floor, not ceil), so the
        # estimate equals the number of crops that will really be written.
        return (max_y // step + 1) * (max_x // step + 1)

    # Clamp to 1 so overlap >= 1 cannot produce a zero or negative step.
    step_size = max(1, int((1 - overlap) * min(window_size)))

    # Tighten the grid until it is dense enough. Stopping at 1 avoids the
    # division by zero that occurs when the image can never yield enough windows.
    while step_size > 1 and count_windows(step_size) < desired_windows:
        step_size -= 1

    # Create output directory
    os.makedirs(saved_path, exist_ok=True)

    window_index = 0
    for y in range(0, max_y + 1, step_size):
        for x in range(0, max_x + 1, step_size):
            window = image[y:y + window_height, x:x + window_width]
            window_filename = os.path.join(saved_path, f"{image_filename}_{window_index}.jpg")
            # cv2.imwrite reports failure through its return value.
            if not cv2.imwrite(window_filename, window):
                print(f"Error: Could not write window to {window_filename}")
            window_index += 1


# --- DataFrame Iteration ---
# Crop every image of the under-represented categories into
# ../CrawledData/<category>/<type>/, asking for Increase_Factor crops per image.
for record in furniture_below_1000_df.itertuples(index=False):
    saved_parent_path = f"../CrawledData/{record.Furniture_Category}/{record.Furniture_Type}/"

    sliding_window(
        image_path=record.Image_Path,
        saved_path=saved_parent_path,
        desired_windows=record.Increase_Factor,
        overlap=0.5,  # Can adjust this value
    )

In [10]:
# Root directory to analyse; the sliding-window step above writes its crops
# under this same "../CrawledData" folder.
base_dataset_dir = "../CrawledData"

In [11]:
# Function to get all subdirectory names within a directory
def get_subdirectories(directory):
    """Return the names of the immediate subdirectories of ``directory``, sorted.

    Args:
        directory: Path of the directory to inspect.

    Returns:
        A list of entry names (not full paths) that are directories. The list
        is sorted because ``os.listdir`` returns entries in arbitrary order,
        which would make downstream results differ between platforms.
    """
    return sorted(
        entry
        for entry in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, entry))
    )

In [12]:
# Function to summarise the two-level <category>/<type>/ folder layout
def get_category_info(root_dir):
    """Collect the type folders and entry paths found under each category folder.

    Walks exactly two levels below ``root_dir``:
    ``root_dir/<category>/<type>/<entry>``.

    Args:
        root_dir: Directory whose immediate subdirectories are the categories.

    Returns:
        A dict keyed by category name. Each value is a dict with:
          - "num_types": number of type subdirectories,
          - "types": sorted list of type subdirectory names,
          - "image_paths": path of every entry inside each type directory
            (entries are not filtered by kind or extension), in sorted order.
    """
    folder_info = {}
    # Sorting here and below keeps the result reproducible: os.listdir, which
    # backs both the helper and the listing of entries, has no defined order.
    for category in sorted(get_subdirectories(root_dir)):
        category_path = os.path.join(root_dir, category)
        type_names = sorted(get_subdirectories(category_path))

        image_paths = []
        for type_name in type_names:  # avoids hiding the builtin `type`
            type_path = os.path.join(category_path, type_name)
            image_paths.extend(
                os.path.join(type_path, image_name)
                for image_name in sorted(os.listdir(type_path))
            )

        folder_info[category] = {
            "num_types": len(type_names),
            "types": type_names,
            "image_paths": image_paths,
        }
    return folder_info

In [13]:
# Scan the dataset root once; the resulting dict is keyed by category name and
# holds "num_types", "types" and "image_paths" for each category.
category_info = get_category_info(base_dataset_dir)

In [14]:
# Report, per category, how many type folders exist and what they are called
print("Info within each categorical furnitures:")
for category_name, details in category_info.items():
    type_count = details['num_types']
    type_names = details['types']
    print(f" - {category_name}: {type_count} types")
    print(f"   Types: {type_names}")

Info within each categorical furnitures:
 - beds: 14 types
   Types: ['Asian', 'Beach', 'Craftsman', 'Eclectic', 'Farmhouse', 'Industrial', 'Mediterranean', 'Midcentury', 'Modern', 'Rustic', 'Scandinavian', 'Southwestern', 'Tropical', 'Victorian']
 - chairs: 12 types
   Types: ['Asian', 'Beach', 'Craftsman', 'Eclectic', 'Farmhouse', 'Industrial', 'Mediterranean', 'Rustic', 'Scandinavian', 'Southwestern', 'Tropical', 'Victorian']
 - dressers: 14 types
   Types: ['Asian', 'Beach', 'Craftsman', 'Eclectic', 'Farmhouse', 'Industrial', 'Mediterranean', 'Midcentury', 'Modern', 'Rustic', 'Scandinavian', 'Southwestern', 'Tropical', 'Victorian']
 - lamps: 6 types
   Types: ['Eclectic', 'Midcentury', 'Rustic', 'Scandinavian', 'Southwestern', 'Tropical']
 - sofas: 15 types
   Types: ['Asian', 'Beach', 'Craftsman', 'Eclectic', 'Farmhouse', 'Industrial', 'Mediterranean', 'Midcentury', 'Modern', 'Rustic', 'Scandinavian', 'Southwestern', 'Traditional', 'Tropical', 'Victorian']
 - tables: 11 types
   T

In [15]:
# Build one record per image path from the category dictionary.
# The furniture type is the name of the folder that directly contains the image.
# A comprehension is used so that no loop variable (in particular none called
# `type`, which would overwrite the builtin for the rest of the session) is
# left behind in the notebook's global namespace.
df_data = [
    {
        "Furniture_Category": category,
        "Furniture_Type": os.path.basename(os.path.dirname(image_path)),
        "Image_Path": image_path,
    }
    for category, info in category_info.items()
    for image_path in info['image_paths']
]

In [16]:
# Turn the list of record dicts into a DataFrame in a single call; the dict
# keys ("Furniture_Category", "Furniture_Type", "Image_Path") become the columns.
furniture_sliding_df = pd.DataFrame(df_data)

In [17]:
# Preview the first 10 rows of the generated-image index
print(furniture_sliding_df.head(10))

  Furniture_Category Furniture_Type  \
0               beds          Asian   
1               beds          Asian   
2               beds          Asian   
3               beds          Asian   
4               beds          Asian   
5               beds          Asian   
6               beds          Asian   
7               beds          Asian   
8               beds          Asian   
9               beds          Asian   

                                          Image_Path  
0  ../CrawledData\beds\Asian\19726asian-daybeds_0...  
1  ../CrawledData\beds\Asian\19726asian-daybeds_1...  
2  ../CrawledData\beds\Asian\19726asian-daybeds_1...  
3  ../CrawledData\beds\Asian\19726asian-daybeds_1...  
4  ../CrawledData\beds\Asian\19726asian-daybeds_1...  
5  ../CrawledData\beds\Asian\19726asian-daybeds_1...  
6  ../CrawledData\beds\Asian\19726asian-daybeds_1...  
7  ../CrawledData\beds\Asian\19726asian-daybeds_1...  
8  ../CrawledData\beds\Asian\19726asian-daybeds_1...  
9  ../CrawledData\be

In [18]:
# Count the listed image paths per (category, type) pair; within each category
# value_counts orders the types from most to fewest entries.
print(furniture_sliding_df.groupby('Furniture_Category')['Furniture_Type'].value_counts())

Furniture_Category  Furniture_Type
beds                Southwestern      5929
                    Eclectic          5632
                    Scandinavian      5577
                    Mediterranean     4800
                    Craftsman         4700
                                      ... 
tables              Scandinavian      4500
                    Beach             4446
                    Eclectic          3690
                    Craftsman         3681
                    Transitional      3312
Name: count, Length: 72, dtype: int64


In [19]:
# Persist the generated-image index next to the input CSV.
# index=False keeps the positional row index out of the file.
furniture_sliding_df.to_csv("../furniture_sliding_data.csv", index=False)

# The End of Notebook.