In [3]:
import pandas as pd 
import tensorflow as tf
from tensorflow import keras
import os
import cv2
import tensorflow.image as imtf
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm
import mahotas.features.texture as texture

tqdm.pandas()

In [4]:
def get_data() -> list[dict]:
    furniture_path = './Transfomed_Furniture_Data'
    labels =  []
    for root, _, files in os.walk(furniture_path):
        for file in files:
            if file.endswith('.jpg'):
                
                path = os.path.join(root, file)
                dirs = path.split(os.path.sep)

                furniture = dirs[2][:-1] #Remove trailing `s` 
                style = dirs[3].lower() #lowercase

                label = {
                    'path': path,
                    'furniture': furniture,
                    'style': style
                }

                labels.append(label)

    return pd.DataFrame(labels)

df = get_data()
df.head()

Unnamed: 0,path,furniture,style
0,./Transfomed_Furniture_Data/tables/Eclectic/40...,table,eclectic
1,./Transfomed_Furniture_Data/tables/Eclectic/47...,table,eclectic
2,./Transfomed_Furniture_Data/tables/Eclectic/41...,table,eclectic
3,./Transfomed_Furniture_Data/tables/Eclectic/41...,table,eclectic
4,./Transfomed_Furniture_Data/tables/Eclectic/41...,table,eclectic


In [5]:
def get_image(img_path):
    img_arr = cv2.imread(img_path)
    
    # Downsize image for faster model training. 
    # Normalize the image to scale 0-1 for faster training time and better performance
    return cv2.resize(img_arr, (50,50)) / 255.0 

X = df['path'].progress_apply(lambda path: get_image(path)).to_list()
X = np.array(X)

  0%|          | 0/182457 [00:00<?, ?it/s]

In [6]:
# Define furniture index. We'll use this data format as target for training 
furniture_indexes = {furniture:idx for idx, furniture in enumerate(df['furniture'].unique(), 1)}
furniture_indexes

{'table': 1, 'sofa': 2, 'lamp': 3, 'chair': 4, 'dresser': 5, 'bed': 6}

In [7]:
def get_index(furniture):
    return furniture_indexes[furniture]

y = df['furniture'].apply(lambda furniture: get_index(furniture)).to_list()
y = np.array(y)


In [8]:
from sklearn.model_selection import train_test_split

# Split the data into training and test sets
# Using stratify on y to ensure better distribution
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42, stratify=y)

X_train = np.squeeze(X_train)
y_train = np.squeeze(y_train)
X_test = np.squeeze(X_test)
y_test = np.squeeze(y_test)

In [9]:
print(f"X_train shape {X_train.shape}")
print(f"X_test shape {X_test.shape}")

X_train shape (109474, 50, 50, 3)
X_test shape (72983, 50, 50, 3)


In [10]:
print(f"y_train shape {y_train.shape}")
print(f"y_test shape {y_test.shape}")

y_train shape (109474,)
y_test shape (72983,)


In [11]:
from tensorflow.keras import layers, models, regularizers

# Create a sequential model
model = models.Sequential()

# Add convolutional layers
# model.add(layers.Conv2D(8, (3, 3), activation='relu', input_shape=(224, 224,3),kernel_regularizer=regularizers.l2(0.001)))
# model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(16, (3, 3), activation='relu', input_shape=(50, 50,3), padding='same'))
model.add(layers.MaxPooling2D((2, 2), padding='same'))
model.add(layers.Conv2D(32, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
# model.add(layers.Conv2D(256, (3, 3), activation='relu'))
# model.add(layers.MaxPooling2D((2, 2)))
# model.add(layers.Conv2D(512, (3, 3), activation='relu'))
# model.add(layers.MaxPooling2D((2, 2)))
# model.add(layers.Conv2D(1024, (3, 3), activation='relu'))
# model.add(layers.MaxPooling2D((2, 2)))
# model.add(layers.Conv2D(512*2*2, (3, 3), activation='relu'))
# model.add(layers.MaxPooling2D((2, 2)))



# Add a dense layer
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
# model.add(layers.Dropout(0.2)) # add a dropout layer with dropout rate of 0.2


# Add the output layer
model.add(layers.Dense(64, activation='softmax'))

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model on the training data
model.fit(X_train, y_train, epochs=5, validation_data=(X_test, y_test))


Epoch 1/5


2024-04-30 13:58:25.130468: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2876b3cd0>

Save model for resuability 

In [12]:
import os

model_dir = "./models"
model_h5_file = os.path.join(model_dir, "model_task_2_cnn_classification.h5")

# Check if the directory exists, and if not, create it
if not os.path.exists(model_dir):
    os.makedirs(model_dir)

model.save(model_h5_file)

TODO: Debug supsending kernel and saved the dataset

In [None]:
bins = 88

def get_histogram(image: np.ndarray):
    histogram = cv2.calcHist([image], [0, 1, 2], None, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
    histogram = cv2.normalize(histogram, histogram).flatten()
    return histogram 

def get_texture_feature(image: np.ndarray):
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    glcm = texture.haralick(gray_image)
    return np.mean(glcm, axis=0)

def get_compactness(image: np.ndarray):
    # Convert the image to grayscale
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Apply adaptive thresholding
    thresholded = cv2.adaptiveThreshold(gray_image, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
    
    # Find contours in the binary image
    contours, _ = cv2.findContours(thresholded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Calculate compactness for each contour
    compactness_values = []
    for contour in contours:
        area = cv2.contourArea(contour)
        perimeter = cv2.arcLength(contour, True)
        compactness = perimeter / np.sqrt(area) if area > 0 else 0
        compactness_values.append(compactness)
    
    # Maximum Compactness: Choose the contour with the highest compactness value. 
    # This approach assumes that the object with the highest compactness is the most significant or relevant in the image.
    max_compactness_index = np.argmax(compactness_values)
    max_compactness_value = compactness_values[max_compactness_index]
    
    return max_compactness_value


def get_similarity_attrs(row: pd.Series) -> pd.Series: 
    print(row['path'])
    image = cv2.imread(row['path'])

    row['histogram'] = get_histogram(image)
    row['texture_feature'] = get_texture_feature(image)
    row['compactness'] = get_compactness(image)

    return row 

df_similarity = df.progress_apply(get_similarity_attrs, axis=1)

  0%|          | 0/182457 [00:00<?, ?it/s]

./Transfomed_Furniture_Data/tables/Eclectic/4029eclectic-side-tables-and-end-tables.jpg
./Transfomed_Furniture_Data/tables/Eclectic/4768eclectic-coffee-and-accent-tables-resized-augmented-4531.jpg
./Transfomed_Furniture_Data/tables/Eclectic/4107eclectic-nightstands-and-bedside-tables.jpg
./Transfomed_Furniture_Data/tables/Eclectic/4129eclectic-coffee-tables-resized-augmented-5106.jpg
./Transfomed_Furniture_Data/tables/Eclectic/4199eclectic-console-tables-resized-augmented-2073.jpg
./Transfomed_Furniture_Data/tables/Eclectic/3977eclectic-side-tables-and-end-tables-resized-augmented-1222.jpg
./Transfomed_Furniture_Data/tables/Eclectic/3959eclectic-side-tables-and-end-tables-resized-augmented-2036.jpg
./Transfomed_Furniture_Data/tables/Eclectic/3840eclectic-coffee-tables-resized.jpg
./Transfomed_Furniture_Data/tables/Eclectic/3830eclectic-coffee-tables-resized-augmented-4914.jpg
./Transfomed_Furniture_Data/tables/Eclectic/3885eclectic-side-tables-and-end-tables.jpg
./Transfomed_Furniture_

NameError: name 'model' is not defined