In [None]:
# This is the second CNN, where I implement different methods to try to achieve the highest accuracy possible

import pandas as pd 
import numpy as np 
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout 
from tensorflow.keras.preprocessing.image import load_img, img_to_array 
from sklearn.preprocessing import LabelEncoder 
from sklearn.model_selection import train_test_split 
import pandas as pd 
from google.colab import drive 

# Mount Google Drive so the notebook can read files stored there (here: styles.csv).
drive.mount('/content/drive')
csv_path = '/content/drive/My Drive/styles.csv'

# on_bad_lines='skip' makes pandas skip malformed lines rather than raise on them.
df = pd.read_csv(csv_path, on_bad_lines='skip')

# Keep only rows whose 'usage' is one of the three target categories,
# then renumber the surviving rows from 0.
clothing_categories = ['Casual', 'Formal', 'Smart Casual']
usage_mask = df['usage'].isin(clothing_categories)
df_filtered = df.loc[usage_mask].reset_index(drop=True)

# Sanity check: report the row count and preview the filtered dataframe.
row_count = len(df_filtered)
print(f"Number of rows in filtered dataframe: {row_count}")
print(df_filtered.head())

In [None]:
import os

# Target image geometry used when loading each picture
IMG_HEIGHT = 128
IMG_WIDTH = 128
NUM_CHANNELS = 3

# Accumulators: X collects the pixel arrays, y the matching 'usage' labels
X = []
y = []

# Folder on Drive holding the images, looked up as "<id>.jpg"
file_path = '/content/drive/My Drive/images'

# Restrict to rows whose id lies in [0, 50000] (both ends inclusive)
id_in_range = df_filtered['id'].between(0, 50000)
df_subset = df_filtered[id_in_range]

# Walk the selected rows and load each image. Missing files and load errors
# are reported and skipped; progress is printed every 1000 loaded images.
for raw_id, usage_label in zip(df_subset['id'], df_subset['usage']):
    img_id = str(raw_id)

    try:
        img_path = os.path.join(file_path, f"{img_id}.jpg")

        if os.path.exists(img_path):
            # Resize while loading, then divide by 255 to normalise pixel values
            pixels = img_to_array(
                load_img(img_path, target_size=(IMG_HEIGHT, IMG_WIDTH))
            )
            X.append(pixels / 255.0)
            y.append(usage_label)

            # Progress message after every 1000 successfully loaded images
            if len(X) % 1000 == 0:
                print(f"Processed {len(X)} images")
        else:
            print(f"Image {img_id} not found.")

    except Exception as e:
        print(f"Error loading image {img_id}: {str(e)}")

# Convert the Python lists into NumPy arrays for training
X = np.array(X)
y = np.array(y)

print("\nFinal dataset shape:")
print("Images shape:", X.shape)
print("Labels shape:", y.shape)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator 
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import LabelEncoder 
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout 
from tensorflow.keras.optimizers import Adam 

# Encode labels (convert text labels to integer class ids)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Number of distinct labels actually present; used to size the output layer
num_classes = len(label_encoder.classes_)

# Split the data: 20% held out for testing, fixed seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Define data augmentation for the training data.
# NOTE: no `rescale` here. The pixel arrays were already divided by 255 when the
# images were loaded, so rescaling again would squash the values into [0, 1/255]
# and the generator output would no longer match the arrays used by fit/evaluate.
train_datagen = ImageDataGenerator(
    rotation_range=40,       # Random rotation of up to 40 degrees
    width_shift_range=0.2,   # Random horizontal shift of the image
    height_shift_range=0.2,  # Random vertical shift of the image
    zoom_range=0.2,          # Random zoom
    horizontal_flip=True,    # Randomly flip images horizontally
    shear_range=0.2,         # Random shear transformation
    fill_mode='nearest'      # Fill any missing pixels after transformations
)

# Test data gets no augmentation and, for the reason above, no second rescale
test_datagen = ImageDataGenerator()

# Applies data augmentation to training data
train_generator = train_datagen.flow(X_train, y_train, batch_size=32)

# Passes test data through unchanged; shuffle=False keeps batches in the same
# order as y_test so predictions can be lined up with the true labels
test_generator = test_datagen.flow(X_test, y_test, batch_size=32, shuffle=False)

# Alternative kept for reference: VGG16 transfer learning (disabled).
# Enabling it also requires `from tensorflow.keras.applications import VGG16`.
# base_model = VGG16(weights='imagenet', include_top=False, input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))
# base_model.trainable = False
# model = Sequential([
#     base_model,                      # Pre-trained VGG16 base model
#     Flatten(),
#     Dense(64, activation='relu'),
#     Dropout(0.5),
#     Dense(3, activation='softmax')
# ])

# ReLU is used as it introduces non-linearity, allowing the model to learn more complex patterns
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, NUM_CHANNELS)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),  # Flattens the output to a 1D array to connect to dense layers
    Dense(64, activation='relu'),
    Dropout(0.7),  # Dropout layer to prevent overfitting

    Dense(num_classes, activation='softmax')  # Output layer: one probability per encoded class
])

# Compiles the model.
# Adam is used as it adapts the learning rate for each parameter, making it efficient for large
# datasets or large numbers of parameters. Its faster convergence allowed for faster testing.
model.compile(optimizer=Adam(),
    loss='sparse_categorical_crossentropy',  # Multi-class loss that takes integer class ids (as produced by LabelEncoder)
    metrics=['accuracy'])

# Prints a summary of the model
model.summary()

In [None]:
# Train for 50 epochs: deliberately more than the ~15 after which overfitting
# sets in, to be certain the model gets enough iterations.
# Note: training uses the X_train / y_train arrays directly (train_generator is
# not passed here); validation_split holds out 20% of them for validation.
fit_options = dict(epochs=50, batch_size=32, validation_split=0.2)
history = model.fit(X_train, y_train, **fit_options)

# Score the trained model on the held-out test split
evaluation = model.evaluate(X_test, y_test)
test_loss, test_accuracy = evaluation
print(f"\nTest accuracy: {test_accuracy:.4f}")

# Persist the trained model to disk
model.save('fashion_classifier.h5')

# Persist the fitted label encoder alongside the model
import pickle
with open('label_encoder.pkl', 'wb') as encoder_file:
    pickle.dump(label_encoder, encoder_file)

In [None]:
import matplotlib.pyplot as plt 

# Draw the training and validation accuracy curves on one set of axes
fig, ax = plt.subplots()
accuracy_curves = (
    ('accuracy', 'Training Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
)
for history_key, curve_label in accuracy_curves:
    ax.plot(history.history[history_key], label=curve_label)
ax.set_title('Model Accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
ax.grid(True)
plt.show()

# Evaluate on the held-out test split and print the resulting accuracy
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"\nTest accuracy: {test_accuracy:.4f}")