<a href="https://colab.research.google.com/github/sreeman-11021996/Fashion-Recommendation-system/blob/main/my_resnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

In [None]:
# Download the paramaggarwal/fashion-product-images-small dataset archive with the Kaggle CLI.
!kaggle datasets download -d paramaggarwal/fashion-product-images-small

Downloading fashion-product-images-small.zip to /content
 99% 560M/565M [00:03<00:00, 220MB/s]
100% 565M/565M [00:03<00:00, 193MB/s]


In [None]:
import zipfile

# Unpack the downloaded archive into /content. The context manager closes the
# archive handle even if extraction raises part-way through.
with zipfile.ZipFile("/content/fashion-product-images-small.zip", "r") as zip_ref:
    zip_ref.extractall("/content")

In [None]:
import tensorflow
from tensorflow.keras.layers import Conv2D,BatchNormalization,Activation,Add,ZeroPadding2D,\
MaxPooling2D,AveragePooling2D,Input,Flatten,Dense
from tensorflow.keras.initializers import GlorotUniform
from keras.models import Model
from tensorflow.keras.preprocessing import image

import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import pickle
import matplotlib.pyplot as plt

In [None]:
# Load the label table, keeping only the id column and the label columns listed below.
# Rows whose image file is missing are not removed here; clean_data_ handles that.
train=pd.read_csv('/content/styles.csv',usecols=["id","gender","masterCategory",
                                                 "subCategory","articleType",
                                                 "baseColour","season","usage"])

In [None]:
def clean_data_(df,dirimg="/content/images"):
    """
    Return a copy of ``df`` without the rows whose image file is missing.

    A row is kept when ``<dirimg>/<id>.jpg`` exists on disk. The input frame
    is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Label table; must contain an ``id`` column.
    dirimg : str
        Directory that holds the ``<id>.jpg`` image files.

    Returns
    -------
    pandas.DataFrame
        ``df`` minus the rows with no matching image; index labels of the
        surviving rows are preserved.
    """
    print('[INFO]: Checking if all images are present')
    # Iterate over the `id` column only: this avoids building a Series per row
    # and keeps the ids in their original dtype.
    missing_labels = [
        label
        for label, image_id in tqdm(df["id"].items(), total=len(df))
        if not os.path.exists(os.path.join(dirimg, str(image_id) + ".jpg"))
    ]
    print(f"[INFO]: Dropping indices: {missing_labels}")
    # The collected values are index *labels*, so drop by label. Using them as
    # positions is only correct for a default 0..n-1 index.
    return df.drop(index=missing_labels)

In [None]:
# Drop the label rows that have no matching image file (default image directory).
train_clean = clean_data_(train)

[INFO]: Checking if all images are present


44446it [00:02, 16281.16it/s]


[INFO]: Dropping indices: [6697, 16207, 32324, 36399, 40022]


In [None]:
# Rows and columns remaining after the missing-image rows were dropped.
train_clean.shape

(44441, 8)

In [None]:
# Preview the first rows of the cleaned label table.
train_clean.head()

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,usage
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,Casual
1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,Casual
2,59263,Women,Accessories,Watches,Watches,Silver,Winter,Casual
3,21379,Men,Apparel,Bottomwear,Track Pants,Black,Fall,Casual
4,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,Casual


In [None]:
def cat_comb(df,columns=['gender','masterCategory','subCategory','articleType',\
                                'baseColour','season','usage']):
    """
    Count how many rows share each combination of values in ``columns``.

    Parameters
    ----------
    df : pandas.DataFrame
        Label table containing every column named in ``columns``.
    columns : list of str
        Columns whose value combinations are tallied.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        Both frames hold the grouping columns plus a ``count`` column.
        The first contains the combinations that occur more than once, the
        second those that occur exactly once.
    """
    group_sizes = df.groupby(columns).size()
    tally = group_sizes.reset_index().rename(columns={0:"count"})
    # Every combination appears once in the tally, so a boolean mask on the
    # count splits it into the "exactly once" part and the remainder.
    occurs_once = tally["count"]==1
    return tally[~occurs_once],tally[occurs_once]

In [None]:
# Tally (gender, masterCategory, subCategory) combinations; `single_com` holds those that occur exactly once.
cat_comb_df,single_com = cat_comb(train_clean,columns=["gender","masterCategory","subCategory"])

In [None]:
# Show the label combinations that are represented by a single row.
single_com

Unnamed: 0,gender,masterCategory,subCategory,count
0,Boys,Accessories,Eyewear,1
2,Boys,Accessories,Headwear,1
3,Boys,Accessories,Socks,1
16,Girls,Accessories,Headwear,1
17,Girls,Accessories,Jewellery,1
37,Men,Accessories,Perfumes,1
45,Men,Accessories,Water Bottle,1
54,Men,Personal Care,Bath and Body,1
62,Unisex,Accessories,Gloves,1
64,Unisex,Accessories,Mufflers,1


In [None]:
from keras.models import model_from_json

In [None]:
def save_models(model, arch_path="myModel_arc.json", weights_path="myModel_wts.h5"):
    """
    Persist a model as two files: architecture (JSON) and weights.

    Parameters
    ----------
    model
        Object exposing ``to_json()`` and ``save_weights(path)``.
    arch_path : str
        Destination of the architecture JSON. Defaults to the file name the
        notebook has always used.
    weights_path : str
        Destination passed to ``model.save_weights``. Defaults to the file
        name the notebook has always used.
        NOTE(review): newer Keras releases may insist on a ``.weights.h5``
        suffix - pass a different path if the default is rejected.
    """
    # save the architecture
    with open(arch_path, "w") as json_file:
        json_file.write(model.to_json())

    # save the weights
    model.save_weights(weights_path)

In [None]:
# save the train and validation loss plots to disk
def save_loss_plot(train_loss, val_loss, output_path='../outputs/loss.jpg'):
    """
    Plot training and validation loss per epoch, save the figure, then show it.

    Parameters
    ----------
    train_loss : sequence of float
        Training loss, one value per epoch.
    val_loss : sequence of float
        Validation loss, one value per epoch.
    output_path : str
        Where the figure is written. Defaults to the path the notebook has
        always used; the parent directory is created when it does not exist.
    """
    # savefig does not create directories, so make sure the target exists.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    plt.figure(figsize=(10, 7))
    plt.plot(train_loss, color='orange', label='train loss')
    plt.plot(val_loss, color='red', label='validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig(output_path)
    plt.show()

In [None]:
def _build_label_map(series):
    """Map each distinct value of ``series`` to an integer id, assigned in sorted order."""
    # Sorting makes the ids independent of row order, so two differently
    # shuffled frames with the same categories get the same encoding.
    # key=str keeps the sort well-defined if a missing value (NaN) is mixed in
    # with the string labels.
    categories = sorted(series.unique(), key=str)
    return {cat: i for i, cat in enumerate(categories)}


def map_label_dicts(df):
    """
    Build category -> integer-id dictionaries for the three target labels.

    Rows without a corresponding image are removed first (via ``clean_data_``),
    so only categories that still have at least one image receive an id.

    Parameters
    ----------
    df : pandas.DataFrame
        Label table with ``gender``, ``masterCategory`` and ``subCategory``
        columns (plus whatever ``clean_data_`` requires).

    Returns
    -------
    (dict, dict, dict)
        Mappings for ``gender``, ``masterCategory`` and ``subCategory``, in
        that order. Ids run from 0 and follow the sorted category names.
    """
    # remove rows from the DataFrame which do not have corresponding images
    df = clean_data_(df)

    # we will use the `gender`, `masterCategory`, and `subCategory` labels
    num_list_gender = _build_label_map(df['gender'])
    num_list_master = _build_label_map(df['masterCategory'])
    num_list_sub = _build_label_map(df['subCategory'])

    return num_list_gender,num_list_master,num_list_sub

In [None]:
## Preparing Dataset

In [None]:
import cv2
from keras.preprocessing.image import ImageDataGenerator
import math

In [None]:
def train_val_split(df, train_frac=0.90, random_state=None):
    """
    Shuffle the label table and split it into training and validation frames.

    Rows without a corresponding image are removed first (via ``clean_data_``).
    The first ``floor(len * train_frac)`` shuffled rows become the training
    set and every remaining row goes to validation, so the two frames never
    overlap and no row is lost to rounding.

    Parameters
    ----------
    df : pandas.DataFrame
        Label table to split.
    train_frac : float
        Fraction of rows assigned to training; must lie in ``[0, 1]``.
        Defaults to 0.90 (90% train / 10% validation).
    random_state : int or None
        Seed forwarded to ``DataFrame.sample``. ``None`` keeps the original
        unseeded shuffle; pass an int for a reproducible split.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(train_df, val_df)``, each with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If ``train_frac`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_frac <= 1.0:
        raise ValueError(f"train_frac must be within [0, 1], got {train_frac}")

    # remove rows from the DataFrame which do not have corresponding images
    df = clean_data_(df)

    # sample(frac=1) returns every row in random order; then reset the index
    df = df.sample(frac=1, random_state=random_state).reset_index(drop=True)

    num_train_samples = math.floor(len(df) * train_frac)
    train_df = df.iloc[:num_train_samples].reset_index(drop=True)
    # Take the remainder rather than the last floor(len * 0.10) rows: a
    # negative-offset slice returns the whole frame when that count is 0, and
    # two independent floors can leave a row in neither split.
    val_df = df.iloc[num_train_samples:].reset_index(drop=True)
    return train_df, val_df

In [None]:
# Peek at the `id` value of the first row of the label table.
train.head(1)["id"]

0    15970
Name: id, dtype: int64

In [None]:
class FashionDataset():
    """
    Index-addressable dataset that pairs each image with three integer labels.

    Item ``i`` is built from the ``i``-th row (by position) of the label
    table: the image ``<root_dir>/<id>.jpg`` is loaded and scaled, and the
    row's ``gender``, ``masterCategory`` and ``subCategory`` values are
    translated to integer ids using the dictionaries from ``map_label_dicts``.

    Parameters
    ----------
    df : pandas.DataFrame
        Label table with ``id``, ``gender``, ``masterCategory`` and
        ``subCategory`` columns.
    is_train : bool
        Selects which ``ImageDataGenerator`` configuration is stored in
        ``self.datagen`` (augmenting for training, rescale-only otherwise).
    root_dir : str
        Directory containing the ``<id>.jpg`` files. Defaults to the relative
        ``"images"`` directory the class has always used.
    """

    def __init__(self, df, is_train=True, root_dir="images"):
        self.df = df
        gender,master,sub = map_label_dicts(df)
        self.num_list_gender = gender
        self.num_list_master = master
        self.num_list_sub = sub
        self.is_train = is_train
        self.root_dir = root_dir

        # NOTE(review): `self.datagen` is only stored here; `__getitem__` does
        # not apply it. `preprocess` already divides by 255, so applying the
        # generator's `rescale` on top would scale twice - confirm the
        # intended pipeline before wiring it in.
        if self.is_train:
            # the training transforms and augmentations
            self.datagen = ImageDataGenerator(
                rescale=1./255,
                horizontal_flip=True,
                vertical_flip=True,
                rotation_range = 40,
                shear_range = 0.2,
                zoom_range = 0.2,
                width_shift_range=0.2,
                height_shift_range = 0.2
            )
        else:
            # the validation transforms
            self.datagen = ImageDataGenerator(rescale=1./255)

    def label_mappings(self):
        """Unfinished placeholder: walks the frame's index, builds nothing, returns ``None``."""
        # NOTE(review): stub left as-is; `temp` is never used.
        for ind in self.df.index:
            temp = []

    def __len__(self):
        """Number of rows in the label table."""
        return len(self.df)

    def preprocess(self,img_path):
        """
        Load one image and return it as a scaled array with a leading batch axis.

        The image is loaded at 224x224, converted to an array, given an extra
        axis at position 0 and divided by 255.
        """
        # The local must not be called `image`: that would shadow the Keras
        # `image` module for the whole function and raise UnboundLocalError.
        img = image.load_img(img_path,target_size=(224,224))
        img_array = image.img_to_array(img)
        expanded_img_array = np.expand_dims(img_array, axis=0)
        preprocessed_img = expanded_img_array/255
        return preprocessed_img

    def __getitem__(self, index):
        """
        Return the sample at position ``index``.

        Returns
        -------
        dict
            ``'image'`` (float32 tensor) plus ``'gender'``, ``'master'`` and
            ``'sub'`` (int64 tensors holding the label ids).
        """
        # Positional lookup keeps `__getitem__` consistent with `__len__`
        # (valid positions are 0..len-1) even when the frame's index labels
        # are not a contiguous range.
        image_id = self.df['id'].iloc[index]
        img_path = os.path.join(self.root_dir, str(image_id) + ".jpg")

        image_array = self.preprocess(img_path)

        label_gender = self.num_list_gender[self.df['gender'].iloc[index]]
        label_master = self.num_list_master[self.df['masterCategory'].iloc[index]]
        label_sub = self.num_list_sub[self.df['subCategory'].iloc[index]]

        # image to float32 tensor
        image_tensor = tensorflow.convert_to_tensor(image_array,
                                                    dtype=tensorflow.float32)
        # labels to 64-bit integer tensors (TensorFlow has no `long` dtype)
        label_gender = tensorflow.convert_to_tensor(label_gender,
                                                    dtype=tensorflow.int64)
        label_master = tensorflow.convert_to_tensor(label_master,
                                                    dtype=tensorflow.int64)
        label_sub = tensorflow.convert_to_tensor(label_sub,
                                                 dtype=tensorflow.int64)
        return {
            'image': image_tensor,
            'gender': label_gender,
            'master': label_master,
            'sub': label_sub
        }