In [180]:
import json, math
import numpy as np
import pandas as pd
import cv2 as cv
import matplotlib.pyplot as plt

In [181]:
df_outfit = pd.read_csv('./data/outfits.csv', encoding='utf-8', sep=";")
df_outfit = df_outfit.rename(columns={"id": "outfit.id"})
df_outfit.head(2)

Unnamed: 0,outfit.id,name,description,group,owner,timeCreated,retailPrice,pricePerWeek,pricePerMonth,outfit_tags,tag_categories
0,outfit.fffdaa715c3646f8b1c0f04d549ff07e,Out of stock - Asymmetric Frilled Dress,"This fun, short dress features and asymmetric ...",group.50a586c78eb7626e294ba3bd07d12c79,o_00053,2017-12-30 11:28:01.000,4000.0,600.0,1200.0,"['Synthetic', 'Statement', 'Dresses', 'Metalli...","['Material', 'Occasion', 'Category', 'Details'..."
1,outfit.fffa1b9a3db6415d806f3c48f8ab58d9,Yellow Shell Mellomholmene Blouse,This beautiful blouse features an adjustable n...,group.61ad2fcabb3e9197e3836376e6b67f2c,o_00577,2021-06-07 12:07:22.921,1300.0,590.0,1180.0,"['ILAG', 'Tops', 'Spring', 'Summer', 'M', 'Pat...","['Brand', 'Category', 'Seasons', 'Seasons', 'S..."


In [182]:
df_names = pd.read_csv('./data/picture_triplets.csv', encoding='utf-8', sep=";")
df_names.head(2)

Unnamed: 0,picture.id,outfit.id,displayOrder,file_name
0,picture.0000cdba64314d84a49ed1c266589cc0,outfit.794483397da8425a813301eecf9828c6,0,0000cdba64314d84a49ed1c266589cc0.jpg
1,picture.00058abb53434872ae9bb4270ae21f8e,outfit.98f32aaf08bc4ff09c44e6e11e9199bc,2,00058abb53434872ae9bb4270ae21f8e.jpg


In [183]:
df_outfit.shape

(15649, 11)

In [184]:
df = pd.merge(df_outfit, df_names, on="outfit.id", how="inner").drop(columns=["outfit.id"])
df.head(2)

Unnamed: 0,name,description,group,owner,timeCreated,retailPrice,pricePerWeek,pricePerMonth,outfit_tags,tag_categories,picture.id,displayOrder,file_name
0,Out of stock - Asymmetric Frilled Dress,"This fun, short dress features and asymmetric ...",group.50a586c78eb7626e294ba3bd07d12c79,o_00053,2017-12-30 11:28:01.000,4000.0,600.0,1200.0,"['Synthetic', 'Statement', 'Dresses', 'Metalli...","['Material', 'Occasion', 'Category', 'Details'...",picture.1b9e3cf3581d4205890c52f4eac29cd1,0,1b9e3cf3581d4205890c52f4eac29cd1.jpg
1,Out of stock - Asymmetric Frilled Dress,"This fun, short dress features and asymmetric ...",group.50a586c78eb7626e294ba3bd07d12c79,o_00053,2017-12-30 11:28:01.000,4000.0,600.0,1200.0,"['Synthetic', 'Statement', 'Dresses', 'Metalli...","['Material', 'Occasion', 'Category', 'Details'...",picture.4d6f95d3f283451492d0c17b24e557e2,0,4d6f95d3f283451492d0c17b24e557e2.jpg


In [185]:
df = df[~df['description'].str.contains("fur", case=False, na=False)]
df = df[~df['description'].str.contains("signature", case=False, na=False)]

In [186]:
data_tags = {}
for i, row in df.iterrows():
    outfit_tags = json.loads(row["outfit_tags"].replace("'", '"'))
    
    tag_categories = json.loads(row["tag_categories"].replace("'", '"'))
    data_tags[i] = {}
    data_tags[i]["file_name"] = row["file_name"]
    data_tags[i]["description"] = row["description"]
    
    for tag, category in zip(outfit_tags, tag_categories):
        data_tags[i][category] = tag
        
df = pd.DataFrame.from_dict(data_tags, orient='index')
df

Unnamed: 0,file_name,description,Material,Occasion,Category,Details,Length,Size,Color,Brand,Seasons,Gender,Fit
0,1b9e3cf3581d4205890c52f4eac29cd1.jpg,"This fun, short dress features and asymmetric ...",Cotton,Statement,Dresses,Metallic,Mini,S,Black,Sandro,,,
1,4d6f95d3f283451492d0c17b24e557e2.jpg,"This fun, short dress features and asymmetric ...",Cotton,Statement,Dresses,Metallic,Mini,S,Black,Sandro,,,
2,8a8e1c4096ad46619d4781cf754b7a27.jpg,"This fun, short dress features and asymmetric ...",Cotton,Statement,Dresses,Metallic,Mini,S,Black,Sandro,,,
3,c5fbf81e1d4c40babef6d89e3fbcefd1.jpg,"This fun, short dress features and asymmetric ...",Cotton,Statement,Dresses,Metallic,Mini,S,Black,Sandro,,,
4,c684d164310c4f11b0e8341ba2687ab1.jpg,"This fun, short dress features and asymmetric ...",Cotton,Statement,Dresses,Metallic,Mini,S,Black,Sandro,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
50188,4d000a034f7c441b9be923450d7263d2.jpg,The Oldina Parka from Kari Traa is a women's p...,Synthetic,Everyday,Coats,,Midi,XS,Orange,Kari Traa,Winter,Women,
50189,5154c9ffde1441908749ee9c6a6fddc6.jpg,The Oldina Parka from Kari Traa is a women's p...,Synthetic,Everyday,Coats,,Midi,XS,Orange,Kari Traa,Winter,Women,
50190,9c821ecbecb14c959f35078010fb91f3.jpg,The Oldina Parka from Kari Traa is a women's p...,Synthetic,Everyday,Coats,,Midi,XS,Orange,Kari Traa,Winter,Women,
50191,a2b794c7ef83495a8997e7b0c318d65a.jpg,The FWSS Yugen Cardigan is a form-fitted cardi...,Wool,Business,Cardigans,,,M,Black,FWSS,Spring,Women,Maternity


In [187]:
df[df["description"].str.contains("print", case=False, na=False)]

Unnamed: 0,file_name,description,Material,Occasion,Category,Details,Length,Size,Color,Brand,Seasons,Gender,Fit
12,708012de16ba4d5cb0944b8b08b5acaf.jpg,Kaula from Rodebjer is a fitted dress made in ...,Synthetic,Everyday,Dresses,,Mini,M,Black,Rodebjer,Multi Season,Women,
13,813412f805444991a3a80614e864b2b9.jpg,Kaula from Rodebjer is a fitted dress made in ...,Synthetic,Everyday,Dresses,,Mini,M,Black,Rodebjer,Multi Season,Women,
14,8adfb24cea2b46deaa5e831960659782.jpg,Kaula from Rodebjer is a fitted dress made in ...,Synthetic,Everyday,Dresses,,Mini,M,Black,Rodebjer,Multi Season,Women,
15,fbdae7bcaaf84f169cd7995c3e508a16.jpg,Kaula from Rodebjer is a fitted dress made in ...,Synthetic,Everyday,Dresses,,Mini,M,Black,Rodebjer,Multi Season,Women,
110,37117e13c92f49bc91acde8985ed58e5.jpg,A gorgeous midi wrap dress from Gant in the mo...,Lyocell,Everyday,Dresses,Floral,Midi,S,Blue,Ganni,Summer,Women,Wrap
...,...,...,...,...,...,...,...,...,...,...,...,...,...
50081,dc179827af3449e7ac5ae4abbf31b89e.jpg,Tennessee from Rodebjer is a relaxed fitted bl...,Viscose,Dressed-up,Blouses,,,XS,Multicolor,Rodebjer,Multi Season,Women,
50132,768f82dce12a496ea9f5724cd44c03aa.jpg,The Printed Mesh Dress from Ganni is a colorfu...,Synthetic,Statement,Dresses,Pattern,Midi,M,Green,Ganni,Multi Season,Women,Stretchy
50133,ab25f009e4ae4664b5e2b0381422f85c.jpg,The Printed Mesh Dress from Ganni is a colorfu...,Synthetic,Statement,Dresses,Pattern,Midi,M,Green,Ganni,Multi Season,Women,Stretchy
50134,ccd3d84df03241fda702e0719bcce734.jpg,The Printed Mesh Dress from Ganni is a colorfu...,Synthetic,Statement,Dresses,Pattern,Midi,M,Green,Ganni,Multi Season,Women,Stretchy


In [188]:
df[df["description"].str.contains("logo", case=False, na=False)]

Unnamed: 0,file_name,description,Material,Occasion,Category,Details,Length,Size,Color,Brand,Seasons,Gender,Fit
54,307c1fe9b28b454cb0658e066d8e7760.jpg,Devy From Rodebjer is a faux leather fitted sh...,Faux Leather,Everyday,Shirts,,,XS,Brown,Rodebjer,Multi Season,Women,
55,7a10d2f8d4f14858bbd9ec7db0d7b21b.jpg,Devy From Rodebjer is a faux leather fitted sh...,Faux Leather,Everyday,Shirts,,,XS,Brown,Rodebjer,Multi Season,Women,
56,a5bded5baece4494a383f86bbf6689d5.jpg,Devy From Rodebjer is a faux leather fitted sh...,Faux Leather,Everyday,Shirts,,,XS,Brown,Rodebjer,Multi Season,Women,
57,cca59bb522cc4f4f956ecee6e6fe9a33.jpg,Devy From Rodebjer is a faux leather fitted sh...,Faux Leather,Everyday,Shirts,,,XS,Brown,Rodebjer,Multi Season,Women,
248,15a3f0425bd84b74a1ac8e0ab042b5b8.jpg,The lovely Sway Vest from Johaug is a long fle...,Synthetic,Active,Vests,,,S,Orange,Johaug,Winter,Women,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
49318,7f425ceb0283429fa09cd914c244a36b.jpg,Nomad is a lined jacket with patchwok details ...,Wool,Everyday,Jackets,,,L,Black,Rodebjer,Fall,Women,
49319,d95fd67ac8514b2cb3c25785597f7001.jpg,Nomad is a lined jacket with patchwok details ...,Wool,Everyday,Jackets,,,L,Black,Rodebjer,Fall,Women,
49320,e811ca445fce43418bcc0046bd80bf66.jpg,Nomad is a lined jacket with patchwok details ...,Wool,Everyday,Jackets,,,L,Black,Rodebjer,Fall,Women,
49468,8880e6bb27d24717a2f79fc5285b5fb3.jpg,The Tirill Down Jacket by Kari Traa for women ...,Synthetic,Everyday,Jackets,,,L,Red,Kari Traa,Winter,Women,


In [189]:
mask = (
    df["Details"].isna()
    & df["Color"].notna()
    & (df["Color"] != "Multicolor")
    & (df["Category"] != "Accessories")
    & (~df["description"].str.contains("print", case=False, na=False))
    & (~df["description"].str.contains("pattern", case=False, na=False))
    & (~df["description"].str.contains("stitching", case=False, na=False))
    & (~df["description"].str.contains("embroidery", case=False, na=False))
)

df.loc[mask, "Details"] = "Solid"

In [190]:
df.dropna(subset=["Color", "Category", "Details"])

Unnamed: 0,file_name,description,Material,Occasion,Category,Details,Length,Size,Color,Brand,Seasons,Gender,Fit
0,1b9e3cf3581d4205890c52f4eac29cd1.jpg,"This fun, short dress features and asymmetric ...",Cotton,Statement,Dresses,Metallic,Mini,S,Black,Sandro,,,
1,4d6f95d3f283451492d0c17b24e557e2.jpg,"This fun, short dress features and asymmetric ...",Cotton,Statement,Dresses,Metallic,Mini,S,Black,Sandro,,,
2,8a8e1c4096ad46619d4781cf754b7a27.jpg,"This fun, short dress features and asymmetric ...",Cotton,Statement,Dresses,Metallic,Mini,S,Black,Sandro,,,
3,c5fbf81e1d4c40babef6d89e3fbcefd1.jpg,"This fun, short dress features and asymmetric ...",Cotton,Statement,Dresses,Metallic,Mini,S,Black,Sandro,,,
4,c684d164310c4f11b0e8341ba2687ab1.jpg,"This fun, short dress features and asymmetric ...",Cotton,Statement,Dresses,Metallic,Mini,S,Black,Sandro,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
50188,4d000a034f7c441b9be923450d7263d2.jpg,The Oldina Parka from Kari Traa is a women's p...,Synthetic,Everyday,Coats,Solid,Midi,XS,Orange,Kari Traa,Winter,Women,
50189,5154c9ffde1441908749ee9c6a6fddc6.jpg,The Oldina Parka from Kari Traa is a women's p...,Synthetic,Everyday,Coats,Solid,Midi,XS,Orange,Kari Traa,Winter,Women,
50190,9c821ecbecb14c959f35078010fb91f3.jpg,The Oldina Parka from Kari Traa is a women's p...,Synthetic,Everyday,Coats,Solid,Midi,XS,Orange,Kari Traa,Winter,Women,
50191,a2b794c7ef83495a8997e7b0c318d65a.jpg,The FWSS Yugen Cardigan is a form-fitted cardi...,Wool,Business,Cardigans,Solid,,M,Black,FWSS,Spring,Women,Maternity


In [191]:
df = df[~df['Category'].isin(['Accessories', 'Jewelry', 'Bags'])]

In [192]:
df.groupby(["Details"]).size().reset_index(name='counts').sort_values(by="counts", ascending=False)

Unnamed: 0,Details,counts
8,Solid,24447
3,Floral,5346
5,Pattern,4087
6,Ruffles,2842
9,Stripes,1198
7,Sequins,1176
4,Metallic,804
2,Checkers,697
0,Animal print,382
1,Beaded,220


In [193]:
df.groupby(["Category"]).size().reset_index(name='counts').sort_values(by="counts", ascending=False)

Unnamed: 0,Category,counts
4,Dresses,22176
13,Tops,3833
14,Trousers,3090
11,Skirts,2895
5,Jackets,2401
7,Knitwear,1712
12,Sweaters,1616
0,Blazers,1342
8,Shirts,1271
3,Coats,1218


In [194]:
df.groupby(["Color"]).size().reset_index(name='counts').sort_values(by="counts", ascending=False)

Unnamed: 0,Color,counts
1,Black,8754
2,Blue,7026
13,White,4756
5,Green,4148
9,Pink,3748
0,Beige,2713
7,Multicolor,2269
11,Red,2056
14,Yellow,1928
6,Grey,1731


In [195]:
def exibir_imagens(df, idx_initial, amount, detail=None, category=None, color=None, img_per_line=5):
    # Filtrando as imagens desejadas
    dfp = df.copy()
    
    imagens_filtradas = df[df["Details"] == detail].iloc[idx_initial:idx_initial+amount]
    
    n_lines = math.ceil(len(imagens_filtradas) / img_per_line)
    
    fig, axes = plt.subplots(n_lines, img_per_line, figsize=(5 * img_per_line, 5 * n_lines))

    if n_lines == 1:
        axes = [axes]
    axes = axes.flatten() if isinstance(axes, np.ndarray) else [ax for linha in axes for ax in linha]

    # Iterando sobre as imagens e exibindo
    for ax, (_, file) in zip(axes, imagens_filtradas.iterrows()):
        filepath = f"./data/images/{file['file_name']}"
        img = cv.imread(filepath)

        if img is not None:
            ax.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))
            ax.set_title(f"{file['Details']} | {file['Category']} | {file['Color']}\n{file['file_name']}")
        else:
            ax.set_title("Imagem não encontrada")

        ax.axis('off')

    # Desativando eixos vazios se houver menos imagens do que espaços
    for ax in axes[len(imagens_filtradas):]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

In [196]:
# exibir_imagens(df, 200, 220, "Solid")

In [197]:
df.loc[df['description'].str.contains("leopard | zebra", case=False, na=False), 'Details'] = 'Animal print'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[df['description'].str.contains("leopard | zebra", case=False, na=False), 'Details'] = 'Animal print'


In [199]:
df = df[~df['Details'].isin(['Ruffles', 'Metallic', 'Beaded', 'Sequins'])]

In [200]:
df.groupby(["Details"]).size().reset_index(name='counts').sort_values(by="counts", ascending=False)

Unnamed: 0,Details,counts
4,Solid,24447
2,Floral,5346
3,Pattern,4040
5,Stripes,1188
1,Checkers,697
0,Animal print,560


In [202]:
df.to_csv("initial_filtered_clothes.csv", index=False)

In [203]:
df

Unnamed: 0,file_name,description,Material,Occasion,Category,Details,Length,Size,Color,Brand,Seasons,Gender,Fit
7,4685852ed93c439a944ca8ccdd3d1c52.jpg,This beautiful blouse features an adjustable n...,Cotton,Everyday,Blouses,Pattern,,M,Yellow,ILAG,Summer,Women,
8,649ea4f38ffa47eb92556af7d3195ba4.jpg,This beautiful blouse features an adjustable n...,Cotton,Everyday,Blouses,Pattern,,M,Yellow,ILAG,Summer,Women,
9,c2d4bb2bf67e4490bdb1c82a9f5bead3.jpg,This beautiful blouse features an adjustable n...,Cotton,Everyday,Blouses,Pattern,,M,Yellow,ILAG,Summer,Women,
10,c975446e9479495dac2713529edc4230.jpg,This beautiful blouse features an adjustable n...,Cotton,Everyday,Blouses,Pattern,,M,Yellow,ILAG,Summer,Women,
11,fb47724315704dc980a27311bed834e2.jpg,This beautiful blouse features an adjustable n...,Cotton,Everyday,Blouses,Pattern,,M,Yellow,ILAG,Summer,Women,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
50188,4d000a034f7c441b9be923450d7263d2.jpg,The Oldina Parka from Kari Traa is a women's p...,Synthetic,Everyday,Coats,Solid,Midi,XS,Orange,Kari Traa,Winter,Women,
50189,5154c9ffde1441908749ee9c6a6fddc6.jpg,The Oldina Parka from Kari Traa is a women's p...,Synthetic,Everyday,Coats,Solid,Midi,XS,Orange,Kari Traa,Winter,Women,
50190,9c821ecbecb14c959f35078010fb91f3.jpg,The Oldina Parka from Kari Traa is a women's p...,Synthetic,Everyday,Coats,Solid,Midi,XS,Orange,Kari Traa,Winter,Women,
50191,a2b794c7ef83495a8997e7b0c318d65a.jpg,The FWSS Yugen Cardigan is a form-fitted cardi...,Wool,Business,Cardigans,Solid,,M,Black,FWSS,Spring,Women,Maternity
