# MODELOS PARA BÚSQUEDA DE PRENDAS MÁS PARECIDAS A LA BÚSQUEDA

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os
from sklearn.metrics import confusion_matrix
from sklearn.cluster import KMeans
import warnings


import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, Dense, Flatten, Dropout, GlobalMaxPooling2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.metrics import categorical_accuracy, top_k_categorical_accuracy
from tensorflow.keras.models import load_model
plt.style.use('ggplot')
warnings.filterwarnings("ignore")

Funciones para segmentación de color
==

In [2]:
def get_max_colors_(cluster, centroids):
    """Return the most populated cluster as a ``(fraction, centroid)`` pair.

    Parameters
    ----------
    cluster : object
        Fitted clustering result. Only its ``labels_`` attribute is read; it
        must hold one non-negative integer cluster index per sample.
    centroids : sequence
        Cluster centres, indexed by cluster label.

    Returns
    -------
    tuple
        ``(fraction, centroid)``: the share of samples assigned to the largest
        cluster and that cluster's centre. If several clusters are equally
        large, the one with the lowest label is returned.
    """
    labels = np.asarray(cluster.labels_)
    # Count samples per label. `minlength` keeps the counts aligned with
    # `centroids` even if some cluster received no samples.
    counts = np.bincount(labels, minlength=len(centroids))
    fractions = counts.astype("float") / counts.sum()
    # Select the winner by index. Taking max() over (fraction, centroid)
    # tuples compares the centroid arrays whenever two fractions are equal,
    # which raises "truth value of an array is ambiguous".
    best = int(np.argmax(fractions))
    return (fractions[best], centroids[best])

def crop_cv_pct(img, pct=0.25):
    """Keep the central part of an image by trimming every border.

    A margin of ``int(pct * height)`` rows is removed from the top and the
    bottom, and ``int(pct * width)`` columns from the left and the right.

    Parameters
    ----------
    img : array-like with ``.shape``
        Image indexed as ``[row, column, ...]``.
    pct : float, default 0.25
        Fraction of each dimension removed from each side.

    Returns
    -------
    The sliced view ``img[top:bottom, left:right]``.
    """
    height, width = img.shape[0], img.shape[1]
    row_margin = int(pct * height)
    col_margin = int(pct * width)
    return img[row_margin:height - row_margin, col_margin:width - col_margin]

def cluster_color(img, n_clusters=5, random_state=0):
    """Estimate the dominant colour of an image with k-means.

    The pixels are clustered into ``n_clusters`` colours and the centre of the
    most populated cluster is returned.

    Parameters
    ----------
    img : ndarray
        Image whose data can be reshaped to ``(rows * cols, 3)``, i.e. three
        values per pixel.
    n_clusters : int, default 5
        Number of colour clusters to fit.
    random_state : int or None, default 0
        Seed for the k-means initialisation. A fixed seed makes the returned
        colour reproducible between runs; pass ``None`` for unseeded behaviour.

    Returns
    -------
    ndarray
        Centre (3 values) of the cluster holding the largest share of pixels.
    """
    reshape = img.reshape((img.shape[0] * img.shape[1], 3))
    cluster = KMeans(n_clusters=n_clusters, random_state=random_state).fit(reshape)
    # get_max_colors_ returns (fraction, centroid); only the centroid is needed.
    color_img = get_max_colors_(cluster, cluster.cluster_centers_)[1]
    return color_img

## Se carga el modelo entrenado

In [4]:
# Load the trained classification model from its HDF5 checkpoint.
model_path = 'saved_model/modelo_04_100_epochs_vgg16_modificado_2_sin_transfer_data_ampl.h5'
base_model = tf.keras.models.load_model(model_path)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [5]:
# Print the layer-by-layer architecture of the loaded model.
base_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 128, 128, 3)]     0         
_________________________________________________________________
conv2d (Conv2D)              (None, 128, 128, 128)     3584      
_________________________________________________________________
batch_normalization (BatchNo (None, 128, 128, 128)     512       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 128, 128, 128)     147584    
_________________________________________________________________
batch_normalization_1 (Batch (None, 128, 128, 128)     512       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 64, 64, 128)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 64, 256)       295168

## Se obtiene modelo de generación de deep features

In [6]:
# Use the output tensor of the layer named 'dropout_2' as the deep-feature vector.
x = base_model.get_layer('dropout_2').output
# New model that shares base_model's input and stops at that layer.
feature_model = Model(inputs=base_model.input, outputs=x) # wiring of the new model

In [7]:
# Print the architecture of the feature-extraction model.
feature_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 128, 128, 3)]     0         
_________________________________________________________________
conv2d (Conv2D)              (None, 128, 128, 128)     3584      
_________________________________________________________________
batch_normalization (BatchNo (None, 128, 128, 128)     512       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 128, 128, 128)     147584    
_________________________________________________________________
batch_normalization_1 (Batch (None, 128, 128, 128)     512       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 64, 64, 128)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 64, 256)       295168

## Se genera la transformación de las imágenes a vectores con deep features

Se crea un dataframe con cuatro campos creados con los campos de entrenamiento, simulando una pequeña base de datos. El primer campo corresponde a la ruta y nombre de la imagen original, el segundo campo corresponde a la categoría verdadera de la prenda, el tercer campo corresponde a un número del 0 al 18 correspondiente a las 19 categorías de las prendas, y el último campo corresponde a un vector de 1024 componentes, correspondiente a los vectores de Deep Features.

In [8]:
# Category name -> single-element list holding its integer class id.
# Ids are assigned by position in the tuple below (0 for the first name, 18 for the last).
dict_cat = {
    name: [class_id]
    for class_id, name in enumerate((
        'MUJER-SHORT',
        'MUJER-CAMISA',
        'MUJER-JOGGER',
        'HOMBRE-PANTALON',
        'MUJER-TOP_ML',
        'HOMBRE-BUSO',
        'MUJER-BUSO',
        'HOMBRE-POLO',
        'MUJER-TOP',
        'HOMBRE-SHORT',
        'MUJER-PANTALON',
        'MUJER-VESTIDO',
        'HOMBRE-CAMISA',
        'HOMBRE-CHAQUETA',
        'HOMBRE-CAMISETA',
        'MUJER-CHAQUETA',
        'HOMBRE-JOGGER',
        'HOMBRE-TANK',
        'MUJER-FALDA',
    ))
}

In [9]:
def cat_text2number(texto):
    """Translate a garment category name into its integer class id.

    Parameters
    ----------
    texto : str
        Category name such as ``'HOMBRE-CAMISA'``.

    Returns
    -------
    int
        Class id between 0 and 18.

    Raises
    ------
    KeyError
        If ``texto`` is not one of the 19 known category names.
    """
    # The class id of each category is its position in this tuple.
    ordered_categories = (
        'MUJER-SHORT',
        'MUJER-CAMISA',
        'MUJER-JOGGER',
        'HOMBRE-PANTALON',
        'MUJER-TOP_ML',
        'HOMBRE-BUSO',
        'MUJER-BUSO',
        'HOMBRE-POLO',
        'MUJER-TOP',
        'HOMBRE-SHORT',
        'MUJER-PANTALON',
        'MUJER-VESTIDO',
        'HOMBRE-CAMISA',
        'HOMBRE-CHAQUETA',
        'HOMBRE-CAMISETA',
        'MUJER-CHAQUETA',
        'HOMBRE-JOGGER',
        'HOMBRE-TANK',
        'MUJER-FALDA',
    )
    class_ids = {name: class_id for class_id, name in enumerate(ordered_categories)}
    return class_ids[texto]

In [10]:
# Quick check of the lookup for one category name.
cat_text2number('HOMBRE-CAMISA')

12

In [11]:
# Build an index of every image stored as Input/<category>/<file>.
INPUT_DIR = "Input"

cont = 0                # Number of category folders processed
path = []               # <category>/<file>
categoria_text = []     # Category (folder) name
nombre_prenda = []      # File name
path1 = []              # Full relative path (input directory, folder, file)
categoria_num = []      # Integer class id of the category

# sorted() makes the row order independent of the order in which the
# filesystem happens to list entries.
for folder in sorted(os.listdir(INPUT_DIR)):
    rootDir = os.path.join(INPUT_DIR, folder)
    if not os.path.isdir(rootDir):
        # A stray file next to the category folders cannot be listed as a directory.
        continue
    # Resolved once per folder; raises KeyError for a folder that is not a known category.
    folder_num = cat_text2number(folder)
    cont += 1
    for file in sorted(os.listdir(rootDir)):
        rootComplet = os.path.join(rootDir, file)
        rootFolder = os.path.join(folder, file)
        path.append(rootFolder)
        categoria_text.append(folder)
        nombre_prenda.append(file)
        path1.append(rootComplet)
        categoria_num.append(folder_num)


DB_ropa = pd.DataFrame(list(zip(path, categoria_text, nombre_prenda, path1, categoria_num)),
                       columns=['path', 'categoria_text', 'nombre_prenda', 'path1', 'categoria_num'])
DB_ropa.head()

Unnamed: 0,path,categoria_text,nombre_prenda,path1,categoria_num
0,HOMBRE-BUSO\10270.jpg,HOMBRE-BUSO,10270.jpg,Input\HOMBRE-BUSO\10270.jpg,5
1,HOMBRE-BUSO\10271.jpg,HOMBRE-BUSO,10271.jpg,Input\HOMBRE-BUSO\10271.jpg,5
2,HOMBRE-BUSO\10272.jpg,HOMBRE-BUSO,10272.jpg,Input\HOMBRE-BUSO\10272.jpg,5
3,HOMBRE-BUSO\10273.jpg,HOMBRE-BUSO,10273.jpg,Input\HOMBRE-BUSO\10273.jpg,5
4,HOMBRE-BUSO\10510.jpg,HOMBRE-BUSO,10510.jpg,Input\HOMBRE-BUSO\10510.jpg,5


In [12]:
# Show the last rows of the image index.
DB_ropa.tail()

Unnamed: 0,path,categoria_text,nombre_prenda,path1,categoria_num
17112,MUJER-VESTIDO\9625.jpg,MUJER-VESTIDO,9625.jpg,Input\MUJER-VESTIDO\9625.jpg,11
17113,MUJER-VESTIDO\9626.jpg,MUJER-VESTIDO,9626.jpg,Input\MUJER-VESTIDO\9626.jpg,11
17114,MUJER-VESTIDO\9627.jpg,MUJER-VESTIDO,9627.jpg,Input\MUJER-VESTIDO\9627.jpg,11
17115,MUJER-VESTIDO\9865.jpg,MUJER-VESTIDO,9865.jpg,Input\MUJER-VESTIDO\9865.jpg,11
17116,MUJER-VESTIDO\9905.jpg,MUJER-VESTIDO,9905.jpg,Input\MUJER-VESTIDO\9905.jpg,11


In [13]:
# Show the first two rows of the image index.
DB_ropa.head(2)

Unnamed: 0,path,categoria_text,nombre_prenda,path1,categoria_num
0,HOMBRE-BUSO\10270.jpg,HOMBRE-BUSO,10270.jpg,Input\HOMBRE-BUSO\10270.jpg,5
1,HOMBRE-BUSO\10271.jpg,HOMBRE-BUSO,10271.jpg,Input\HOMBRE-BUSO\10271.jpg,5


In [14]:
# Select the rows that contain at least one null value in any column.
DB_ropa[DB_ropa.isnull().any(axis=1)]

Unnamed: 0,path,categoria_text,nombre_prenda,path1,categoria_num


In [15]:
# Show the first 20 rows of the image index.
DB_ropa.head(20)

Unnamed: 0,path,categoria_text,nombre_prenda,path1,categoria_num
0,HOMBRE-BUSO\10270.jpg,HOMBRE-BUSO,10270.jpg,Input\HOMBRE-BUSO\10270.jpg,5
1,HOMBRE-BUSO\10271.jpg,HOMBRE-BUSO,10271.jpg,Input\HOMBRE-BUSO\10271.jpg,5
2,HOMBRE-BUSO\10272.jpg,HOMBRE-BUSO,10272.jpg,Input\HOMBRE-BUSO\10272.jpg,5
3,HOMBRE-BUSO\10273.jpg,HOMBRE-BUSO,10273.jpg,Input\HOMBRE-BUSO\10273.jpg,5
4,HOMBRE-BUSO\10510.jpg,HOMBRE-BUSO,10510.jpg,Input\HOMBRE-BUSO\10510.jpg,5
5,HOMBRE-BUSO\10511.jpg,HOMBRE-BUSO,10511.jpg,Input\HOMBRE-BUSO\10511.jpg,5
6,HOMBRE-BUSO\10512.jpg,HOMBRE-BUSO,10512.jpg,Input\HOMBRE-BUSO\10512.jpg,5
7,HOMBRE-BUSO\10513.jpg,HOMBRE-BUSO,10513.jpg,Input\HOMBRE-BUSO\10513.jpg,5
8,HOMBRE-BUSO\10807.jpg,HOMBRE-BUSO,10807.jpg,Input\HOMBRE-BUSO\10807.jpg,5
9,HOMBRE-BUSO\10808.jpg,HOMBRE-BUSO,10808.jpg,Input\HOMBRE-BUSO\10808.jpg,5


In [16]:
# Show the last 20 rows of the image index.
DB_ropa.tail(20)

Unnamed: 0,path,categoria_text,nombre_prenda,path1,categoria_num
17097,MUJER-VESTIDO\9023.jpg,MUJER-VESTIDO,9023.jpg,Input\MUJER-VESTIDO\9023.jpg,11
17098,MUJER-VESTIDO\9024.jpg,MUJER-VESTIDO,9024.jpg,Input\MUJER-VESTIDO\9024.jpg,11
17099,MUJER-VESTIDO\9234.jpg,MUJER-VESTIDO,9234.jpg,Input\MUJER-VESTIDO\9234.jpg,11
17100,MUJER-VESTIDO\9235.jpg,MUJER-VESTIDO,9235.jpg,Input\MUJER-VESTIDO\9235.jpg,11
17101,MUJER-VESTIDO\9236.jpg,MUJER-VESTIDO,9236.jpg,Input\MUJER-VESTIDO\9236.jpg,11
17102,MUJER-VESTIDO\9237.jpg,MUJER-VESTIDO,9237.jpg,Input\MUJER-VESTIDO\9237.jpg,11
17103,MUJER-VESTIDO\9238.jpg,MUJER-VESTIDO,9238.jpg,Input\MUJER-VESTIDO\9238.jpg,11
17104,MUJER-VESTIDO\9348.jpg,MUJER-VESTIDO,9348.jpg,Input\MUJER-VESTIDO\9348.jpg,11
17105,MUJER-VESTIDO\9458.jpg,MUJER-VESTIDO,9458.jpg,Input\MUJER-VESTIDO\9458.jpg,11
17106,MUJER-VESTIDO\9459.jpg,MUJER-VESTIDO,9459.jpg,Input\MUJER-VESTIDO\9459.jpg,11


In [17]:
# Separate frame holding each category name with its numeric class id.
# .copy() detaches it from DB_ropa, so later modifications of `categorias`
# never operate on a slice of the original frame.
categorias = DB_ropa[['categoria_text','categoria_num']].copy()
categorias.head()

Unnamed: 0,categoria_text,categoria_num
0,HOMBRE-BUSO,5
1,HOMBRE-BUSO,5
2,HOMBRE-BUSO,5
3,HOMBRE-BUSO,5
4,HOMBRE-BUSO,5


In [18]:
# Remove duplicated (category, id) pairs so that each category appears once,
# then renumber the rows from 0.
# Re-assigning the result (rather than inplace=True) leaves the frame that
# `categorias` was derived from untouched and avoids SettingWithCopyWarning.
categorias = categorias.drop_duplicates(keep='first').reset_index(drop=True)
categorias

Unnamed: 0,categoria_text,categoria_num
0,HOMBRE-BUSO,5
1,HOMBRE-CAMISA,12
2,HOMBRE-CAMISETA,14
3,HOMBRE-CHAQUETA,13
4,HOMBRE-JOGGER,16
5,HOMBRE-PANTALON,3
6,HOMBRE-POLO,7
7,HOMBRE-SHORT,9
8,HOMBRE-TANK,17
9,MUJER-BUSO,6


In [19]:
# Rebuild the category -> [class id] mapping from the de-duplicated frame:
# the ids of each category are collected into a list, keyed by category name.
ids_by_category = categorias.groupby('categoria_text')['categoria_num']
dict_cat = ids_by_category.apply(list).to_dict()
dict_cat

{'HOMBRE-BUSO': [5],
 'HOMBRE-CAMISA': [12],
 'HOMBRE-CAMISETA': [14],
 'HOMBRE-CHAQUETA': [13],
 'HOMBRE-JOGGER': [16],
 'HOMBRE-PANTALON': [3],
 'HOMBRE-POLO': [7],
 'HOMBRE-SHORT': [9],
 'HOMBRE-TANK': [17],
 'MUJER-BUSO': [6],
 'MUJER-CAMISA': [1],
 'MUJER-CHAQUETA': [15],
 'MUJER-FALDA': [18],
 'MUJER-JOGGER': [2],
 'MUJER-PANTALON': [10],
 'MUJER-SHORT': [0],
 'MUJER-TOP': [8],
 'MUJER-TOP_ML': [4],
 'MUJER-VESTIDO': [11]}

In [20]:
# Look up one category in the mapping; each value is a list of class ids.
dict_cat['HOMBRE-BUSO']

[5]

In [21]:
# Look up the class id of 'HOMBRE-BUSO' through the helper function.
cat_text2number('HOMBRE-BUSO')

5

## Add to Dataframe the columns with Features from latent state

In [22]:
# Side length, in pixels, that every image is resized to (IMG_S x IMG_S) before feature extraction.
IMG_S = 128

In [23]:
# For every indexed image compute (a) its deep-feature vector and (b) the
# dominant colour of its central region. Images that cannot be processed are
# recorded in `failed_paths` instead of being dropped silently.
final_features = list()   # output of feature_model.predict for each processed image
path_list = list()        # path of each processed image
color_feature = list()    # dominant colour of each processed image
failed_paths = list()     # (path, error) for each image that was skipped

for i, image_path in enumerate(DB_ropa["path1"]):
    try:
        imagex = cv2.imread(image_path, cv2.IMREAD_ANYCOLOR)
        if imagex is None:
            # cv2.imread reports an unreadable file by returning None, not by raising.
            raise ValueError("cv2.imread could not read the file")
        # NOTE(review): IMREAD_ANYCOLOR may yield a single-channel array for
        # grayscale files, which would then fail in cvtColor below and be
        # skipped — confirm whether IMREAD_COLOR is preferable.
        imagex = cv2.resize(imagex, (IMG_S, IMG_S))
        imagex = cv2.cvtColor(imagex, cv2.COLOR_BGR2RGB)
        imagex = np.array(imagex) / 255.   # scale pixel values by 1/255

        # Dominant colour of the central region (33% trimmed from each border).
        img = crop_cv_pct(imagex.copy(), pct=0.33)
        img = cluster_color(img)
        img = np.expand_dims(img, axis=0)

        # Add the batch axis expected by predict().
        imagex = np.expand_dims(imagex, axis=0)
        feature_ropa_test = feature_model.predict(imagex)
    except Exception as err:
        # Keep going, but remember what failed and why. Unlike a bare
        # `except:`, this lets KeyboardInterrupt stop the loop.
        failed_paths.append((image_path, repr(err)))
        continue

    # Append only after every step succeeded so the three lists stay aligned.
    final_features.append(feature_ropa_test)
    path_list.append(image_path)
    color_feature.append(img)

print("%d images processed, %d skipped" % (len(path_list), len(failed_paths)))
   

In [24]:
# One row per processed image: (path, deep-feature array, dominant-colour array).
feature_rows = list(zip(path_list, final_features, color_feature))
Final_DB = pd.DataFrame(feature_rows, columns=['path', 'feature', 'color'])

In [25]:
# (rows, columns) of the feature table.
Final_DB.shape

(16802, 3)

In [26]:
# Show the first rows of the feature table.
Final_DB.head()

Unnamed: 0,path,feature,color
0,Input\HOMBRE-BUSO\10270.jpg,"[[0.0, 1.4480228, 0.0, 0.0, 0.018729024, 0.0, ...","[[0.09663999251251981, 0.09233001076325714, 0...."
1,Input\HOMBRE-BUSO\10271.jpg,"[[0.0, 1.3410525, 0.0, 0.0, 0.09569714, 0.0, 0...","[[0.9849683952528372, 0.9833075335397333, 0.98..."
2,Input\HOMBRE-BUSO\10272.jpg,"[[0.0, 1.5886359, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...","[[0.02617081920487495, 0.025984077841661887, 0..."
3,Input\HOMBRE-BUSO\10273.jpg,"[[0.4330516, 1.1938989, 0.0, 0.0, 0.0, 0.0, 0....","[[0.051571250955297625, 0.05334154604250103, 0..."
4,Input\HOMBRE-BUSO\10510.jpg,"[[0.035746604, 1.0029361, 0.9901881, 0.0, 0.0,...","[[0.11913157563667087, 0.11574558329289567, 0...."


In [28]:
# Add the category columns to the DataFrame.
# The category is the name of the folder that contains the image. Extracting it
# with os.path (instead of splitting on a hard-coded "\\") works with whichever
# separator os.path.join used when the paths were built.
Final_DB['categoria_text'] = Final_DB['path'].apply(
    lambda image_path: os.path.basename(os.path.dirname(image_path))
)
Final_DB['categoria_num'] = Final_DB['categoria_text'].apply(cat_text2number)

In [29]:
# Show the first rows again, now including the category columns.
Final_DB.head()

Unnamed: 0,path,feature,color,categoria_text,categoria_num
0,Input\HOMBRE-BUSO\10270.jpg,"[[0.0, 1.4480228, 0.0, 0.0, 0.018729024, 0.0, ...","[[0.09663999251251981, 0.09233001076325714, 0....",HOMBRE-BUSO,5
1,Input\HOMBRE-BUSO\10271.jpg,"[[0.0, 1.3410525, 0.0, 0.0, 0.09569714, 0.0, 0...","[[0.9849683952528372, 0.9833075335397333, 0.98...",HOMBRE-BUSO,5
2,Input\HOMBRE-BUSO\10272.jpg,"[[0.0, 1.5886359, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...","[[0.02617081920487495, 0.025984077841661887, 0...",HOMBRE-BUSO,5
3,Input\HOMBRE-BUSO\10273.jpg,"[[0.4330516, 1.1938989, 0.0, 0.0, 0.0, 0.0, 0....","[[0.051571250955297625, 0.05334154604250103, 0...",HOMBRE-BUSO,5
4,Input\HOMBRE-BUSO\10510.jpg,"[[0.035746604, 1.0029361, 0.9901881, 0.0, 0.0,...","[[0.11913157563667087, 0.11574558329289567, 0....",HOMBRE-BUSO,5


In [30]:
# Shape of the colour array stored in the first row.
Final_DB.color[0].shape

(1, 3)

In [31]:
# Shape of the deep-feature array stored in the first row.
Final_DB.feature[0].shape

(1, 1024)

In [32]:
# Save the garment/feature database.
os.makedirs('./DB', exist_ok=True)   # to_pickle does not create a missing directory
Final_DB.to_pickle('./DB/DB_prendas_features.pkl')
# To load it back: df1 = pd.read_pickle('./DB/DB_prendas_features.pkl')
# (unpickling executes code from the file, so only load pickles you trust).

In [33]:
# Rows whose class id is 14, which cat_text2number maps from 'HOMBRE-CAMISETA'.
Final_DB[Final_DB['categoria_num'] ==14]

Unnamed: 0,path,feature,color,categoria_text,categoria_num
898,Input\HOMBRE-CAMISETA\10067.jpg,"[[0.5282475, 0.4532714, 1.3763256, 0.0, 0.0, 0...","[[0.12103037293348659, 0.10890217049386533, 0....",HOMBRE-CAMISETA,14
899,Input\HOMBRE-CAMISETA\10068.jpg,"[[0.27601665, 0.30286002, 1.3191558, 0.0, 0.0,...","[[0.10985174557627961, 0.10339151920930936, 0....",HOMBRE-CAMISETA,14
900,Input\HOMBRE-CAMISETA\10178.jpg,"[[0.019437045, 0.5984186, 1.0574868, 0.0, 0.0,...","[[0.07419646606583008, 0.09584585069612372, 0....",HOMBRE-CAMISETA,14
901,Input\HOMBRE-CAMISETA\10179.jpg,"[[0.090370655, 0.37720108, 1.0841547, 0.0, 0.0...","[[0.08551668322112926, 0.07619100172902193, 0....",HOMBRE-CAMISETA,14
902,Input\HOMBRE-CAMISETA\10180.jpg,"[[0.49229363, 0.16864061, 1.4884001, 0.0, 0.0,...","[[0.18956042547738125, 0.17338459566833334, 0....",HOMBRE-CAMISETA,14
...,...,...,...,...,...
5102,Input\HOMBRE-CAMISETA\9696.jpg,"[[0.44363776, 0.25186893, 0.86093795, 0.0, 0.0...","[[0.3849743481622042, 0.17744746644177406, 0.1...",HOMBRE-CAMISETA,14
5103,Input\HOMBRE-CAMISETA\9697.jpg,"[[0.9348272, 0.61874026, 1.6063149, 0.0, 0.0, ...","[[0.8845890121767909, 0.8796811403286547, 0.87...",HOMBRE-CAMISETA,14
5104,Input\HOMBRE-CAMISETA\9698.jpg,"[[0.43468317, 0.4232099, 1.1460156, 0.0, 0.0, ...","[[0.0748532312333206, 0.07344703561898214, 0.0...",HOMBRE-CAMISETA,14
5105,Input\HOMBRE-CAMISETA\9699.jpg,"[[0.41736963, 0.39867377, 1.4437891, 0.0, 0.0,...","[[0.9279317697228138, 0.9158326016973963, 0.92...",HOMBRE-CAMISETA,14
