In [1]:
import pandas as pd
import numpy as np
import config
import tensorflow as tf

import matplotlib.pyplot as plt
import seaborn as sns

import keras
from keras.models import Model, load_model, save_model
from keras.applications import DenseNet201, densenet
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Lambda

from pathlib import Path

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def build_model():
    bb_model = DenseNet201(include_top=False, pooling='avg', input_shape=(224,224,3))
    l2norm = Lambda(lambda x: tf.nn.l2_normalize(x, axis=1))(bb_model.output)
    l2_model = Model(inputs=[bb_model.input], outputs=l2norm)
    l2_model.summary()
    return l2_model

In [3]:
batch_size = 32
datagen = ImageDataGenerator(preprocessing_function=densenet.preprocess_input)
generator = datagen.flow_from_directory( "data/",
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False)

Found 3026 images belonging to 1 classes.


In [4]:
model = build_model()
num_samples = generator.samples
num_batches = np.ceil(num_samples / batch_size)
embeddings = model.predict_generator(generator, num_batches, verbose=1)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 112, 112, 64) 9408        zero_padding2d_1[0][0]           
__________________________________________________________________________________________________
conv1/bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1/conv[0][0]                 
__________________________________________________________________________________________________
conv1/relu



In [5]:
embeddings.shape

(3026, 1920)

In [6]:
from keras.layers import Dense, Dropout, Input

In [7]:
files = generator.filenames

In [21]:
def build_ae_model(inp_dim=1920, hidden_dim=128):
    input_img = Input(shape=(inp_dim,))
    drop = Dropout(0.4)(input_img)
    h1 = Dense(256, activation='relu', name='h1')(drop)
    encoded = Dense(hidden_dim, activation='relu', name='encoder')(h1)
    d1 = Dense(256, activation='relu', name='h2')(encoded)
    decoded = Dense(inp_dim, activation='relu', name='decoder')(d1)
    autoencoder = Model(input_img, decoded)
    autoencoder.compile(optimizer='adam', loss='mean_squared_error')
    return autoencoder

In [22]:
def build_encoder(ae_model):
    model = Model(ae_model.input, ae_model.get_layer('encoder').output)
    return model

In [23]:
ae_model = build_ae_model(hidden_dim=32)
ae_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 1920)              0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 1920)              0         
_________________________________________________________________
h1 (Dense)                   (None, 256)               491776    
_________________________________________________________________
encoder (Dense)              (None, 32)                8224      
_________________________________________________________________
h2 (Dense)                   (None, 256)               8448      
_________________________________________________________________
decoder (Dense)              (None, 1920)              493440    
Total params: 1,001,888
Trainable params: 1,001,888
Non-trainable params: 0
_________________________________________________________________


In [24]:
ae_model.fit(embeddings, embeddings, batch_size=128, epochs=300, shuffle=True)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

Epoch 93/300
Epoch 94/300
Epoch 95/300
Epoch 96/300
Epoch 97/300
Epoch 98/300
Epoch 99/300
Epoch 100/300
Epoch 101/300
Epoch 102/300
Epoch 103/300
Epoch 104/300
Epoch 105/300
Epoch 106/300
Epoch 107/300
Epoch 108/300
Epoch 109/300
Epoch 110/300
Epoch 111/300
Epoch 112/300
Epoch 113/300
Epoch 114/300
Epoch 115/300
Epoch 116/300
Epoch 117/300
Epoch 118/300
Epoch 119/300
Epoch 120/300
Epoch 121/300
Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300
Epoch 162/300
Epoch 163/300
Epoch 164/300

Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 193/300
Epoch 194/300
Epoch 195/300
Epoch 196/300
Epoch 197/300
Epoch 198/300
Epoch 199/300
Epoch 200/300
Epoch 201/300
Epoch 202/300
Epoch 203/300
Epoch 204/300
Epoch 205/300
Epoch 206/300
Epoch 207/300
Epoch 208/300
Epoch 209/300
Epoch 210/300
Epoch 211/300
Epoch 212/300
Epoch 213/300
Epoch 214/300
Epoch 215/300
Epoch 216/300
Epoch 217/300
Epoch 218/300
Epoch 219/300
Epoch 220/300
Epoch 221/300
Epoch 222/300
Epoch 223/300
Epoch 224/300
Epoch 225/300
Epoch 226/300
Epoch 227/300
Epoch 228/300
Epoch 229/300
Epoch 230/300
Epoch 231/300
Epoch 232/300
Epoch 233/300
Epoch 234/300
Epoch 235/300
Epoch 236/300
Epoch 237/300
Epoch 238/300
Epoch 239/300
Epoch 240/300
Epoch 241/300
Epoch 242/300
Epoch 243/300
Epoch 244/300
Epoch 245/300
Epoch 246/300
Epoch 247/300
Epoch 248/300
Epoch 249/300
Epoch 250/300
Epoch 251/300
Epoch 252/300
Epoch 253/300
Epoch 

Epoch 273/300
Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300
Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300
Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300
Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300


<keras.callbacks.History at 0x7f0ae55b5be0>

In [25]:
ae_encoder = build_encoder(ae_model)

In [26]:
ae_embeds = ae_encoder.predict(embeddings)

In [27]:
ae_embeds.shape

(3026, 32)

In [28]:
# Check embeds

In [29]:
productids = [int(Path(f).stem) for f in files]
productids

[11139192,
 11139194,
 11139524,
 11139560,
 11139588,
 11139650,
 11141306,
 11141308,
 11141318,
 11141320,
 11141324,
 11141326,
 11141328,
 11141330,
 11141338,
 11141340,
 11141342,
 11141346,
 11141354,
 11141530,
 11141538,
 11141644,
 11144136,
 11144260,
 11145600,
 11145602,
 11145612,
 11145614,
 11145620,
 11145624,
 11145626,
 11145634,
 11145640,
 11145642,
 11145654,
 11145664,
 11145666,
 11145684,
 11145702,
 11145726,
 11145728,
 11145740,
 11145744,
 11145748,
 11145762,
 11146082,
 11146084,
 11146384,
 11146754,
 11147250,
 11147252,
 11147258,
 11147268,
 11147280,
 11147282,
 11147290,
 11147292,
 11147300,
 11147312,
 11147324,
 11147512,
 11147534,
 11147546,
 11147548,
 11147600,
 11147622,
 11148212,
 11148440,
 11148482,
 11148514,
 11148516,
 11148792,
 11148854,
 11148860,
 11148888,
 11148902,
 11148920,
 11148922,
 11148954,
 11148964,
 11148966,
 11148980,
 11148982,
 11148984,
 11149026,
 11149036,
 11149062,
 11149198,
 11149838,
 11149842,
 11149866,

In [30]:
em_dict = {}
for pid, em in zip(productids, ae_embeds):
    em_dict[pid] = em.flatten()

In [31]:
import pickle
with open( "outputs/pid_em.pkl", "wb") as f:
    pickle.dump(em_dict, f)

In [32]:
len(em_dict)

3026

In [33]:
em_dict[11139192]

array([0.        , 0.        , 0.4325103 , 0.        , 0.46405852,
       0.        , 0.38293362, 0.        , 0.24399802, 0.33453616,
       0.        , 0.        , 0.        , 0.32402   , 0.        ,
       0.        , 0.        , 0.14970759, 0.24812868, 0.5148265 ,
       0.18516803, 0.        , 0.26886356, 0.        , 0.09424812,
       0.12164172, 0.2286085 , 0.        , 0.        , 0.2314678 ,
       0.        , 0.        ], dtype=float32)