In [1]:
import os
import numpy as np

from keras.preprocessing import sequence
import keras.models as km
import keras.layers as kl
import keras.constraints as kc
from keras.datasets import imdb
from keras import losses
from keras.models import load_model
import keras

from keras.utils.vis_utils import model_to_dot

import matplotlib.pyplot as plt
%matplotlib inline

from IPython.display import SVG

import ipywidgets as ipw

import data_preparation as mdp
from data_preparation import MercariConfig

Using TensorFlow backend.


In [2]:
%%time

X_name_seq_train, X_item_desc_seq_train, x_cat_train, x_brand_train, x_f_train, y_train = mdp.get_data_for_training(
    MercariConfig.TRAINING_SET_PREP_FILE, 
    MercariConfig.TRAINING_NAME_INDEX_FILE,
    MercariConfig.TRAINING_ITEM_DESC_INDEX_FILE,
    MercariConfig.MAX_WORDS_IN_NAME,
    MercariConfig.MAX_WORDS_IN_ITEM_DESC)

2018-01-30 11:00:09,685 - MerL.data_preparation - INFO - Loading index_sequence_data from mercari_train_name_index.csv ...
2018-01-30 11:00:10,051 - MerL.data_preparation - INFO - Loading index_sequence_data from mercari_train_name_index.csv done.
2018-01-30 11:00:10,051 - MerL.data_preparation - INFO - Loading index_sequence_data from mercari_train_item_desc_index.csv ...
2018-01-30 11:00:16,888 - MerL.data_preparation - INFO - Loading index_sequence_data from mercari_train_item_desc_index.csv done.


CPU times: user 7.08 s, sys: 1.05 s, total: 8.13 s
Wall time: 8.13 s


In [3]:
# Sanity check: show the shape of every training array on a single line.
print(*(arr.shape for arr in (
    X_name_seq_train, X_item_desc_seq_train,
    x_cat_train, x_brand_train, x_f_train, y_train,
)))

(237205, 21) (237205, 301) (237205,) (237205,) (237205, 2) (237205,)


In [4]:
%%time

X_name_seq_val, X_item_desc_seq_val, x_cat_val, x_brand_val, x_f_val, y_val = mdp.get_data_for_training(
    MercariConfig.VALIDATION_SET_PREP_FILE, 
    MercariConfig.VALIDATION_NAME_INDEX_FILE,
    MercariConfig.VALIDATION_ITEM_DESC_INDEX_FILE,
    MercariConfig.MAX_WORDS_IN_NAME,
    MercariConfig.MAX_WORDS_IN_ITEM_DESC)

2018-01-30 11:00:20,297 - MerL.data_preparation - INFO - Loading index_sequence_data from mercari_val_name_index.csv ...
2018-01-30 11:00:20,346 - MerL.data_preparation - INFO - Loading index_sequence_data from mercari_val_name_index.csv done.
2018-01-30 11:00:20,347 - MerL.data_preparation - INFO - Loading index_sequence_data from mercari_val_item_desc_index.csv ...
2018-01-30 11:00:21,145 - MerL.data_preparation - INFO - Loading index_sequence_data from mercari_val_item_desc_index.csv done.


CPU times: user 960 ms, sys: 8 ms, total: 968 ms
Wall time: 963 ms


In [5]:
# Sanity check: show the shape of every validation array on a single line.
print(*(arr.shape for arr in (
    X_name_seq_val, X_item_desc_seq_val,
    x_cat_val, x_brand_val, x_f_val, y_val,
)))

(29651, 21) (29651, 301) (29651,) (29651,) (29651, 2) (29651,)


In [6]:
# Vocabulary sizes passed to the model: the configured word cap plus
# MercariConfig.WORD_I (presumably the index offset reserved for special
# tokens -- TODO confirm against data_preparation).
num_words_name = MercariConfig.MAX_WORDS_FROM_INDEX_4_NAME + MercariConfig.WORD_I
num_words_item_desc = MercariConfig.MAX_WORDS_FROM_INDEX_4_ITEM_DESC + MercariConfig.WORD_I

# Sequence lengths get one extra slot because the first word is always <START>.
max_seq_len_name = MercariConfig.MAX_WORDS_IN_NAME + 1
max_seq_len_item_desc = MercariConfig.MAX_WORDS_IN_ITEM_DESC + 1

# Pad the training sequences to the fixed lengths computed above.
X_name_seq_train, X_item_desc_seq_train = mdp.pad_sequences(
    X_name_seq=X_name_seq_train,
    X_item_desc_seq=X_item_desc_seq_train,
    max_seq_len_name=max_seq_len_name,
    max_seq_len_item_desc=max_seq_len_item_desc,
)

# Pad the validation sequences to the very same lengths.
X_name_seq_val, X_item_desc_seq_val = mdp.pad_sequences(
    X_name_seq=X_name_seq_val,
    X_item_desc_seq=X_item_desc_seq_val,
    max_seq_len_name=max_seq_len_name,
    max_seq_len_item_desc=max_seq_len_item_desc,
)

In [7]:
# Embedding widths for the word inputs and for the categorical inputs.
word_embedding_dims = 32
cat_embedding_dims = 10

# Hard-coded cardinalities of the category and brand inputs.
# NOTE(review): presumably these must cover every id found in x_cat_* and
# x_brand_* -- confirm against the prepared data set.
num_categories = 1098
num_brands = 2767

# Assemble the Keras model from the sizes defined in this cell and the
# vocabulary / sequence-length values computed in the padding cell.
model = mdp.build_keras_model(
    word_embedding_dims=word_embedding_dims,
    num_words_name=num_words_name,
    max_seq_len_name=max_seq_len_name,
    num_words_item_desc=num_words_item_desc,
    max_seq_len_item_desc=max_seq_len_item_desc,
    cat_embedding_dims=cat_embedding_dims,
    num_categories=num_categories,
    num_brands=num_brands,
)

In [8]:
#SVG(model_to_dot(model).create(prog='dot', format='svg'))

In [9]:
#model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
name_input (InputLayer)         (None, 21)           0                                            
__________________________________________________________________________________________________
item_desc_input (InputLayer)    (None, 301)          0                                            
__________________________________________________________________________________________________
name_embedding (Embedding)      (None, 21, 32)       992256      name_input[0][0]                 
__________________________________________________________________________________________________
item_desc_embedding (Embedding) (None, 301, 32)      1280256     item_desc_input[0][0]            
__________________________________________________________________________________________________
name_embed

In [10]:
#model = mdp.load_keras_model('merl_model-v3_10.h5')

In [None]:
# Fresh TensorBoard log directory for this training run.
tf_log_dir = MercariConfig.get_new_tf_log_dir()

# None leaves the choice to Keras, which falls back to its default batch size.
batch_size = None

# TensorBoard callback; it is only active once it is passed to fit() through
# the `callbacks` argument (currently commented out below).
tb_callback = keras.callbacks.TensorBoard(
    log_dir=tf_log_dir,
    histogram_freq=0,
    batch_size=batch_size,
    write_graph=True,
    write_grads=False,
    write_images=False,
    embeddings_freq=0,
    embeddings_layer_names=None,
    embeddings_metadata=None,
)

#reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
#                              patience=3, min_lr=0.001)

# Model inputs, in the order the original fit() call passed them.
train_inputs = [x_f_train, x_cat_train, x_brand_train, X_name_seq_train, X_item_desc_seq_train]
val_inputs = [x_f_val, x_cat_val, x_brand_val, X_name_seq_val, X_item_desc_seq_val]

# The validation set is supplied ONLY through `validation_data`. The original
# call also passed the validation inputs and y_val as the 3rd and 4th
# positional arguments; in Keras those positions are `batch_size` and `epochs`,
# so together with the keywords below the call failed with
# "TypeError: ... got multiple values for argument 'batch_size'".
history_simple = model.fit(
    train_inputs,
    y_train,
    batch_size=batch_size,
    epochs=1,
    verbose=1,
#    callbacks=[tb_callback],
    shuffle=True,
    initial_epoch=0,
    steps_per_epoch=None,
    # A tuple is the documented form of validation_data.
    validation_data=(val_inputs, y_val),
)