In [27]:
import time
import datetime
import numpy as np
import pandas as pd
import os

In [28]:
# Widen the notebook container to 90% of the browser window.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [29]:
# Print the current local time as "YYYY-MM-DD HH:MM:SS".
print(time.strftime("%Y-%m-%d %H:%M:%S"))

2018-05-01 03:07:33


In [30]:
class Timer():
    """
    Tiny console logger that prefixes messages with the current time.

    Every line printed has the form ``[<timestamp>] (<tag>) <info>``.
    The timer remembers a label (``info``) and a start time so that
    checkpoints can report how long the labelled section has been running.
    """

    def __init__(self):
        # Until start() is called, the section label is 'main' and the clock
        # runs from construction time.
        self.start_time = time.time()
        self.info = 'main'

    def start(self, info):
        """Begin a new section labelled `info`, reset the clock and log a 'start' line."""
        self.info, self.start_time = info, time.time()
        self.checkpoint('start', elapsed_on=False)

    def end(self):
        """Log an ' end ' line including the time elapsed since the section started."""
        self.checkpoint(' end ')

    def checkpoint(self, tag, elapsed_on=True):
        """
        Log the section label under `tag`.

        :param tag: short marker shown in parentheses
        :param elapsed_on: when True, append the elapsed time (rounded to
            whole seconds) since the section started
        """
        message = self.info
        if elapsed_on:
            seconds = round(time.time() - self.start_time)
            message = message + ' [time elapsed: %s]' % str(datetime.timedelta(seconds=seconds))
        self.output(tag, info=message)

    def output(self, tag=' '*5, info=''):
        """Print one log line; `info` is converted to a string if it is not one already."""
        text = info if isinstance(info, str) else str(info)
        print('[%s] (%s) %s' % (Timer.get_current_time(), tag, text))

    @staticmethod
    def get_current_time():
        """Return the current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
        return time.strftime("%Y-%m-%d %H:%M:%S")

# Two independent Timer instances. `timer` is used by the data-loading cells
# below; `sub_timer` is not used in the cells shown here.
timer = Timer()
sub_timer = Timer()

# Load Data

In [31]:
# Start timing the "Load Data" section; this prints a "(start)" log line.
timer.start('Load Data')
# Disabled: reading the split CSVs directly into DataFrames. The splits are
# loaded through load_split() further down instead.
# directory = '../data/split/'
# df_train = pd.read_csv(directory + 'train.csv')
# df_test_warm = pd.read_csv(directory + 'test_warm.csv')
# df_test_cold_user = pd.read_csv(directory + 'test_cold_user.csv')
# df_test_cold_item = pd.read_csv(directory + 'test_cold_item.csv')

[2018-05-01 03:07:33] (start) Load Data


In [32]:
# Read the event, song and user context tables from CSV.
directory = '../data/context/'
df_event_context = pd.read_csv(directory + 'event_context.csv')
df_song_context = pd.read_csv(directory + 'song_context.csv')
df_user_context = pd.read_csv(directory + 'user_context.csv')
# Log the time elapsed since timer.start('Load Data').
timer.checkpoint('context')

[2018-05-01 03:07:40] (context) Load Data [time elapsed: 0:00:07]


In [33]:
# Number of distinct user ids / song ids in the context tables.
# `num_user` sizes the per-user lists built by Data.load and the user
# embedding table; `num_item` sizes the item embedding table.
# NOTE(review): both uses index by raw id, so this presumably relies on ids
# being dense in [0, num) — confirm against the preprocessing step.
num_user = len(df_user_context.user_id.unique())
num_item = len(df_song_context.song_id.unique())
print (num_user)
print (num_item)

30755
359966


In [34]:
# # load target sets
# import pickle
# with open('../data/split/target_set.pickle', 'rb') as handle:
#     target_set = pickle.load(handle)
# with open('../data/split/train_target_set.pickle', 'rb') as handle:
#     train_target_set = pickle.load(handle)
# with open('../data/split/test_warm_target_set.pickle', 'rb') as handle:
#     test_warm_target_set = pickle.load(handle)
# with open('../data/split/test_cold_user_target_set.pickle', 'rb') as handle:
#     test_cold_user_target_set = pickle.load(handle)
# with open('../data/split/test_cold_item_target_set.pickle', 'rb') as handle:
#     test_cold_item_target_set = pickle.load(handle)

In [35]:
class Data():
    '''
    One split of the interaction data, organised per user id.

    Attributes (all None until load() is called, except name):
        name: label of the split
        df: the DataFrame read from the CSV file
        user_list: numpy array of the distinct user ids present in the split
        item_list: list(numpy array), item_list[u] holds the song ids of the
            rows of user u in file order (an empty array if u has no rows)
        target_set: list(set), target_set[u] holds the song ids of the rows
            of user u whose `target` column equals 1
    '''
    def __init__(self, name):
        self.name = name
        self.df = None
        self.user_list = None
        self.item_list = None
        self.target_set = None

    def load(self, filename, n_users=None):
        '''
        Read a split CSV and build the per-user structures.

        :param filename: path of a CSV file with at least the columns
            `user_id`, `song_id` and `target`
        :param n_users: length of the per-user lists; defaults to the
            notebook-level `num_user`. User ids are used directly as list
            indices, so every id must be smaller than this value.
        '''
        if n_users is None:
            n_users = num_user
        self.df = pd.read_csv(filename)

        # prepare user list
        self.user_list = self.df['user_id'].unique()

        # prepare item list
        # groupby keeps the rows of each user in file order and avoids the
        # per-row Series construction of DataFrame.apply(axis=1), which is
        # slow and upcasts the ids to float when the columns have mixed dtypes.
        item_list = [np.array([]) for _ in range(n_users)]
        for user_id, songs in self.df.groupby('user_id', sort=False)['song_id']:
            item_list[user_id] = songs.values
        self.item_list = item_list

        # prepare target set (only rows labelled target == 1)
        target_set = [set() for _ in range(n_users)]
        positives = self.df[self.df['target'] == 1]
        for user_id, songs in positives.groupby('user_id', sort=False)['song_id']:
            target_set[user_id] = set(songs.values)
        self.target_set = target_set
        
# def load_split(name):
#     data = Data(name)
#     # load the user ids in the data set
#     with open('../data/split/' + name + '_user_list.pickle', 'rb') as handle:
#         data.user_list = pickle.load(handle)
        
#     # load the list(set) for items in the data set with label=1
#     with open('../data/split/' + name + '_target_set.pickle', 'rb') as handle:
#         data.target_set = pickle.load(handle)
        
#     # load the list(set) for all items in the data set
#     with open('../data/split/' + name + '_item_set.pickle', 'rb') as handle:
#         data.item_set = pickle.load(handle)
        
#     return data

def load_split(name):
    """
    Build a Data object for the split called `name`.

    The split is read from '../data/split/<name>.csv'.

    :param name: split name, e.g. 'train' or 'test_warm'
    :return: the loaded Data instance
    """
    split = Data(name)
    split.load('../data/split/' + name + '.csv')
    return split

In [36]:
# data_train = load_split('train')
# data_test_warm = load_split('test_warm')
# data_test_cold_user = load_split('test_cold_user')
# data_test_cold_item = load_split('test_cold_item')

# Load the training split and the three test splits (warm, cold-user,
# cold-item) from '../data/split/'.
data_train = load_split('train')
data_test_warm = load_split('test_warm')
data_test_cold_user = load_split('test_cold_user')
data_test_cold_item = load_split('test_cold_item')
# Close the "Load Data" section and print its total elapsed time.
timer.end()

[2018-05-01 03:11:04] ( end ) Load Data [time elapsed: 0:03:30]


In [37]:
import keras
from keras.models import Model
from keras.layers import Dense, Input, Embedding, Dropout, Activation, Reshape, Lambda
from keras.layers.merge import concatenate, dot
from keras.layers.normalization import BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.regularizers import l2
from keras.initializers import RandomUniform, RandomNormal, TruncatedNormal, Zeros
from keras.optimizers import RMSprop, Adam, SGD
from sklearn.metrics import mean_squared_error
import tensorflow as tf

In [38]:
# Matrix-factorisation model: one embedding table for users and one for items;
# the predicted score is the dot product of the two embedded vectors.
REG_LAMBDA = 0   # L2 regularisation strength on both embedding tables (0 disables it)
EMBED_DIM = 64   # dimensionality of the user and item embeddings

vocab_size = num_user
user_embeddings = Embedding(
    input_dim = vocab_size,
    output_dim = EMBED_DIM,
    embeddings_initializer = RandomUniform(minval=-0.1, maxval=0.1),
    embeddings_regularizer = l2(REG_LAMBDA),
    input_length = 1,
    name = 'user_embed',
    trainable=True)

vocab_size = num_item
item_embeddings = Embedding(
    input_dim = vocab_size,
    output_dim = EMBED_DIM,
    embeddings_initializer = RandomUniform(minval=-0.1, maxval=0.1),
    embeddings_regularizer=l2(REG_LAMBDA),
    input_length=1,
    name = 'item_embed',
    trainable=True)

# embedding of user id
# (the Reshape drops the length-1 sequence axis produced by the Embedding layer)
uid_input = Input(shape=(1,), dtype='int32')
embedded_user = user_embeddings(uid_input)
embedded_user = Reshape((EMBED_DIM,))(embedded_user)

# embedding of song id
iid_input = Input(shape=(1,), dtype='int32')
embedded_item = item_embeddings(iid_input)
embedded_item = Reshape((EMBED_DIM,))(embedded_item)

# dot production of embedded vectors
preds = dot([embedded_user, embedded_item], axes=1, name='dot_score')

# embedding model
# (sub-models sharing the layers above; they expose the raw embedded vectors)
user_embed_model = Model(inputs=uid_input, outputs=embedded_user)
item_embed_model = Model(inputs=iid_input, outputs=embedded_item)

# Full model: (user id, item id) -> dot-product score, trained with MSE.
model_MF = Model(inputs=[uid_input, iid_input], outputs=preds)
model_MF.compile(
    loss=keras.losses.mean_squared_error, 
    optimizer=RMSprop(lr=1e-3),
#     optimizer=SGD(lr=1e-4),
    metrics=[keras.metrics.mean_squared_error])

In [39]:
# Location of the MF model weights; the directory is created if missing.
model_directory = '../model/mf/'
if not os.path.exists(model_directory):
    os.makedirs(model_directory)
model_path = model_directory + 'mf_model.h5'

In [40]:
# Path for the dropout-net weights.
# NOTE(review): the training cell later reassigns this name to
# '../model/dropout/dnn_model.h5' before saving — confirm which file is intended.
dropout_model_path = '../model/dropout/model.h5'

In [41]:
# load the best model
# Restore previously saved MF weights from `model_path`; the file must already
# exist, since this notebook does not train the MF model in the cells shown.
model_MF.load_weights(model_path)

In [42]:
def single_top_k(score_list, k):
    """
    Indices of the k largest entries of a 1-D score array, best first.

    :param score_list: 1-D array-like of scores
    :param k: number of indices to return; must not exceed len(score_list)
        (numpy raises ValueError otherwise)
    :return: integer numpy array of length k, ordered by decreasing score;
        an empty array when k <= 0
    """
    scores = np.asarray(score_list)
    if k <= 0:
        # `-0` would slice the whole array below and silently return every
        # index, so the "top 0" case is answered explicitly.
        return np.array([], dtype=np.intp)
    # argpartition moves the k largest scores (unordered) into the last k slots
    candidates = np.argpartition(scores, -k)[-k:]
    # order only those k candidates, then flip to descending
    ascending = candidates[np.argsort(scores[candidates])]
    return ascending[::-1].copy()

# try to implement a two-dimensional top_k
def two_dim_top_k(a, k):
    """
    Row-wise top-k: apply single_top_k to every row of a 2-D array.

    :return: numpy array with one row of k indices per row of `a`
    """
    per_row = []
    for row in a:
        per_row.append(single_top_k(row, k))
    return np.array(per_row)

def top_k(a, k):
    """
    Top-k indices for a 1-D or 2-D score array.

    A 1-D input yields one index array, a 2-D input yields one index array
    per row. Inputs of any other rank yield None.
    """
    rank = len(a.shape)
    if rank == 1:
        return single_top_k(a, k)
    if rank == 2:
        return two_dim_top_k(a, k)
    return None

In [43]:
# Sanity check: scores computed by multiplying the embedded vectors by hand
# should match what the full MF model predicts.
user = 1
item_list = np.array([1,2,3,4])
# embeddings of users 1 and 2, and of items 1..4
v_user = user_embed_model.predict(np.array([1, 2]))
v_item = item_embed_model.predict(item_list)

print(v_user.shape)

#_x = v_user @ v_item.T #require python 3.5
# one row of scores per user, one column per item
_x = np.matmul(v_user, v_item.T)
print(_x)

# scores of user 1 for the same items straight from the model; compare with
# the first row of `_x`
print(model_MF.predict([
    np.repeat(user, len(item_list)),
    item_list
]).flatten())

(2, 64)
[[  2.59375989e-01  -3.29650007e-02   4.17865440e-02   1.46565601e-01]
 [ -6.12905342e-03   2.17780974e-02  -5.20521775e-03  -2.27784272e-04]]
[ 0.25937599 -0.032965    0.04178654  0.1465656 ]


In [44]:
# For user 1 in the warm test split: number of candidate items vs. number of
# items labelled target == 1.
print(len(data_test_warm.item_list[1]), len(data_test_warm.target_set[1]))

(41, 19)


In [46]:
# recall at k
sess = tf.Session()
v_user_all = user_embed_model.predict(np.arange(num_user))
v_item_all = item_embed_model.predict(np.arange(num_item))
    
def __recall(klist, target, recommend_list):
    """
    Recall@k of one ranked recommendation list, for every k in `klist`.

    recall@k = |target ∩ top-k of recommend_list| / |target|

    Each k is evaluated independently, so `klist` does not need to be sorted.
    (The previous shortcut returned 1.0 whenever the user had fewer targets
    than k, even if the top-k did not actually contain those targets.)

    :param klist: iterable of cut-offs k
    :param target: set of relevant item ids
    :param recommend_list: item ids ordered from best to worst
    :return: list(float), one recall value per entry of klist; all 1.0 when
        `target` is empty (kept from the previous behaviour, and avoids a
        division by zero)
    """
    den = len(target) # denominator
    if den == 0:
        return [1.0 for _ in klist]
    recall_list = []
    for k in klist:
        hits = len(target & set(recommend_list[:k]))
        recall_list.append(float(hits) / float(den))
    return recall_list


def recall_mf(model, klist, data):
    '''
    Mean recall@k of the MF embeddings over all users of a data set.

    For every user, the user's candidate items (data.item_list[user]) are
    ranked by the dot product of the precomputed embeddings `v_user_all` and
    `v_item_all`, and the ranking is scored against data.target_set[user].

    :param model: unused; scores come from the precomputed embeddings. Kept
        so existing calls keep working.
    :param klist: the list of k's in recall@k, e.g. [50, 100, 150, ...]
    :param data: data set for evaluation
        - user_list
        - target_set
        - item_list
    :return: numpy array with the mean recall at each k, same length as klist
    '''
    recall_at_k = []
    max_k = max(klist)
    for user in data.user_list:
        candidates = data.item_list[user]

        # scores of this user for each candidate item
        v_user = v_user_all[user]
        v_item = v_item_all[candidates]
        score_list = np.matmul(v_user, v_item.T).flatten()

        # a user may have fewer candidates than the largest k
        k = min(max_k, len(candidates))
        recommend_list = candidates[top_k(score_list, k)]

        # evaluate recall
        recall_at_k.append(__recall(klist, data.target_set[user], recommend_list))
    return np.mean(recall_at_k, axis=0)


def recall_random(klist, data):
    """
    Mean recall@k of a random ranking, as a baseline.

    Each user's candidate items are ranked by scores drawn uniformly from
    [0, 1) and evaluated against the user's target set.

    :param klist: the list of k's in recall@k
    :param data: data set providing user_list, item_list and target_set
    :return: numpy array with the mean recall at each k
    """
    largest_k = max(klist)
    per_user_recall = []
    for user in data.user_list:
        candidates = data.item_list[user]
        # one random score per candidate item
        random_scores = np.random.uniform(low=0, high=1, size=len(candidates))

        cutoff = min(largest_k, len(candidates))
        ranked = candidates[top_k(random_scores, cutoff)]

        per_user_recall.append(__recall(klist, data.target_set[user], ranked))
    return np.mean(per_user_recall, axis=0)

The following code will be quite slow: each user takes around 4s to run.

```
score_list = model_MF.predict([
    np.repeat(user, len(item_list)),
    item_list
]).flatten()
```

**It's because fetching embeddings is slow.** So get the embeddings first and use matrix multiplication!

After this modification, it will still take > 30 minutes (I don't know exactly how long...)

### Note!!

It's **far** better to get more from the model in a single call than to call the model multiple times!

The difference between warm and cold is due to the difference in denominator (maybe), so don't compare them with each other.

## TODO: Dropout Net

### Initialize the model

In [47]:
# User context columns fed to the model: categorical ones get an embedding
# each, numerical ones are passed through as a single vector.
user_CATEGORICAL = [
    'city', 'gender', 'registered_via', 'registration_year', 
    'registration_month', 'registration_day', 'expiration_year', 
    'expiration_month', 'expiration_day']
user_NUMERICAL = ['age', 'weird_age', 'validate_days']
# Displayed check: (columns of df_user_context in neither list,
#                   columns that appear in both lists)
set(df_user_context.columns) - (set(user_CATEGORICAL).union(set(user_NUMERICAL))), \
set(user_CATEGORICAL).intersection(set(user_NUMERICAL))

({'Unnamed: 0', 'user_id'}, set())

In [48]:
# Song context columns fed to the model: categorical ones get an embedding
# each, numerical ones are passed through as a single vector.
item_CATEGORICAL = [
    'artist_name', 'composer', 'genre_ids', 'language', 
    'lyricist', 'song_year']
item_NUMERICAL = [
    'song_length', 'genre_count', 'lyricist_count',
    'composer_count', 'artist_count', 'is_featured',
    'artist_composer', 'artist_composer_lyricist', 
    'song_lang_boolean', 'smaller_song']
# Displayed check: (columns of df_song_context in neither list,
#                   columns that appear in both lists)
set(df_song_context.columns) - (set(item_CATEGORICAL).union(set(item_NUMERICAL))), \
set(item_CATEGORICAL).intersection(set(item_NUMERICAL))

({'Unnamed: 0', 'song_id'}, set())

In [49]:
def evaluate_cold(x, recall_at):
    """
    Indices of the top-scoring columns of x[0] . x[1]^T for every row.

    :param x: pair of tensors; scores are computed as x[0] times the
        transpose of x[1]
    :param recall_at: sequence of cut-offs; its last entry is used as k
        (presumably the largest cut-off — confirm the list is ascending)
    :return: tensor of top-k indices per row, sorted by decreasing score
    """
    # `tf.matual` does not exist; the intended op is tf.matmul.
    embedding_prod_cold = tf.matmul(x[0], x[1], transpose_b = True, name='pred_all_items')
    _, eval_preds_cold = tf.nn.top_k(embedding_prod_cold, k=recall_at[-1], sorted=True, name='topK_net_cold')
    return eval_preds_cold

def evaluate_warm(x, recall_at):
    """
    Like evaluate_cold, but adds x[2] to the scores before ranking.

    :param x: triple of tensors; scores are x[0] times the transpose of x[1],
        plus x[2] combined through tf.sparse_add
        (x[2] is presumably a sparse mask for already-seen items — confirm)
    :param recall_at: sequence of cut-offs; its last entry is used as k
    :return: tensor of top-k indices per row, sorted by decreasing score
    """
    # `tf.matual` does not exist; the intended op is tf.matmul.
    embedding_prod_cold = tf.matmul(x[0], x[1], transpose_b = True)
    embedding_prod_warm = tf.sparse_add(embedding_prod_cold, x[2])
    _, eval_preds_warm = tf.nn.top_k(embedding_prod_warm, k=recall_at[-1], sorted=True, name='topK_net_warm')
    return eval_preds_warm

def prediction(x):
    """Score matrix: x[0] multiplied by the transpose of x[1]."""
    left, right = x[0], x[1]
    return tf.matmul(left, right, transpose_b=True)

def topk_vals(x, num_candidates):
    """Values of the `num_candidates` largest entries along the last axis of x, flattened to 1-D."""
    top = tf.nn.top_k(x, k=num_candidates, sorted=True)
    return tf.reshape(top[0], [-1], name='select_y_vals')

def topk_inds(x, num_candidates):
    """
    Indices of the `num_candidates` largest entries along the last axis of x,
    flattened to 1-D.
    """
    _, tf_topk_inds = tf.nn.top_k(x, k=num_candidates, sorted=True)
    # The op was previously named 'select_y_vals', copy-pasted from
    # topk_vals; it holds indices, so it gets its own name.
    return tf.reshape(tf_topk_inds, [-1], name='select_y_inds')

def random_target(x, num_candidates):
    """
    Gather the entries of x[0] addressed by the index tensor x[1] and
    flatten them to 1-D. `num_candidates` is accepted but not used.
    """
    params, indices = x[0], x[1]
    gathered = tf.gather_nd(params, indices)
    return tf.reshape(gathered, [-1], name='random_y_inds')

def latent_topk_cold(x, recall_at):
    """Indices of the top recall_at[-1] entries along the last axis of x, best first."""
    k = recall_at[-1]
    top = tf.nn.top_k(x, k=k, sorted=True, name='topK_latent_cold')
    return top[1]

def latent_topk_warm(x, recall_at):
    """
    Indices of the top recall_at[-1] entries of x[0] + x[1], best first.
    The two inputs are combined with tf.sparse_add.
    """
    k = recall_at[-1]
    combined = tf.sparse_add(x[0], x[1])
    top = tf.nn.top_k(combined, k=k, sorted=True, name='topK_latent_warm')
    return top[1]

def dense_batch_fc_tanh(x, units, scope, do_norm=False):
    """
    Fully connected layer followed by optional batch normalisation and tanh.

    :param x: input tensor
    :param units: number of output units of the Dense layer
    :param scope: accepted but not used by the layers created here
    :param do_norm: when True, insert BatchNormalization before the tanh
    :return: the activated output tensor
    """
    hidden = Dense(
        units,
        kernel_initializer = TruncatedNormal(stddev=0.01),
        bias_initializer = Zeros())(x)
    if do_norm:
        hidden = BatchNormalization(momentum = 0.9, center=True, scale=True)(hidden)
    return Activation('tanh')(hidden)

In [50]:
class DeepCF:
    """
    main model class implementing DeepCF

    Two towers (user and item) each take a preference vector plus content
    features, pass them through the same stack of hidden layer sizes, and
    are scored against each other with a dot product.

    latent_rank_in: rank of preference model input
    model_select: array of number of hidden unit,
        i.e. [200,100] indicate two hidden layer with 200 units followed by 100 units
    rank_out: rank of latent model output

    The content features are read from the notebook-level df_user_context /
    df_song_context tables and the *_CATEGORICAL / *_NUMERICAL column lists
    when build_model() is called.
    """

    def __init__(self, latent_rank_in, model_select, rank_out):

        self.rank_in = latent_rank_in
        self.model_select = model_select
        self.rank_out = rank_out

    @staticmethod
    def _embed_size(vocab_size):
        """
        Embedding width for a categorical column: 2 ** ceil(log2(log2(vocab_size))).

        Vocabularies of size <= 2 get width 1; for size 2 the formula already
        gives 1, and for size 1 it is undefined (log2(0)).
        """
        if vocab_size <= 2:
            return 1
        return int(np.power(2, int(np.ceil(np.log2(np.log2(vocab_size))))))

    def context_model(self, tag, df_context, CATEGORICAL, NUMERICAL):
        """
        Build the content-feature inputs for one tower.

        :param tag: prefix for layer names ('user' or 'item')
        :param df_context: DataFrame used only to size each categorical
            vocabulary (column max + 1)
        :param CATEGORICAL: columns that each get an Input and an Embedding
        :param NUMERICAL: columns fed together through one numerical Input
        :return: (input_layers, preds): the list of Input layers (categorical
            ones in order, then the numerical one) and the concatenation of
            all embeddings with the numerical input
        """
        input_layers = []
        embed_layers = []
        for col in CATEGORICAL:
            input_layer = Input(shape=(1,), name=tag + '_' + col + '_input')
            input_layers.append(input_layer)
            vocab_size = int(df_context[col].max()) + 1
            embed_size = DeepCF._embed_size(vocab_size)
            print('[%s] %-20s\tvocab: %-8d, embed: %-4d' % (tag, col, vocab_size, embed_size))
            embed_layer = Embedding(
                input_dim = vocab_size,
                output_dim = embed_size,
                embeddings_initializer = RandomUniform(minval=-0.1, maxval=0.1),
                embeddings_regularizer = l2(1e-4),
                input_length = 1,
                name = tag + '_' + col+'_embed',
                trainable=True)
            embed_layer = embed_layer(input_layer)
            # drop the length-1 sequence axis added by the Embedding layer
            embed_layer = Reshape((embed_size,))(embed_layer)
            embed_layers.append(embed_layer)

        numerical_input = Input(shape=(len(NUMERICAL),), name=tag+'_numerical_input')
        input_layers.append(numerical_input)

        preds = concatenate(embed_layers + [numerical_input], name=tag + '_content')
        return input_layers, preds

    def build_model(self):
        """
        Assemble and compile the network.

        Sets self.model (the full scoring model) as well as self.user_model
        and self.item_model, which output the final user / item embeddings.
        """
        self.Vin = Input(shape=(self.rank_in,), dtype='float32', name='V_in_raw')
        self.Uin = Input(shape=(self.rank_in,), dtype='float32', name='U_in_raw')

        self.user_inputs, self.Ucontent = self.context_model(
            'user', df_user_context, CATEGORICAL=user_CATEGORICAL, NUMERICAL=user_NUMERICAL)
        self.item_inputs, self.Vcontent = self.context_model(
            'item', df_song_context, CATEGORICAL=item_CATEGORICAL, NUMERICAL=item_NUMERICAL)

        # each tower sees its preference vector next to its content features
        u_concat = concatenate([self.Uin, self.Ucontent])
        v_concat = concatenate([self.Vin, self.Vcontent])
        u_last = u_concat
        v_last = v_concat
        for ihid, hid in enumerate(self.model_select):
            u_last = dense_batch_fc_tanh(u_last, hid, 'user_layer_%d' % (ihid + 1), do_norm=True)
            v_last = dense_batch_fc_tanh(v_last, hid, 'item_layer_%d' % (ihid + 1), do_norm=True)

        self.U_embedding = Dense(
            self.rank_out,
            kernel_initializer = TruncatedNormal(stddev=0.01),
            bias_initializer = Zeros())(u_last)
        self.V_embedding = Dense(
            self.rank_out,
            kernel_initializer = TruncatedNormal(stddev=0.01),
            bias_initializer = Zeros())(v_last)
        self.preds = dot([self.U_embedding, self.V_embedding], axes=1, name='dot_score')
        # input order expected by self.model: U pref, V pref, user content, item content
        self.input_layers = [self.Uin, self.Vin] + self.user_inputs + self.item_inputs

        self.user_model = Model(inputs=[self.Uin] + self.user_inputs, outputs=self.U_embedding)
        self.item_model = Model(inputs=[self.Vin] + self.item_inputs, outputs=self.V_embedding)

        model = Model(inputs=self.input_layers, outputs=self.preds)
        model.compile(optimizer='rmsprop', loss='mean_squared_error')
        self.model = model

In [51]:
# Hyper-parameters of the dropout net.
latent_rank_in = 64        # width of the preference input; equals EMBED_DIM of the MF model
model_select = [200, 100]  # hidden layer sizes applied to both towers
rank_out = 100             # width of the final user / item embeddings

# Build and compile the network; context_model prints one line per
# categorical column with its vocabulary and embedding size.
dropout_net = DeepCF(latent_rank_in, model_select, rank_out)
dropout_net.build_model()
# dropout_net.build_predictor(klist, n_scores_user)
dropout_net.model.summary()

[user] city                	vocab: 21      , embed: 8   
[user] gender              	vocab: 3       , embed: 2   
[user] registered_via      	vocab: 5       , embed: 4   
[user] registration_year   	vocab: 14      , embed: 4   
[user] registration_month  	vocab: 12      , embed: 4   
[user] registration_day    	vocab: 31      , embed: 8   
[user] expiration_year     	vocab: 18      , embed: 8   
[user] expiration_month    	vocab: 12      , embed: 4   
[user] expiration_day      	vocab: 31      , embed: 8   
[item] artist_name         	vocab: 40583   , embed: 16  
[item] composer            	vocab: 76064   , embed: 32  
[item] genre_ids           	vocab: 573     , embed: 16  
[item] language            	vocab: 10      , embed: 4   
[item] lyricist            	vocab: 33888   , embed: 16  
[item] song_year           	vocab: 100     , embed: 8   
__________________________________________________________________________________________________
Layer (type)                    Output Shape  

### Training

In [55]:
class Stopwatch():
    """
    Accumulating stopwatch for coarse profiling.

    Each tic()/toc() pair adds the wall-clock time between the two calls to
    `total`; show() prints the accumulated total with the label `info`.
    """
    def __init__(self, info=''):
        self.total = 0
        self.info = info
        # set by tic(); None means the stopwatch has never been started
        self.start_time = None

    def clear(self):
        """Reset the accumulated total to zero."""
        self.total = 0

    def tic(self):
        """Mark the start of a timed interval."""
        self.start_time = time.time()

    def toc(self):
        """
        Add the time elapsed since the last tic() to the total.

        :raises RuntimeError: if tic() has never been called
        """
        if self.start_time is None:
            raise RuntimeError('Stopwatch.toc() called before tic()')
        self.total += time.time() - self.start_time

    def show(self):
        """Print the accumulated seconds followed by the label."""
        print('%.3f seconds \t %s' % (self.total, self.info))

In [56]:
def generate_user_content(user_list):
    """
    Model inputs for the user content features of the given users.

    Rows are selected from df_user_context with .loc, i.e. by index label.
    NOTE(review): this presumably relies on the index label being the user id — confirm.

    :return: list with one Series per column in user_CATEGORICAL, followed by
        one DataFrame holding the user_NUMERICAL columns
    """
    content = []
    for col in user_CATEGORICAL:
        content.append(df_user_context.loc[user_list, col])
    content.append(df_user_context.loc[user_list, user_NUMERICAL])
    return content
        
def generate_item_content(item_list):
    """
    Model inputs for the item content features of the given items.

    Rows are selected from df_song_context with .loc, i.e. by index label.
    NOTE(review): this presumably relies on the index label being the song id — confirm.

    :return: list with one Series per column in item_CATEGORICAL, followed by
        one DataFrame holding the item_NUMERICAL columns
    """
    content = []
    for col in item_CATEGORICAL:
        content.append(df_song_context.loc[item_list, col])
    content.append(df_song_context.loc[item_list, item_NUMERICAL])
    return content

In [68]:
import utils

# Preference inputs: the MF embeddings computed earlier, plus standardised
# copies produced by utils.prep_standardize.
u_pref = v_user_all
v_pref = v_item_all
_, u_pref_scaled = utils.prep_standardize(u_pref)
_, v_pref_scaled = utils.prep_standardize(v_pref)
# Each *_expanded matrix gets one extra all-zero row at the end; its index
# (*_pref_last) stands for "no preference information" (dropout).
v_pref_expanded = np.vstack([v_pref_scaled, np.zeros_like(v_pref_scaled[0, :])])
v_pref_last = v_pref_scaled.shape[0] # index of the all-zero item row
u_pref_expanded = np.vstack([u_pref_scaled, np.zeros_like(u_pref_scaled[0, :])])
u_pref_last = u_pref_scaled.shape[0] # index of the all-zero user row


# configuration
user_batch_size = 1000
# n_scores_user = 2500
n_scores_user = 100   # items sampled per user: this many top-scored plus this many random

data_batch_size = 100
max_data_per_step = 2500000
num_epoch = 1
_lr = 0.005
_decay_lr_every = 50
_lr_decay = 0.1
dropout = 0.5

# counting variables
n_step = 0
n_batch_trained = 0

# profiling
sw = [Stopwatch() for _ in range(20)]
row_index = np.copy(data_train.user_list)

num_users = len(row_index)
num_items = v_pref.shape[0]
user_content_length = 10
item_content_length = 7

print("prepare data")
# Every training user contributes 2 * n_scores_user samples.
n_samples = num_users * n_scores_user * 2
u_pref_train = np.zeros([n_samples, u_pref.shape[1]])
v_pref_train = np.zeros([n_samples, v_pref.shape[1]])
scores_train = np.zeros([n_samples, ])
# item id of every sample, needed afterwards to look up the item content
item_index_train = np.zeros([n_samples, ], dtype=np.int64)
sw[0].tic()
np.random.shuffle(row_index)
ind = 0
for i in row_index:
    # MF scores of user i for every item
    score_vector = np.matmul(u_pref[i], v_pref.T)

    # top-N items of this user plus N items drawn uniformly at random
    target_items = top_k(score_vector, k = n_scores_user).flatten()
    target_items_rand = np.random.choice(num_items, n_scores_user)
    target_items_rand = np.array(target_items_rand).flatten()

    target_scores = score_vector[target_items]
    random_scores = score_vector[target_items_rand]

    # merge topN and randomN items per user
    target_scores = np.append(target_scores, random_scores)
    target_items = np.append(target_items, target_items_rand)

    # Half of the samples keep the user's preference vector, the other half
    # point at the all-zero row; shuffling decides which samples are dropped.
    # The zero row lives at u_pref_last (not at num_users, which is only the
    # number of training users and addresses a real user's row).
    target_users = np.repeat(i, n_scores_user)
    target_users = np.append(target_users, np.repeat(u_pref_last, n_scores_user))
    np.random.shuffle(target_users)

    n_targets = len(target_scores)

    u_pref_train[ind:ind+n_targets] = u_pref_expanded[target_users, :]
    # item preferences are looked up by item id (previously by user id)
    v_pref_train[ind:ind+n_targets] = v_pref_expanded[target_items, :]
    item_index_train[ind:ind+n_targets] = target_items
    scores_train[ind:ind+n_targets] = target_scores
    ind += n_targets
sw[0].toc()

sw[1].tic()
# one user id per sample, in the same order as the rows filled in above
row_index = np.repeat(row_index, 2 * n_scores_user)
user_content_train = generate_user_content(row_index)
# item content is looked up by item id (previously by user id)
item_content_train = generate_item_content(item_index_train)
sw[1].toc()

print("begin training")
sw[2].tic()
dropout_net.model.fit(
    x = [u_pref_train, v_pref_train] + user_content_train + item_content_train,
    y = scores_train,
    batch_size = data_batch_size,
    validation_split=0.1,
    epochs = num_epoch,
    verbose = 1,
    shuffle = True)

sw[2].toc()

dropout_model_path = '../model/dropout/dnn_model.h5'
# make sure the target directory exists before saving
dropout_model_directory = os.path.dirname(dropout_model_path)
if not os.path.exists(dropout_model_directory):
    os.makedirs(dropout_model_directory)
dropout_net.model.save_weights(dropout_model_path)



prepare data
begin training
Train on 4014720 samples, validate on 446080 samples
Epoch 1/1


In [69]:
# Print the accumulated time of the first seven stopwatches. In the training
# cell, sw[0] times sample preparation, sw[1] content lookup and sw[2]
# model fitting; the others are never started there and show 0.
for i in range(7):
    print(i)
    sw[i].show()

0
565.297 seconds 	 
1
213.430 seconds 	 
2
601.940 seconds 	 
3
0.000 seconds 	 
4
0.000 seconds 	 
5
0.000 seconds 	 
6
0.000 seconds 	 
