In [1]:
import time
import datetime
import numpy as np
import pandas as pd
import os
import random

In [2]:
# Widen the notebook container so that wide outputs are easier to read.
# `display` and `HTML` come from the public `IPython.display` module; importing
# them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [3]:
print(time.strftime("%Y-%m-%d %H:%M:%S"))

2018-05-03 16:54:56


In [4]:
class Timer():
    """Tiny console logger that prefixes messages with a timestamp and can
    report the time elapsed since the last call to :meth:`start`.

    Attributes:
        info: label of the task currently being timed ('main' until `start`
            is called).
        start_time: `time.time()` value captured at construction or at the
            last `start` call.
    """

    def __init__(self):
        self.info = 'main'
        self.start_time = time.time()

    def start(self, info):
        """Begin timing a new task labelled `info` and log a 'start' line."""
        self.info = info
        self.start_time = time.time()
        self.checkpoint('start', elapsed_on=False)

    def end(self):
        """Log an ' end ' line including the elapsed time."""
        self.checkpoint(' end ')

    def checkpoint(self, tag, elapsed_on=True):
        """Log a line tagged `tag` for the current task.

        :param tag: short text printed between parentheses
        :param elapsed_on: when True, append the time elapsed since the last
            `start` (rounded to whole seconds) to the message
        """
        if elapsed_on:
            elapsed = datetime.timedelta(seconds=round(time.time() - self.start_time))
            expanded_info = self.info + ' [time elapsed: %s]' % str(elapsed)
        else:
            expanded_info = self.info
        self.output(tag, info=expanded_info)

    def output(self, tag=' '*5, info=''):
        """Print `[timestamp] (tag) info`; non-string `info` is converted
        with `str` first."""
        if not isinstance(info, str):
            info = str(info)
        print('[%s] (%s) %s' % (Timer.get_current_time(), tag, info))

    @staticmethod
    def get_current_time():
        """Return the current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
        return time.strftime("%Y-%m-%d %H:%M:%S")

timer = Timer()
sub_timer = Timer()

# Load Data

In [5]:
timer.start('Load Data')
# directory = '../data/split/'
# df_train = pd.read_csv(directory + 'train.csv')
# df_test_warm = pd.read_csv(directory + 'test_warm.csv')
# df_test_cold_user = pd.read_csv(directory + 'test_cold_user.csv')
# df_test_cold_item = pd.read_csv(directory + 'test_cold_item.csv')

[2018-05-03 16:54:56] (start) Load Data


In [6]:
directory = '../data/context/'
# df_event_context = pd.read_csv(directory + 'event_context.csv')
df_song_context = pd.read_csv(directory + 'song_context.csv')
df_user_context = pd.read_csv(directory + 'user_context.csv')
timer.checkpoint('context')

[2018-05-03 16:55:01] (context) Load Data [time elapsed: 0:00:05]


In [7]:
# Number of distinct user / song ids found in the context tables.
# NOTE(review): these counts are used elsewhere in the notebook as the
# embedding vocabulary size and as the length of per-user lists indexed by
# user_id, which presumably assumes ids are contiguous 0..n-1 -- confirm.
num_user = len(df_user_context.user_id.unique())
num_item = len(df_song_context.song_id.unique())
print (num_user)
print (num_item)

30755
359966


In [9]:
class Data():
    """One split of the interaction data, with per-user lookup structures.

    All attributes except `name` are None until `load` is called.
    """

    def __init__(self, name):
        '''
        name: label of the split
        df: the DataFrame read from the CSV file
        user_list: array(int), the unique user ids present in the dataset
        item_list: list(numpy array) indexed by user id, the song ids of that
            user's rows in file order
        target_set: list(set) indexed by user id, the song ids of that user's
            rows whose target equals 1
        '''
        self.name = name
        self.df = None
        self.user_list = None
        self.item_list = None
        self.target_set = None

    def load(self, filename):
        """Read `filename` (CSV with `user_id`, `song_id` and `target`
        columns) and build `user_list`, `item_list` and `target_set`.

        The per-user containers have length `num_user` (a module-level value),
        so every user id in the file must be a valid index below it.
        """
        self.df = pd.read_csv(filename)
        # prepare user list
        self.user_list = self.df['user_id'].unique()

        # Walk the raw column arrays once instead of calling
        # DataFrame.apply(axis=1), which builds a Series per row and is
        # far slower on large files.
        users = self.df['user_id'].values
        songs = self.df['song_id'].values

        # prepare item list
        items_per_user = [[] for _ in range(num_user)]
        for user, song in zip(users, songs):
            items_per_user[user].append(song)
        self.item_list = [np.array(items) for items in items_per_user]

        # prepare target set
        targets_per_user = [set() for _ in range(num_user)]
        is_target = self.df['target'].values == 1
        for user, song in zip(users[is_target], songs[is_target]):
            targets_per_user[user].add(song)
        self.target_set = targets_per_user

def load_split(name, directory='../data/split/'):
    """Build a `Data` object for the split called `name`.

    :param name: split name; the file read is `<directory>/<name>.csv`
    :param directory: folder containing the split CSV files
    :return: the loaded `Data` instance
    """
    data = Data(name)
    data.load(os.path.join(directory, name + '.csv'))
    return data

In [10]:
data_train = load_split('train')
data_test_warm = load_split('test_warm')
data_test_cold_user = load_split('test_cold_user')
data_test_cold_item = load_split('test_cold_item')
timer.end()

[2018-05-03 17:00:13] ( end ) Load Data [time elapsed: 0:05:17]


In [11]:
# dump the class for more efficient data preparing
# data_test_warm is dumped as well: the evaluation cell uses all four splits.
import pickle
for split_name, split_data in [
        ('train', data_train),
        ('test_warm', data_test_warm),
        ('test_cold_user', data_test_cold_user),
        ('test_cold_item', data_test_cold_item)]:
    with open('../data/split/data_%s.pickle' % split_name, 'wb') as handle:
        pickle.dump(split_data, handle)

In [19]:
# load the data class
# NOTE: pickle.load executes arbitrary code from the file; only load pickles
# produced by this notebook.
import pickle
with open('../data/split/data_train.pickle', 'rb') as handle:
    data_train = pickle.load(handle)
with open('../data/split/data_test_cold_user.pickle', 'rb') as handle:
    data_test_cold_user = pickle.load(handle)
with open('../data/split/data_test_cold_item.pickle', 'rb') as handle:
    data_test_cold_item = pickle.load(handle)

# The evaluation cell also needs data_test_warm. Restore it from its pickle
# when one exists; otherwise rebuild it from the CSV unless it is already in
# memory, so that a fresh kernel does not fail with a NameError later.
warm_pickle_path = '../data/split/data_test_warm.pickle'
if os.path.exists(warm_pickle_path):
    with open(warm_pickle_path, 'rb') as handle:
        data_test_warm = pickle.load(handle)
elif 'data_test_warm' not in globals():
    data_test_warm = load_split('test_warm')

In [25]:
import keras
from keras.models import Model
from keras.layers import Dense, Input, Embedding, Dropout, Activation, Reshape, Lambda, Multiply
from keras.layers.merge import concatenate, dot
from keras.layers.normalization import BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.regularizers import l2
from keras.initializers import RandomUniform, RandomNormal, TruncatedNormal, Zeros
from keras.optimizers import RMSprop, Adam, SGD
from sklearn.metrics import mean_squared_error
import tensorflow as tf

### Define and load the MF model for comparing

In [14]:
# Matrix-factorisation baseline: one embedding per user id and per song id,
# scored by their dot product.
REG_LAMBDA = 0   # L2 coefficient passed to both embedding regularizers
EMBED_DIM = 64   # size of the user / item latent vectors

# one embedding row per user id in [0, num_user)
vocab_size = num_user
user_embeddings = Embedding(
    input_dim = vocab_size,
    output_dim = EMBED_DIM,
    embeddings_initializer = RandomUniform(minval=-0.1, maxval=0.1),
    embeddings_regularizer = l2(REG_LAMBDA),
    input_length = 1,
    name = 'user_embed',
    trainable=True)

# one embedding row per song id in [0, num_item)
vocab_size = num_item
item_embeddings = Embedding(
    input_dim = vocab_size,
    output_dim = EMBED_DIM,
    embeddings_initializer = RandomUniform(minval=-0.1, maxval=0.1),
    embeddings_regularizer=l2(REG_LAMBDA),
    input_length=1,
    name = 'item_embed',
    trainable=True)

# embedding of user id
uid_input = Input(shape=(1,), dtype='int32')
embedded_user = user_embeddings(uid_input)
# reshape the embedding output to a flat vector of length EMBED_DIM
embedded_user = Reshape((EMBED_DIM,))(embedded_user)

# embedding of song id
iid_input = Input(shape=(1,), dtype='int32')
embedded_item = item_embeddings(iid_input)
embedded_item = Reshape((EMBED_DIM,))(embedded_item)

# dot product of the embedded vectors
preds = dot([embedded_user, embedded_item], axes=1, name='dot_score')

# embedding model
# (sub-models sharing the layers above, used to export the latent vectors)
user_embed_model = Model(inputs=uid_input, outputs=embedded_user)
item_embed_model = Model(inputs=iid_input, outputs=embedded_item)

# full model: (user id, song id) -> dot-product score, trained with MSE
model_MF = Model(inputs=[uid_input, iid_input], outputs=preds)
model_MF.compile(
    loss=keras.losses.mean_squared_error, 
    optimizer=RMSprop(lr=1e-3),
#     optimizer=SGD(lr=1e-4),
    metrics=[keras.metrics.mean_squared_error])


Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [15]:
model_directory = '../model/mf/'
if not os.path.exists(model_directory):
    os.makedirs(model_directory)
model_path = model_directory + 'mf_model.h5'

In [16]:
# load the best model
model_MF.load_weights(model_path)

In [17]:
def single_top_k(score_list, k):
    """Return the indices of the `k` largest scores, best score first.

    :param score_list: 1-D array-like of scores
    :param k: number of indices wanted; values larger than the number of
        scores are clamped, and `k <= 0` yields an empty index array
    :return: numpy integer array of at most `k` indices into `score_list`
    """
    scores = np.asarray(score_list)
    k = min(k, len(scores))
    if k <= 0:
        # Without this guard `[-0:]` would select every index, not none.
        return np.array([], dtype=np.intp)
    # argpartition finds the top-k in linear time; only those k are sorted.
    candidates = np.argpartition(scores, -k)[-k:]
    descending = np.argsort(scores[candidates])[::-1]
    return candidates[descending]

# try to implement a two-dimensional top_k
def two_dim_top_k(a, k):
    """Run `single_top_k` on each row of `a` and stack the per-row index
    arrays into one numpy array."""
    per_row = []
    for row in a:
        per_row.append(single_top_k(row, k))
    return np.array(per_row)

def top_k(a, k):
    """Dispatch on the rank of `a`: 1-D input goes to `single_top_k`, 2-D
    input to `two_dim_top_k`; any other rank returns None."""
    rank = len(a.shape)
    if rank == 1:
        return single_top_k(a, k)
    if rank == 2:
        return two_dim_top_k(a, k)
    return None

In [18]:
# recall at k
# NOTE(review): `sess` is not referenced anywhere else in the visible
# notebook -- confirm whether it is still needed.
sess = tf.Session()
# Export every user / item latent vector of the MF model once, so that scoring
# can be done with plain matrix products instead of per-user model calls.
v_user_all = user_embed_model.predict(np.arange(num_user))
v_item_all = item_embed_model.predict(np.arange(num_item))
    
def __recall(klist, target, recommend_list):
    """Compute recall@k for one user at every k in `klist`.

    :param klist: iterable of cut-offs, evaluated in the order given
    :param target: set of relevant item ids
    :param recommend_list: ranked sequence of recommended item ids
    :return: list(float), one value per entry of `klist`

    Once a value of 1.0 is reached it is kept for all remaining cut-offs.
    NOTE(review): recall is also forced to 1.0 whenever the number of targets
    is smaller than k, regardless of the actual hits -- confirm this is the
    intended metric.
    """
    n_target = len(target)
    results = []
    current = 0.0
    for k in klist:
        if n_target < k:
            current = 1.0
        if current != 1.0:
            # not saturated yet: count the targets among the top-k items
            hits = len(target & set(recommend_list[:k]))
            current = float(hits) / float(n_target)
        results.append(current)
    return results


def recall_mf(model, klist, data):
    '''
    Mean recall@k of the MF baseline, scoring each user's candidate items
    with the precomputed module-level embeddings `v_user_all` / `v_item_all`.

    :param model: unused; kept so existing calls keep working
    :param klist: the list of k's in recall@k, e.g. [50, 100, 150, ...]
    :param data: data set for evaluation
        - user_list
        - target_set
        - item_list
    :return: array(float) for recall at each k, with the same size as klist
    '''
    recall_at_k = []
    max_k = max(klist)
    for user in data.user_list:
        candidates = data.item_list[user]

        # get the corresponding embedded vectors
        v_user = v_user_all[user]
        v_item = v_item_all[candidates]

        # compute the scores: one dot product per candidate item
        score_list = np.matmul(v_user, v_item.T).flatten()

        # get the recommended list (never ask for more items than available)
        k = min(max_k, len(candidates))
        indices = top_k(score_list, k)
        recommend_list = candidates[indices]

        # evaluate recall
        recall_at_k.append(__recall(klist, data.target_set[user], recommend_list))
    return np.mean(recall_at_k, axis=0)


def recall_random(klist, data):
    """Mean recall@k of a random ranker: each user's candidate items are
    ranked by uniform random scores.

    :param klist: the list of k's in recall@k
    :param data: data set providing user_list, item_list and target_set
    :return: array(float) for recall at each k, with the same size as klist
    """
    largest_k = max(klist)
    per_user = []
    for user in data.user_list:
        candidates = data.item_list[user]
        n_candidates = len(candidates)

        # random scores stand in for model predictions
        random_scores = np.random.uniform(low=0, high=1, size=n_candidates)

        picked = top_k(random_scores, min(largest_k, n_candidates))
        ranked_items = candidates[picked]

        per_user.append(__recall(klist, data.target_set[user], ranked_items))
    return np.mean(per_user, axis=0)

The following code will be quite slow: each user takes around 4s to run.

```
score_list = model_MF.predict([
    np.repeat(user, len(item_list)),
    item_list
]).flatten()
```

**It's because fetching embeddings is slow.** So get the embeddings first and use matrix multiplication!

After modification, it will still take > 30 minutes (I don't know how long...)

### Note!!

It's **far** better to get more from the model at one time rather than calling the model multiple times!

The difference between warm and cold is due to the difference in denominator (maybe), so don't compare them with each other.

## Variation of the DropoutNet

- replace the underlying WMF
    - user/item id -> Embedding Layers -> Preference Latent vector;
    - output: target
    - just use the known pairs
    - use the normal dropout (could experiment on this)

To test:

- use the all-zero dropout

### Define and initialize the model

In [20]:
# User context columns fed through embedding layers (one embedding each).
user_CATEGORICAL = [
    'city', 'gender', 'registered_via', 'registration_year', 
    'registration_month', 'registration_day', 'expiration_year', 
    'expiration_month', 'expiration_day']
# User context columns fed to the model as raw numeric values.
user_NUMERICAL = ['age', 'weird_age', 'validate_days']
# Sanity check displayed as the cell output: (columns of df_user_context that
# are in neither list, columns that are in both lists).
set(df_user_context.columns) - (set(user_CATEGORICAL).union(set(user_NUMERICAL))), \
set(user_CATEGORICAL).intersection(set(user_NUMERICAL))

({'Unnamed: 0', 'user_id'}, set())

In [21]:
# Song context columns fed through embedding layers (one embedding each).
item_CATEGORICAL = [
    'artist_name', 'composer', 'genre_ids', 'language', 
    'lyricist', 'song_year']
# Song context columns fed to the model as raw numeric values.
item_NUMERICAL = [
    'song_length', 'genre_count', 'lyricist_count',
    'composer_count', 'artist_count', 'is_featured',
    'artist_composer', 'artist_composer_lyricist', 
    'song_lang_boolean', 'smaller_song']
# Sanity check displayed as the cell output: (columns of df_song_context that
# are in neither list, columns that are in both lists).
set(df_song_context.columns) - (set(item_CATEGORICAL).union(set(item_NUMERICAL))), \
set(item_CATEGORICAL).intersection(set(item_NUMERICAL))

({'Unnamed: 0', 'song_id'}, set())

In [22]:
def dense_batch_fc_tanh(x, units, scope, do_norm=False):
    """Fully connected layer followed by optional batch normalisation and a
    tanh activation.

    :param x: input tensor
    :param units: number of output units of the dense layer
    :param scope: accepted for call compatibility; not used by the body
    :param do_norm: when True, insert BatchNormalization before the tanh
    :return: the activated output tensor
    """
    dense = Dense(
        units,
        kernel_initializer=TruncatedNormal(stddev=0.01),
        bias_initializer=Zeros())
    hidden = dense(x)
    if do_norm:
        hidden = BatchNormalization(momentum=0.9, center=True, scale=True)(hidden)
    return Activation('tanh')(hidden)

In [29]:
class DeepCF:
    """
    Two-tower Keras model: a user tower and an item tower, each combining an
    id ("preference") embedding with content features, scored by the dot
    product of the two tower outputs.

    Constructor arguments:
    latent_rank_in: stored as `rank_in`; not read by the other methods of
        this class (the preference embedding size is fixed in `embed_pref`)
    model_select: array of number of hidden unit,
        i.e. [200,100] indicate two hidden layer with 200 units followed by 100 units
    rank_out: size of the final user / item embedding produced by each tower

    After `build_model()` the instance exposes `model` (full scoring model),
    `user_model` and `item_model` (the two towers on their own).
    """

    def __init__(self, latent_rank_in, model_select, rank_out):
        self.rank_in = latent_rank_in
        self.model_select = model_select
        self.rank_out = rank_out

    def context_model(self, tag, df_context, CATEGORICAL, NUMERICAL):
        """Build the content branch for one side ('user' or 'item').

        tag: prefix used in the layer names
        df_context: DataFrame used only to size each categorical vocabulary
        CATEGORICAL: column names that each get an Input and an Embedding
        NUMERICAL: column names fed together through one numeric Input

        Returns (input_layers, preds): the list of Input layers (categorical
        ones first, numeric one last) and the concatenated content tensor.
        """
        input_layers = []
        embed_layers = []
        for col in CATEGORICAL:
            input_layer = Input(shape=(1,), name=tag + '_' + col + '_input')
            input_layers.append(input_layer)
            # presumably the column holds non-negative integer codes, so
            # max + 1 covers every value -- TODO confirm
            vocab_size = df_context[col].max() + 1
            # embedding width: 2 ** ceil(log2(log2(vocab_size)))
            embed_size = np.power(2, int(np.ceil(np.log2(np.log2(vocab_size)))))
            print('[%s] %-20s\tvocab: %-8d, embed: %-4d' % (tag, col, vocab_size, embed_size))
            embed_layer = Embedding(
                input_dim = vocab_size,
                output_dim = embed_size,
                embeddings_initializer = RandomUniform(minval=-0.1, maxval=0.1),
                embeddings_regularizer = l2(1e-4),
                input_length = 1,
                name = tag + '_' + col+'_embed',
                trainable=True)
            embed_layer = embed_layer(input_layer)
            # reshape the embedding output to a flat vector
            embed_layer = Reshape((embed_size,))(embed_layer)
            embed_layers.append(embed_layer)

        # all numerical columns enter through a single input
        numerical_input = Input(shape=(len(NUMERICAL),), name=tag+'_numerical_input')
        input_layers.append(numerical_input)

        preds = concatenate(embed_layers + [numerical_input], name=tag + '_content')
#         preds = Dense(64, activation='relu', name=tag + '_content_dense1')(preds)
#         preds = Dropout(0.5, name=tag + '_content_dropout')(preds)
#         preds = Dense(64, name=tag + '_content_dense1')(preds)
        return input_layers, preds

    @staticmethod
    def embed_pref(name, input_layer, dropout_constant, vocab_size):
        '''
        Build the id ("preference") embedding for one side and multiply it by
        `dropout_constant`, an extra model input.

        name: prefix of the embedding layer name
        input_layer: id Input tensor
        dropout_constant: Input tensor the embedding is multiplied with
        vocab_size: number of embedding rows

        note: NO regularizer here as I think dropout will do the regularization
        '''
        dropout_rate = 0.5  # not used in this method
        embed_size = 64 # embedding size of latent preference vector
        embed_layer = Embedding(
            input_dim = vocab_size,
            output_dim = embed_size,
            embeddings_initializer = RandomUniform(minval=-0.1, maxval=0.1),
            input_length = 1,
            name = name + '_pref_embed',
            trainable=True)
        embed_vector = embed_layer(input_layer)
        embed_vector = Reshape((embed_size, ))(embed_vector)
        # add a layer of all zero dropout
        # (the constant input is declared with shape (1,) in build_model)
        dropout_vector = Multiply()([embed_vector, dropout_constant])
        return dropout_vector

    def build_model(self):
        """Assemble and compile the full model.

        Reads the module-level names num_user, num_item, df_user_context,
        df_song_context and the user/item CATEGORICAL / NUMERICAL lists.
        Sets self.model, self.user_model, self.item_model and the
        intermediate tensors as attributes.
        """
        # user/item ids + embedding instead of pre-trained MF latent vectors
        self.user_ids = Input(shape=(1, ), dtype='int32', name='user_id')
        self.item_ids = Input(shape=(1, ), dtype='int32', name='item_id')
        # per-sample multipliers applied to the preference embeddings
        self.user_dropout_constant = Input(shape=(1, ))
        self.item_dropout_constant = Input(shape=(1, ))
        self.Upref = DeepCF.embed_pref('user', self.user_ids, self.user_dropout_constant, num_user)
        self.Vpref = DeepCF.embed_pref('item', self.item_ids, self.item_dropout_constant, num_item)

        # content part
        self.user_inputs, self.Ucontent = self.context_model(
            'user', df_user_context, CATEGORICAL=user_CATEGORICAL, NUMERICAL=user_NUMERICAL)
        self.item_inputs, self.Vcontent = self.context_model(
            'item', df_song_context, CATEGORICAL=item_CATEGORICAL, NUMERICAL=item_NUMERICAL)

        # each tower starts from [preference embedding, content features]
        u_concat = concatenate([self.Upref, self.Ucontent])
        v_concat = concatenate([self.Vpref, self.Vcontent])
        u_last = u_concat
        v_last = v_concat
        # one dense + batch-norm + tanh block per entry of model_select
        for ihid, hid in enumerate(self.model_select):
            u_last = dense_batch_fc_tanh(u_last, hid, 'user_layer_%d' % (ihid + 1), do_norm=True)
            v_last = dense_batch_fc_tanh(v_last, hid, 'item_layer_%d' % (ihid + 1), do_norm=True)

        # linear projection of each tower to rank_out dimensions
        self.U_embedding = Dense(
            self.rank_out, 
            kernel_initializer = TruncatedNormal(stddev=0.01),
            bias_initializer = Zeros())(u_last)
        self.V_embedding = Dense(
            self.rank_out, 
            kernel_initializer = TruncatedNormal(stddev=0.01),
            bias_initializer = Zeros())(v_last)
        self.preds = dot([self.U_embedding, self.V_embedding], axes=1, name='dot_score')
        # input order of the full model: ids, dropout constants, user content
        # inputs, item content inputs
        self.input_layers = [self.user_ids, self.item_ids, self.user_dropout_constant, self.item_dropout_constant] + \
                            self.user_inputs + self.item_inputs

        # stand-alone towers sharing the layers above
        self.user_model = Model(inputs=[self.user_ids, self.user_dropout_constant] + self.user_inputs, outputs=self.U_embedding)
        self.item_model = Model(inputs=[self.item_ids, self.item_dropout_constant] + self.item_inputs, outputs=self.V_embedding)

        model = Model(inputs=self.input_layers, outputs=self.preds)
        # optimizer = RMSprop()
        optimizer = RMSprop(lr=5e-3)
        model.compile(optimizer=optimizer, loss='mean_squared_error')
        self.model = model

In [30]:
latent_rank_in = 64
model_select = [200, 100]
rank_out = 100

dropout_net = DeepCF(latent_rank_in, model_select, rank_out)
dropout_net.build_model()
dropout_net.model.summary()

[user] city                	vocab: 21      , embed: 8   
Instructions for updating:
keep_dims is deprecated, use keepdims instead
[user] gender              	vocab: 3       , embed: 2   
[user] registered_via      	vocab: 5       , embed: 4   
[user] registration_year   	vocab: 14      , embed: 4   
[user] registration_month  	vocab: 12      , embed: 4   
[user] registration_day    	vocab: 31      , embed: 8   
[user] expiration_year     	vocab: 18      , embed: 8   
[user] expiration_month    	vocab: 12      , embed: 4   
[user] expiration_day      	vocab: 31      , embed: 8   
[item] artist_name         	vocab: 40583   , embed: 16  
[item] composer            	vocab: 76064   , embed: 32  
[item] genre_ids           	vocab: 573     , embed: 16  
[item] language            	vocab: 10      , embed: 4   
[item] lyricist            	vocab: 33888   , embed: 16  
[item] song_year           	vocab: 100     , embed: 8   
________________________________________________________________________

### Help Functions

In [31]:
class Stopwatch():
    """Accumulates the time spent between paired `tic()` / `toc()` calls.

    Attributes:
        total: accumulated seconds over all completed tic/toc intervals
        info: label printed by `show`
        start_time: clock reading of the pending `tic`, or None when no
            interval is open
    """

    def __init__(self, info=''):
        self.total = 0
        self.info = info
        self.start_time = None

    def clear(self):
        """Reset the accumulated total to zero."""
        self.total = 0

    def tic(self):
        """Open an interval (restarts it if one is already open)."""
        # perf_counter is monotonic, so the interval cannot be distorted by
        # system clock adjustments.
        self.start_time = time.perf_counter()

    def toc(self):
        """Close the open interval and add its duration to `total`.

        :raises RuntimeError: if there is no open interval, i.e. `tic()` was
            never called or `toc()` was already called for it
        """
        if self.start_time is None:
            raise RuntimeError('Stopwatch.toc() called without a matching tic()')
        self.total += time.perf_counter() - self.start_time
        # closing the interval prevents a second toc() from double-counting
        self.start_time = None

    def show(self):
        """Print the accumulated total followed by the label."""
        print('%.3f seconds \t %s' % (self.total, self.info))

In [32]:
def generate_user_content(user_list):
    """Collect the user content inputs for the rows labelled `user_list`.

    Returns one Series per column of `user_CATEGORICAL` followed by a single
    DataFrame holding the `user_NUMERICAL` columns. Rows are selected from
    `df_user_context` by index label with `.loc`.
    NOTE(review): this presumably relies on the index label of each row being
    equal to its user id -- confirm.
    """
    features = []
    for col in user_CATEGORICAL:
        features.append(df_user_context.loc[user_list, col])
    features.append(df_user_context.loc[user_list, user_NUMERICAL])
    return features
        
def generate_item_content(item_list):
    """Collect the song content inputs for the rows labelled `item_list`.

    Returns one Series per column of `item_CATEGORICAL` followed by a single
    DataFrame holding the `item_NUMERICAL` columns. Rows are selected from
    `df_song_context` by index label with `.loc`.
    NOTE(review): this presumably relies on the index label of each row being
    equal to its song id -- confirm.
    """
    features = []
    for col in item_CATEGORICAL:
        features.append(df_song_context.loc[item_list, col])
    features.append(df_song_context.loc[item_list, item_NUMERICAL])
    return features

### Preprocessing

In [33]:
# generate content data for the data_train
timer.start('content preparing')
train_user_content = generate_user_content(data_train.df['user_id'])
train_item_content = generate_item_content(data_train.df['song_id'])
timer.end()

[2018-05-03 18:19:10] (start) content preparing
[2018-05-03 18:23:58] ( end ) content preparing [time elapsed: 0:04:48]


### Training

In [34]:
timer.start('training')
# probability of zeroing out a sample's preference embedding
dropout_rate = 0.5

num_epoch = 10
# Timing notes (author's laptop):
#   batch size = 2**13 -> around 400s for 1 epoch
#   batch size = 2**10 -> around 2000s for 1 epoch
#   batch size = 100   -> > 5h for 1 epoch
data_batch_size = 2**13
print('batch size: %d' % data_batch_size)

# Make sure the checkpoint directory exists before the first save_weights.
dropout_model_directory = '../model/dropout/'
if not os.path.exists(dropout_model_directory):
    os.makedirs(dropout_model_directory)

num_train = len(data_train.df)
for epoch in range(num_epoch):
    # Fresh 0/1 masks every epoch: 1.0 keeps the preference embedding of a
    # sample, 0.0 zeroes it out (each with probability dropout_rate). They are
    # cast to float explicitly because they feed float model inputs.
    user_dropout_constant = (np.random.random(size=num_train) > dropout_rate).astype('float32')
    item_dropout_constant = (np.random.random(size=num_train) > dropout_rate).astype('float32')

    # fit one epoch at a time so that a checkpoint is written after each
    dropout_net.model.fit(
        x = [data_train.df['user_id'], data_train.df['song_id'], user_dropout_constant, item_dropout_constant] + \
        train_user_content + train_item_content,
        y = data_train.df['target'],
        epochs = 1,
        batch_size = data_batch_size,
        shuffle = True
    )
    model_path = dropout_model_directory + 'variation1_AllZeroDropout_%d.hf5' % epoch
    dropout_net.model.save_weights(model_path)

timer.end()

[2018-05-03 18:25:28] (start) training
batch size: 8192
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
[2018-05-03 19:30:33] ( end ) training [time elapsed: 1:05:05]


In [None]:
# # load previous model
# model_path = '../model/dropout/variation1.hf5'
# dropout_net.model.load_weights(model_path)

## Evaluations

In [35]:
# Evaluation
def recall_score_model(klist, data, v_user_all, v_item_all):
    """Mean recall@k when each user's candidate items are ranked by the dot
    product of the given user and item vectors.

    :param klist: the list of k's in recall@k
    :param data: data set providing user_list, item_list and target_set
    :param v_user_all: array of user vectors, indexed by user id
    :param v_item_all: array of item vectors, indexed by item id
    :return: array(float) for recall at each k, with the same size as klist
    """
    largest_k = max(klist)
    per_user = []
    for user in data.user_list:
        candidates = data.item_list[user]

        # score every candidate item of this user
        scores = np.matmul(v_user_all[user], v_item_all[candidates].T).flatten()

        # rank at most `largest_k` candidates, best first
        picked = top_k(scores, min(largest_k, len(candidates)))
        ranked_items = candidates[picked]

        per_user.append(__recall(klist, data.target_set[user], ranked_items))
    return np.mean(per_user, axis=0)

In [37]:
sub_timer.start('evaluation of dropout net')

# Inputs that do not depend on the checkpoint are prepared once.
klist = list(range(5, 51, 5))
user_content = generate_user_content(np.arange(num_user))
item_content = generate_item_content(np.arange(num_item))
eval_splits = [
    ('train', data_train),
    ('test warm', data_test_warm),
    ('test cold user', data_test_cold_user),
    ('test cold item', data_test_cold_item),
]
# The MF baseline does not change between checkpoints: compute it the first
# time a split is reported and reuse the result afterwards.
mf_recall = {}

for epoch in range(num_epoch):
    model_path = '../model/dropout/variation1_AllZeroDropout_%d.hf5' % epoch
    dropout_net.model.load_weights(model_path)
    print('-'*40)
    print(model_path)
    # dropout constant of 1 keeps every preference embedding at inference
    dropout_user_all = dropout_net.user_model.predict([np.arange(num_user), np.ones(num_user)] + user_content)
    dropout_item_all = dropout_net.item_model.predict([np.arange(num_item), np.ones(num_item)] + item_content)

    # per split: random baseline, MF baseline, dropout net
    for split_name, split_data in eval_splits:
        print(split_name)
        print(recall_random(klist, split_data))
        if split_name not in mf_recall:
            mf_recall[split_name] = recall_score_model(klist, split_data, v_user_all, v_item_all)
        print(mf_recall[split_name])
        print(recall_score_model(klist, split_data, dropout_user_all, dropout_item_all))
        print()
sub_timer.end()

[2018-05-03 19:31:56] (start) evaluation of dropout net
----------------------------------------
../model/dropout/variation1_AllZeroDropout_0.hf5
train
[ 0.28407339  0.31362877  0.33468879  0.35296017  0.37254039  0.39203188
  0.41096298  0.43106178  0.45115515  0.47032577]
[ 0.31114333  0.35040459  0.38176863  0.41004362  0.43711575  0.46288058
  0.48721552  0.51096305  0.53371918  0.55523986]
[ 0.28416615  0.31399136  0.33551324  0.35381766  0.37348277  0.39306734
  0.41198102  0.43213922  0.45215531  0.4712119 ]

test warm
[ 0.2035907   0.42481737  0.60532398  0.73481559  0.81976243  0.87811801
  0.91572944  0.94083731  0.9575706   0.96854423]
[ 0.26750232  0.49173467  0.65854236  0.77200428  0.84624367  0.89548443
  0.92770755  0.94943105  0.96372148  0.97313665]
[ 0.20532238  0.42643401  0.6067559   0.73536969  0.82016979  0.87825056
  0.91559442  0.94059791  0.95745456  0.96841787]

test cold user
[ 0.54533527  0.67519988  0.75797801  0.81746578  0.86177556  0.89509069
  0.921028

train
[ 0.28389301  0.3135323   0.33483347  0.35304765  0.37260084  0.39215779
  0.41122476  0.43137525  0.45155533  0.47070344]
[ 0.31114333  0.35040459  0.38176863  0.41004362  0.43711575  0.46288058
  0.48721552  0.51096305  0.53371918  0.55523986]
[ 0.29868141  0.3309469   0.35614853  0.378882    0.40234283  0.42482983
  0.44638775  0.46845513  0.48993173  0.51028524]

test warm
[ 0.20381367  0.42496331  0.6061124   0.73538372  0.81982658  0.87803424
  0.91548755  0.94067337  0.9574204   0.96841562]
[ 0.26750232  0.49173467  0.65854236  0.77200428  0.84624367  0.89548443
  0.92770755  0.94943105  0.96372148  0.97313665]
[ 0.24993065  0.47476635  0.6456459   0.76316912  0.83952174  0.89100235
  0.9247127   0.94716483  0.9620943   0.97200639]

test cold user
[ 0.54571189  0.67456393  0.75800389  0.81704103  0.86126624  0.89465557
  0.92064728  0.94115221  0.95601025  0.9669628 ]
[ 0.54603787  0.67667078  0.75895311  0.81788323  0.8618485   0.89507481
  0.92072056  0.94114917  0.95592

# Archived