<i>Copyright (c) Microsoft Corporation. All rights reserved.</i>

<i>Licensed under the MIT License.</i>

# Wide and Deep Model for Movie Recommendation
<br>


This notebook shows how to build and test [**wide-and-deep model**](https://arxiv.org/abs/1606.07792)--linear combination of the linear and DNN models--using [TensorFlow high-level Estimator API](https://www.tensorflow.org/api_docs/python/tf/estimator/DNNLinearCombinedRegressor).

For more details about hyperparameter tuning via Azure Machine Learning service, see [AML_Hyperparameter_Tuning notebook](../04_model_select_and_optimize/aml_hyperparameter_tuning.ipynb).

### Prerequisite
* tensorflow (version 1.8 or higher) - GPU version is preferable

In [1]:
import sys
sys.path.append("../../")

import os
import shutil

import tensorflow as tf
import pandas as pd
import numpy as np
import sklearn.preprocessing

from reco_utils.common import tf_utils
from reco_utils.dataset import movielens
from reco_utils.dataset.pandas_df_utils import user_item_pairs
from reco_utils.dataset.python_splitters import python_random_split
from reco_utils.evaluation.python_evaluation import (
    rmse, mae, rsquared, exp_var,
    map_at_k, ndcg_at_k, precision_at_k, recall_at_k
)

In [2]:
from tensorflow.python.client import device_lib

# Report the TensorFlow build in use, then list the names of the local
# compute devices it can see.
print("Tensorflow Version:", tf.__version__)

devices = device_lib.list_local_devices()
[device.name for device in devices]

Tensorflow Version: 1.12.0


['/device:CPU:0', '/device:GPU:0']

### Data loading

Download [MovieLens](https://grouplens.org/datasets/movielens/) data and split train / test set.
We use genres as item features.
We don't use the timestamp, since we want the model to learn from the movies themselves rather than fit to time information.

In [3]:
# Number of items to recommend per user (the "k" in the top-k metrics)
TOP_K = 10

# MovieLens variant to download; one of: 100k, 1m, 10m, or 20m
MOVIELENS_DATA_SIZE = "1m"

# Column names used by the cells below
USER_COL, ITEM_COL, RATING_COL = "UserId", "MovieId", "Rating"
ITEM_FEAT_COL = "Genres"

In [4]:
# Load the ratings; the raw genre string of each movie is requested in a
# '_Genres' column, which later cells encode and then drop.
data = movielens.load_pandas_df(
    header=[USER_COL, ITEM_COL, RATING_COL],
    genres_col='_Genres',
    size=MOVIELENS_DATA_SIZE
)
data.head()

Unnamed: 0,UserId,MovieId,Rating,_Genres
0,1,1193,5.0,Drama
1,2,1193,5.0,Drama
2,12,1193,4.0,Drama
3,15,1193,4.0,Drama
4,17,1193,5.0,Drama


In [5]:
# Turn each '|'-separated genre string into a multi-hot int list so that the
# genres can be fed to the model as a fixed-length item feature.
genres_encoder = sklearn.preprocessing.MultiLabelBinarizer()

genre_lists = data['_Genres'].apply(lambda genre_str: genre_str.split("|"))
genre_multi_hot = genres_encoder.fit_transform(genre_lists)
data[ITEM_FEAT_COL] = genre_multi_hot.tolist()

print("Genres:", genres_encoder.classes_)

Genres: ['Action' 'Adventure' 'Animation' "Children's" 'Comedy' 'Crime'
 'Documentary' 'Drama' 'Fantasy' 'Film-Noir' 'Horror' 'Musical' 'Mystery'
 'Romance' 'Sci-Fi' 'Thriller' 'War' 'Western']


In [6]:
# One row per distinct movie, with both the raw and the encoded genres
unique_movies = data.drop_duplicates(ITEM_COL)
items = unique_movies[[ITEM_COL, '_Genres', ITEM_FEAT_COL]].reset_index(drop=True)
print(items.head())

# The raw genre string was only printed for reference; the encoded column is
# what the rest of the notebook uses.
items = items.drop('_Genres', axis=1)

   MovieId                       _Genres  \
0     1193                         Drama   
1      661  Animation|Children's|Musical   
2      914               Musical|Romance   
3     3408                         Drama   
4     2355   Animation|Children's|Comedy   

                                              Genres  
0  [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...  
1  [0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...  
2  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, ...  
3  [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...  
4  [0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  


In [7]:
# Single-column frame of the distinct user ids found in the ratings data
users = data[[USER_COL]].drop_duplicates().reset_index(drop=True)
users.head()

Unnamed: 0,UserId
0,1
1,2
2,12
3,15
4,17


In [8]:
# The raw genre string is not a model input, so it is dropped before the
# seeded random split.
ratings = data.drop('_Genres', axis=1)
train, test = python_random_split(ratings, ratio=0.75, seed=123)

### Modeling

'--model-type', 'wide_deep', '--epochs', '50', '--batch-size', '128', '--dnn-batch-norm', 'True', '--dnn-hidden-units', '256,64,256', '--dnn-item-embedding-dim', '4', '--dnn-optimizer', 'Adam', '--dnn-optimizer-lr', '0.02039596884309', '--dnn-user-embedding-dim', '128', '--dropout', '0.289914344143064', '--l1-reg', '0.0101626002122412', '--linear-optimizer', 'Ftrl', '--linear-optimizer-lr', '0.0369276616034676'

In [9]:
# Hyper parameters
NUM_EPOCHS = 50
BATCH_SIZE = 128

# Optimizer for the wide (linear) part
LINEAR_OPTIMIZER = tf.train.FtrlOptimizer(
    learning_rate=0.03, l1_regularization_strength=0.01
)

# Optimizer and network shape for the deep (DNN) part
DNN_OPTIMIZER = tf.train.AdamOptimizer(learning_rate=0.02)
DNN_HIDDEN_UNITS = [256, 64, 256]
DNN_DROPOUT = 0.2
DNN_BATCH_NORM = True

# Embedding sizes follow the rule of thumb:
#   embedding_dimensions = number_of_categories ** 0.25
DNN_USER_DIM = int(len(users) ** 0.25)
DNN_ITEM_DIM = int(len(items) ** 0.25)

print(
    "Embedding {} users to {}-dim vector".format(len(users), DNN_USER_DIM),
    "Embedding {} items to {}-dim vector".format(len(items), DNN_ITEM_DIM),
    sep="\n"
)

Embedding 6040 users to 8-dim vector
Embedding 3706 items to 7-dim vector


### Feature embedding

The wide-and-deep model uses two different types of feature sets: 1) a wide set of cross-product features that captures how the co-occurrence of a query-item feature pair correlates with the target label or rating, and 2) deep, lower-dimensional embedding vectors for every query and item.

We use genres as the item feature.

We do not use the timestamp, for two reasons:
1. It doesn't make sense as a feature for predicting ratings.
2. In the top-k recommendation scenario, we don't have timestamps for the items a user hasn't watched yet, so a model trained on timestamps would be biased by them.


In [10]:
# Categorical id columns, with the vocabulary taken from the known users / items
user_id = tf.feature_column.categorical_column_with_vocabulary_list(
    USER_COL, users[USER_COL].values
)
item_id = tf.feature_column.categorical_column_with_vocabulary_list(
    ITEM_COL, items[ITEM_COL].values
)

# Wide part: user x item cross feature, hashed into a fixed number of buckets
user_item_cross = tf.feature_column.crossed_column(
    [user_id, item_id], hash_bucket_size=1000
)
wide_columns = [user_item_cross]

# Deep part: one embedding per id column plus the multi-hot genres vector
user_embedding = tf.feature_column.embedding_column(
    categorical_column=user_id,
    dimension=DNN_USER_DIM,
    max_norm=DNN_USER_DIM ** 0.5
)
item_embedding = tf.feature_column.embedding_column(
    categorical_column=item_id,
    dimension=DNN_ITEM_DIM,
    max_norm=DNN_ITEM_DIM ** 0.5
)
genres_feature = tf.feature_column.numeric_column(
    ITEM_FEAT_COL,
    shape=len(genres_encoder.classes_),
    dtype=tf.float32
)
deep_columns = [user_embedding, item_embedding, genres_feature]

# Print a truncated description of every column as a sanity check
for column in wide_columns + deep_columns:
    print(str(column)[:100], "...")

_CrossedColumn(keys=(_VocabularyListCategoricalColumn(key='UserId', vocabulary_list=(1, 2, 12, 15, 1 ...
_EmbeddingColumn(categorical_column=_VocabularyListCategoricalColumn(key='UserId', vocabulary_list=( ...
_EmbeddingColumn(categorical_column=_VocabularyListCategoricalColumn(key='MovieId', vocabulary_list= ...
_NumericColumn(key='Genres', shape=(18,), default_value=None, dtype=tf.float32, normalizer_fn=None) ...


Build the wide-and-deep model.

In [14]:
MODEL_DIR = os.path.join('.', 'model_checkpoints')

# Remove checkpoints left by a previous run. A missing directory, or one we
# are not permitted to delete, is tolerated and the cell carries on.
try:
    shutil.rmtree(MODEL_DIR)
except (PermissionError, FileNotFoundError):
    pass

# Log less frequently
run_config = tf.estimator.RunConfig().replace(log_step_count_steps=1000)

estimator_args = dict(
    model_dir=MODEL_DIR,
    config=run_config,
    # wide model args
    linear_feature_columns=wide_columns,
    linear_optimizer=LINEAR_OPTIMIZER,
    # deep model args
    dnn_feature_columns=deep_columns,
    dnn_hidden_units=DNN_HIDDEN_UNITS,
    dnn_optimizer=DNN_OPTIMIZER,
    dnn_dropout=DNN_DROPOUT,
    batch_norm=DNN_BATCH_NORM,
)

# A regressor is used because the target is the rating value
model = tf.estimator.DNNLinearCombinedRegressor(**estimator_args)

INFO:tensorflow:Using config: {'_model_dir': '.\\wide_deep_models', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 1000, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000024D3A35E048>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


We can also attach additional evaluation metrics to the estimator.

In [15]:
def metrics_fn(labels, predictions):
    """Extra evaluation metrics for the estimator.

    Args:
        labels: Label tensor; cast to float32 before comparison.
        predictions: Prediction dict; the 'predictions' entry is compared
            against the labels.

    Returns:
        dict: Maps the metric name 'mae' to the mean-absolute-error metric.
    """
    float_labels = tf.cast(labels, tf.float32)
    return {
        'mae': tf.metrics.mean_absolute_error(
            float_labels,
            predictions['predictions']
        )
    }


model = tf.contrib.estimator.add_metrics(model, metrics_fn)

INFO:tensorflow:Using config: {'_model_dir': '.\\wide_deep_models', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 1000, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000024D5230DE80>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


### Training and Evaluation

If you want to do hyperparameter tuning, split the data into three sets: training, evaluation, and testing.
Here, we use known parameters, so we just use a training set and a test set.

During training, we evaluate precision@k every 10,000 iterations and also track MAE.
Note that `average_loss` is the MSE, so we can easily get the RMSE by taking its square root.

Prepare a recommendation pool for the recommend-k-items (ranking) scenario:
1. Get all user-item pairs, removing the items each user has already seen (optional; this suits the movie recommendation scenario).
2. Add genres.

Other ranking metrics, such as NDCG or MAP, can be used in the evaluation hook as well.

In [16]:
# Candidate user-item pairs for the ranking (recommend k items) evaluation.
# The training set is passed as the filter frame (presumably so that pairs
# already seen in training are excluded; confirm against user_item_pairs).
# The same pool is reused by the evaluation log hook during training.
pair_columns = dict(user_col=USER_COL, item_col=ITEM_COL)
ranking_pool = user_item_pairs(
    user_df=users,
    item_df=items,
    user_item_filter_df=train,
    shuffle=True,
    **pair_columns
)

ranking_pool.head()

Unnamed: 0,UserId,MovieId,Genres
0,4697,3271,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ..."
1,898,3934,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."
2,400,2427,"[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ..."
3,1088,2131,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ..."
4,5173,1832,"[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ..."


In [17]:
# Column mapping and cut-off forwarded to the ranking metric inside the hook
eval_kwargs = dict(
    col_user=USER_COL,
    col_item=ITEM_COL,
    col_rating=RATING_COL,
    col_prediction='prediction',
    k=TOP_K,
)

# Hook that logs precision@k on the ranking pool against the test ratings
# every 10000 training iterations
precision_eval_hook = tf_utils.TrainLogHook(
    model_dir=MODEL_DIR,
    model=model,
    true_df=test,
    y_col=RATING_COL,
    eval_df=ranking_pool,
    every_n_iter=10000,
    eval_fn=precision_at_k,
    **eval_kwargs
)

# Shuffled mini-batches over the training set for NUM_EPOCHS epochs
train_input_fn = tf_utils.pandas_input_fn(
    df=train,
    y_col=RATING_COL,
    batch_size=BATCH_SIZE,
    num_epochs=NUM_EPOCHS,
    shuffle=True
)

model.train(input_fn=train_input_fn, hooks=[precision_eval_hook])

Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Saving checkpoints for 0 into .\wide_deep_models\model.ckpt.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-01-23-15:10:18
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from .\wide_deep_models\model.ckpt-0
INFO:tensorflow:Running local_init_op.
INFO:tens

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-01-23-17:22:46
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from .\wide_deep_models\model.ckpt-20002
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-01-23-17:23:10
INFO:tensorflow:Saving dict for global step 20002: average_loss = 0.8214793, global_step = 20002, label/mean = 3.580709, loss = 105.12455, mae = 0.7012339, prediction/mean = 3.715894
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 20002: .\wide_deep_models\model.ckpt-20002
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from .\wide_deep_models\mod

KeyboardInterrupt: 

### Testing

We predict the ratings using the wide-and-deep model we trained. Finally, we also generate top-k movie recommendations for each user and test the performance.

1. Item rating prediction

In [None]:
# Column mapping shared by the evaluation functions
cols = dict(
    col_user=USER_COL,
    col_item=ITEM_COL,
    col_rating=RATING_COL,
    col_prediction='prediction',
)

# Predict a rating for every row of the test set
rating_pred_input_fn = tf_utils.pandas_input_fn(df=test)
predictions = list(model.predict(input_fn=rating_pred_input_fn))
predicted_ratings = [row['predictions'][0] for row in predictions]

# Same rows as the test set, with the true rating replaced by the prediction
prediction_df = test.drop(RATING_COL, axis=1)
prediction_df['prediction'] = predicted_ratings
prediction_df['prediction'].describe()

In [None]:
# Rating-prediction metrics: true test ratings vs. predicted ratings
eval_rmse = rmse(test, prediction_df, **cols)
eval_mae = mae(test, prediction_df, **cols)
eval_rsquared = rsquared(test, prediction_df, **cols)
eval_exp_var = exp_var(test, prediction_df, **cols)

rating_report = [
    "RMSE:\t\t%f" % eval_rmse,
    "MAE:\t\t%f" % eval_mae,
    "rsquared:\t%f" % eval_rsquared,
    "exp var:\t%f" % eval_exp_var,
]
print("\n".join(rating_report))

In [None]:
# Run the estimator's own evaluation over the test set; steps=None places no
# step limit on the evaluation.
test_eval_input_fn = tf_utils.pandas_input_fn(df=test, y_col=RATING_COL)
eval_results = model.evaluate(input_fn=test_eval_input_fn, steps=None)

print(eval_results)

2. Recommend k items

In [None]:
# Score every candidate user-item pair in the ranking pool
ranking_input_fn = tf_utils.pandas_input_fn(df=ranking_pool)
predictions = list(model.predict(input_fn=ranking_input_fn))
prediction_df = ranking_pool.copy()
prediction_df['prediction'] = [row['predictions'][0] for row in predictions]

# TODO for now, fix TOP_K
eval_map = map_at_k(test, prediction_df, k=TOP_K, **cols)
eval_ndcg = ndcg_at_k(test, prediction_df, k=TOP_K, **cols)
eval_precision = precision_at_k(test, prediction_df, k=TOP_K, **cols)
eval_recall = recall_at_k(test, prediction_df, k=TOP_K, **cols)

ranking_report = [
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]
print("\n".join(ranking_report))

### Tensorboard

To see Tensorboard, run `tensorboard --logdir=model_checkpoints` (note that our MODEL_DIR is `model_checkpoints`) and open
`localhost:6006` in a browser.

TODO: add screenshot

### Export Model

In [None]:
# Base directory that receives the exported SavedModel
EXPORT_DIR_BASE = os.path.join('.', 'saved_model')
# Fixed: this previously referenced an undefined name (EXPORT_DIR) and raised
# NameError before anything was exported.
os.makedirs(EXPORT_DIR_BASE, exist_ok=True)

# One input receiver per estimator mode. The train / eval receivers are built
# from the same input functions used for training and evaluation above.
train_rcvr_fn = tf.contrib.estimator.build_supervised_input_receiver_fn_from_input_fn(
    tf_utils.pandas_input_fn(
        df=train,
        y_col=RATING_COL,
        batch_size=BATCH_SIZE,
        num_epochs=NUM_EPOCHS,
        shuffle=True
    )
)
eval_rcvr_fn = tf.contrib.estimator.build_supervised_input_receiver_fn_from_input_fn(
    tf_utils.pandas_input_fn(
        df=test,
        y_col=RATING_COL
    )
)
# The serving receiver parses serialized tf.train.Example protos using the
# parsing spec derived from the model's feature columns.
serve_rcvr_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
    tf.feature_column.make_parse_example_spec(wide_columns+deep_columns)
)
rcvr_fn_map = {
    tf.estimator.ModeKeys.TRAIN: train_rcvr_fn,
    tf.estimator.ModeKeys.EVAL: eval_rcvr_fn,
    tf.estimator.ModeKeys.PREDICT: serve_rcvr_fn
}

export_dir = tf.contrib.estimator.export_all_saved_models(
    model,
    export_dir_base=EXPORT_DIR_BASE,
    input_receiver_fn_map=rcvr_fn_map
)

print("Model exported to", export_dir)

In [None]:
# Reload the exported model as an estimator and evaluate it on the test set
saved_model = tf.contrib.estimator.SavedModelEstimator(export_dir)

saved_model_eval_input_fn = tf_utils.pandas_input_fn(df=test, y_col=RATING_COL)
result = saved_model.evaluate(saved_model_eval_input_fn, steps=None)
print(result)

In [None]:
# First row of the test set, used below as a single prediction example
test_sample = test.iloc[0, :]
test_sample

In [None]:
def predict_input_fn():
    """Build the prediction input for the single row held in `test_sample`.

    Packs the user id, item id and genres vector of the notebook-level
    `test_sample` row into one serialized `tf.train.Example`.

    Returns:
        dict: {'inputs': string tensor containing the serialized example}.
    """
    example = tf.train.Example()
    feature_map = example.features.feature

    # Values read from a pandas row may be numpy scalars; cast them to built-in
    # int / float so the protobuf field type checks accept them regardless of
    # the installed protobuf version.
    feature_map[USER_COL].int64_list.value.extend([int(test_sample[USER_COL])])
    feature_map[ITEM_COL].int64_list.value.extend([int(test_sample[ITEM_COL])])
    feature_map[ITEM_FEAT_COL].float_list.value.extend(
        [float(flag) for flag in test_sample[ITEM_FEAT_COL]]
    )
    return {'inputs': tf.constant([example.SerializeToString()])}


print(next(saved_model.predict(predict_input_fn)))



In [None]:
# TODO Cleanup EXPORT_DIR_BASE and MODEL_DIR
