# Input Data 

In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Embedding, Flatten, \
  Concatenate

print(tf.__version__)

2.0.1


In [17]:
!pip install sklearn

Processing /home/rmedal/.cache/pip/wheels/76/03/bb/589d421d27431bcd2c6da284d5f2286c8e3b2ea3cf1594c074/sklearn-0.0-py2.py3-none-any.whl
Collecting scikit-learn
  Using cached scikit_learn-0.22.2-cp36-cp36m-manylinux1_x86_64.whl (7.1 MB)
Collecting joblib>=0.11
  Using cached joblib-0.14.1-py2.py3-none-any.whl (294 kB)
Installing collected packages: joblib, scikit-learn, sklearn
Successfully installed joblib-0.14.1 scikit-learn-0.22.2 sklearn-0.0


In [26]:
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam

#import sklearn
from sklearn.utils import shuffle

import numpy as np
import pandas as pd
import os

In [4]:
df = pd.read_csv('train/local-test/test_dir/input/data/ratings.csv')
df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,1112486027
1,1,29,3.5,1112484676
2,1,32,3.5,1112484819
3,1,47,3.5,1112484727
4,1,50,3.5,1112484580


In [5]:
df.userId = pd.Categorical(df.userId)
df['new_user_id'] = df.userId.cat.codes
df.userId[12:13]

12    1
Name: userId, dtype: category
Categories (138493, int64): [1, 2, 3, 4, ..., 138490, 138491, 138492, 138493]

In [6]:
df.movieId = pd.Categorical(df.movieId)
df['new_movie_id'] = df.movieId.cat.codes
df.movieId[12:13]

12    318
Name: movieId, dtype: category
Categories (26744, int64): [1, 2, 3, 4, ..., 131256, 131258, 131260, 131262]

In [7]:
# Get user IDs, movie IDs, and ratings as separate arrays
user_ids = df['new_user_id'].values
movie_ids = df['new_movie_id'].values
ratings = df['rating'].values
df.rating[12:13]

12    4.0
Name: rating, dtype: float64

In [8]:
print(user_ids)
print(user_ids[12:13])

[     0      0      0 ... 138492 138492 138492]
[0]


In [9]:
print(movie_ids)
print(movie_ids[12:13])

[    1    28    31 ... 13875 13993 14277]
[315]


In [10]:
print(ratings[12:13])

[4.]


In [11]:
# # Get number of users and number of movies
N = len(set(user_ids))
M = len(set(movie_ids))

# Set embedding dimension
K = 10

# N = 1
# M = 1


In [21]:
# Make a neural network

# User input
u = Input(shape=(1,))

# Movie input
m = Input(shape=(1,))

# User embedding
u_emb = Embedding(N, K)(u) # output is (num_samples, 1, K)

# Movie embedding
m_emb = Embedding(M, K)(m) # output is (num_samples, 1, K)

# Flatten both embeddings
u_emb = Flatten()(u_emb) # now it's (num_samples, K)
m_emb = Flatten()(m_emb) # now it's (num_samples, K)

# Concatenate user-movie embeddings into a feature vector
x = Concatenate()([u_emb, m_emb]) # now it's (num_samples, 2K)

# Now that we have a feature vector, it's just a regular ANN
x = Dense(1024, activation='relu')(x)
# x = Dense(400, activation='relu')(x)
# x = Dense(400, activation='relu')(x)
x = Dense(1)(x)

In [13]:
print(x)

Tensor("concatenate/Identity:0", shape=(None, 20), dtype=float32)


In [22]:
# Build the model and compile
model = Model(inputs=[u, m], outputs=x)
model.compile(
  loss='mse',
  optimizer=SGD(lr=0.08, momentum=0.9),
)

In [23]:
# split the data
user_ids, movie_ids, ratings = shuffle(user_ids, movie_ids, ratings)
Ntrain = int(0.8 * len(ratings))
train_user = user_ids[:Ntrain]
train_movie = movie_ids[:Ntrain]
train_ratings = ratings[:Ntrain]

test_user = user_ids[Ntrain:]
test_movie = movie_ids[Ntrain:]
test_ratings = ratings[Ntrain:]

# center the ratings
avg_rating = train_ratings.mean()
train_ratings = train_ratings - avg_rating
test_ratings = test_ratings - avg_rating

In [24]:
r = model.fit(
  x=[train_user, train_movie],
  y=train_ratings,
  epochs=25,
  batch_size=1024,
  verbose=2, # goes a little faster when you don't print the progress bar
  validation_data=([test_user, test_movie], test_ratings),
)

Train on 16000210 samples, validate on 4000053 samples
Epoch 1/25
16000210/16000210 - 115s - loss: 0.7763 - val_loss: 0.7232
Epoch 2/25
16000210/16000210 - 113s - loss: 0.7014 - val_loss: 0.7038
Epoch 3/25
16000210/16000210 - 110s - loss: 0.6797 - val_loss: 0.6862
Epoch 4/25
16000210/16000210 - 111s - loss: 0.6640 - val_loss: 0.6759
Epoch 5/25
16000210/16000210 - 116s - loss: 0.6534 - val_loss: 0.6719
Epoch 6/25
16000210/16000210 - 111s - loss: 0.6419 - val_loss: 0.6607
Epoch 7/25
16000210/16000210 - 107s - loss: 0.6253 - val_loss: 0.6492
Epoch 8/25
16000210/16000210 - 107s - loss: 0.6118 - val_loss: 0.6439
Epoch 9/25
16000210/16000210 - 107s - loss: 0.6021 - val_loss: 0.6426
Epoch 10/25
16000210/16000210 - 106s - loss: 0.5935 - val_loss: 0.6381
Epoch 11/25
16000210/16000210 - 106s - loss: 0.5857 - val_loss: 0.6411
Epoch 12/25
16000210/16000210 - 106s - loss: 0.5785 - val_loss: 0.6353
Epoch 13/25
16000210/16000210 - 106s - loss: 0.5721 - val_loss: 0.6323
Epoch 14/25
16000210/16000210 -

In [27]:
# Save the model to a temporary directory
import tempfile

MODEL_DIR = tempfile.gettempdir()
version = 1
export_path = os.path.join(MODEL_DIR, str(version))
print('export_path = {}\n'.format(export_path))
if os.path.isdir(export_path):
  print('\nAlready saved a model, cleaning up\n')
  !rm -r {export_path}

tf.saved_model.save(model, export_path)

print('\nSaved model:')
!ls -l {export_path}

export_path = /tmp/1

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: /tmp/1/assets

Saved model:
total 124
drwxr-xr-x 2 rmedal rmedal   4096 Mar  2 15:09 assets
-rw-r--r-- 1 rmedal rmedal 117609 Mar  2 15:09 saved_model.pb
drwxr-xr-x 2 rmedal rmedal   4096 Mar  2 15:09 variables


In [28]:
!saved_model_cli show --dir {export_path} --all


MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['__saved_model_init_op']:
  The given SavedModel SignatureDef contains the following input(s):
  The given SavedModel SignatureDef contains the following output(s):
    outputs['__saved_model_init_op'] tensor_info:
        dtype: DT_INVALID
        shape: unknown_rank
        name: NoOp
  Method name is: 

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['input_3'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 1)
        name: serving_default_input_3:0
    inputs['input_4'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 1)
        name: serving_default_input_4:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['dense_1'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 1)
        name: StatefulPartitionedCall:0
  Method name is: tensorflow/serving/predict


In [49]:
x_test=[train_user[1:2], train_movie[1:2]]

In [53]:
x_test

[array([99878], dtype=int32), array([3593], dtype=int16)]

In [55]:
from json import JSONEncoder 

In [58]:
class NumpyArrayEncoder(JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return JSONEncoder.default(self, obj)

In [59]:
import json
data = json.dumps({"signature_name": "serving_default", "instances": x_test}, cls=NumpyArrayEncoder)
print(data)

{"signature_name": "serving_default", "instances": [[99878], [3593]]}
