MovieLensを利用してサンプルを作成する  
MovieLensのダウンロード

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import sys
import os
# Put the parent of the current working directory on the import path so the
# `datasets.movie_lens` import below resolves (presumably the `datasets`
# package sits one level above this notebook — confirm against the repo layout).
sys.path.append(os.path.dirname(Path().resolve()))
from datasets.movie_lens import MovieLensDataset

In [2]:
# Instantiate the project's MovieLens dataset wrapper; later cells read its
# `ratings`, `users` and `movies` attributes.
# NOTE(review): the section heading suggests this may also download the data —
# confirm in MovieLensDataset.
movielens_datasets = MovieLensDataset()

In [3]:
# Preview the first rows of the ratings table to check the available columns.
movielens_datasets.ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp,userIdx,movieIdx
0,1,307,3.5,1256677221,0,304
1,6,307,4.0,832059248,5,304
2,56,307,4.0,1383625728,55,304
3,71,307,5.0,1257795414,70,304
4,84,307,3.0,999055519,83,304


# TensorFlowを使う

In [4]:
!pip install tensorflow

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m22.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [5]:
import tensorflow as tf
from collections import Counter
from sklearn.model_selection import train_test_split

caused by: ["[Errno 2] The file to load file system plugin from does not exist.: '/usr/local/lib/python3.8/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so'"]
caused by: ['/usr/local/lib/python3.8/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: cannot open shared object file: No such file or directory']


In [6]:
def create_model(user_num: int, item_num: int, dimension: int = 10) -> tf.keras.models.Model:
    """Build and compile a matrix-factorization rating model with bias terms.

    The network takes two integer-index inputs (a user index and an item
    index), looks each one up in a latent-factor embedding and in a
    one-dimensional bias embedding, and predicts the rating as
    ``dot(user_factors, item_factors) + user_bias + item_bias``.

    Args:
        user_num: Number of rows in the user embedding tables.
        item_num: Number of rows in the item embedding tables.
        dimension: Size of the latent-factor vectors.

    Returns:
        A Keras model compiled with mean-squared-error loss, an SGD optimizer
        (learning rate 0.1, momentum 0.9) and an RMSE metric. Its inputs are
        ``[user_index, item_index]`` and its output is one value per sample.
    """
    layers = tf.keras.layers

    # One scalar index per sample for each side of the interaction.
    user_ids = layers.Input(shape=(1, ))
    item_ids = layers.Input(shape=(1, ))

    # Latent factors first, then the per-user / per-item bias lookups.
    user_factors = layers.Embedding(user_num, dimension)(user_ids)
    item_factors = layers.Embedding(item_num, dimension)(item_ids)
    user_bias = layers.Embedding(user_num, 1)(user_ids)
    item_bias = layers.Embedding(item_num, 1)(item_ids)

    # Inner product of the factor vectors, shifted by both biases, then
    # flattened so the model emits a single column of predictions.
    interaction = layers.Dot(axes=2)([user_factors, item_factors])
    biased_score = layers.Add()([interaction, user_bias, item_bias])
    prediction = layers.Flatten()(biased_score)

    mf_model = tf.keras.models.Model(inputs=[user_ids, item_ids], outputs=prediction)

    sgd = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9)
    mf_model.compile(
        loss=tf.keras.losses.MeanSquaredError(),
        optimizer=sgd,
        metrics=[tf.keras.metrics.RootMeanSquaredError()],
    )
    return mf_model

In [7]:
# Prepare the training and validation data: hold out 30% of the ratings,
# shuffling with a fixed seed so the split is reproducible.
train, test = train_test_split(
    movielens_datasets.ratings,
    test_size=0.3,
    shuffle=True,
    random_state=42,
)
print(train.shape, test.shape)

(19427410, 6) (8326034, 6)


In [8]:
# Create the model, sizing the embedding tables by the number of users and movies.
# NOTE(review): assumes every userIdx / movieIdx is a 0-based index smaller than
# these counts — confirm against MovieLensDataset.
user_num, content_num = len(movielens_datasets.users), len(movielens_datasets.movies)
model = create_model(user_num=user_num, item_num=content_num)

In [9]:
# Training
# Render the architecture diagram; this needs pydot and graphviz to be installed.
tf.keras.utils.plot_model(model, show_shapes=True)

# Callbacks: stop once val_loss has not improved for 5 epochs (restoring the
# best weights), and keep the best weights seen so far in 'keras.h5'.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True, verbose=0
)
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'keras.h5', monitor='val_loss', save_best_only=True, save_weights_only=True, mode='min', verbose=0
)

# The model consumes [user indices, movie indices] and is fit against the rating.
train_inputs = [train['userIdx'].values, train['movieIdx'].values]
train_targets = train['rating'].values
# The held-out split doubles as the validation set that drives both callbacks.
validation_data = (
    [test['userIdx'].values, test['movieIdx'].values],
    test['rating'].values,
)

# Run training
result = model.fit(
    x=train_inputs,
    y=train_targets,
    validation_data=validation_data,
    batch_size=1024,
    epochs=200,
    callbacks=[early_stopping, checkpoint],
    verbose=1,
)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200


Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200


In [10]:
# Show the History object returned by model.fit above.
result

<keras.callbacks.History at 0xffff0c2bc3a0>

In [11]:
# Save the trained model to an .h5 file under ../datasets.
model.save('../datasets/matrix-factorization.h5')

In [29]:
# Sanity check: predict ratings for user index 283214 on movie indices 1, 2 and 3.
user_indices = np.array([283214, 283214, 283214])
movie_indices = np.array([1, 2, 3])
model.predict([user_indices, movie_indices], verbose=1)



array([[3.952248 ],
       [3.679933 ],
       [3.3663383]], dtype=float32)