In [0]:
!wget "http://files.grouplens.org/datasets/movielens/ml-100k.zip"

In [0]:
!unzip ml-100k.zip

In [0]:
ls

In [0]:
ls ml-100k/

In [0]:
!pip install pydot

In [0]:
!apt-get install graphviz

In [0]:
import pandas as pd
import numpy as np
import warnings
# Suppress every warning for the rest of the session.
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import pydot
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [0]:
# Load the tab-separated ratings file; the column names are supplied explicitly.
dataset = pd.read_csv(
    "ml-100k/u.data",
    sep='\t',
    names=["user_id", "item_id", "rating", "timestamp"],
)

In [0]:
# Preview the first five rows of the freshly loaded ratings.
dataset.head(n=5)

In [0]:
# Convert the timestamp column from a count of seconds into datetime values
# (year-month-day hour:minute:second).
dataset["timestamp"] = pd.to_datetime(dataset["timestamp"], unit='s')

In [0]:
# Preview the first five rows after the timestamp conversion.
dataset.head(n=5)

In [0]:
# Number of distinct user ids in the ratings.
dataset["user_id"].unique().size

In [0]:
# Number of distinct item ids in the ratings.
dataset["item_id"].unique().size

In [0]:
# Preview user_id reinterpreted as a categorical column.
dataset["user_id"].astype('category').head(n=5)

In [0]:
# Preview the integer code assigned to each user_id category.
dataset["user_id"].astype('category').cat.codes.head(n=5)

In [0]:
# Same codes as a plain array; show the first five entries.
dataset["user_id"].astype('category').cat.codes.values[:5]

In [0]:
# Replace the raw user and item ids with their integer category codes so they
# can be used directly as embedding indices.
dataset["user_id"] = dataset["user_id"].astype('category').cat.codes.values
dataset["item_id"] = dataset["item_id"].astype('category').cat.codes.values

In [0]:
# Preview the first five rows after the ids were re-encoded.
dataset.head(n=5)

In [0]:
from sklearn.model_selection import train_test_split

In [0]:
# Hold out 20% of the ratings as the test set; the fixed random_state keeps
# the split repeatable across runs.
train, test = train_test_split(dataset, test_size=0.2, random_state=1234)

In [0]:
# Dimensions of the training split.
train.shape

In [0]:
# Dimensions of the test split.
test.shape

In [0]:
# Preview the first three training rows.
train.head(n=3)

In [0]:
# Preview the first three test rows.
test.head(n=3)

In [0]:
# Ground-truth ratings of the held-out test rows.
y_true = test["rating"]

In [0]:
# Preview the first five ground-truth ratings.
y_true.head(n=5)

## modelの作成



In [0]:
import keras
from keras import Model
from keras.layers import Input, Dense, BatchNormalization, Dropout, concatenate
from keras.layers import Embedding, Flatten

In [0]:
# Number of latent dimensions for the user embedding (MLP branch)
n_latent_factors_user = 8
# Number of latent dimensions for the movie embedding (MLP branch)
n_latent_factors_movie = 10
# Number of latent dimensions shared by the user and movie embeddings in the
# matrix-factorization branch
n_latent_factors_mf = 3

In [0]:
# Count the distinct users and movies; these sizes determine the embedding tables.
n_users = dataset["user_id"].unique().size
n_movies = dataset["item_id"].unique().size

In [0]:
# Report the number of distinct users and movies.
print("n_users:", n_users)
print("n_movies:", n_movies)

In [0]:
# Movie branch for the MLP tower: embed the movie index, flatten the embedding
# to a vector and apply dropout.
movie_input = Input(shape=(1,), name='Item')
movie_embedding_mlp = Embedding(
    input_dim=n_movies + 1,
    output_dim=n_latent_factors_movie,
    name='Movie-Embedding-MLP',
)(movie_input)
movie_vec_mlp = Flatten(name='FlattenMovies-MLP')(movie_embedding_mlp)
movie_vec_mlp = Dropout(rate=0.2)(movie_vec_mlp)

In [0]:
# Movie branch for matrix factorization: a second embedding of the same movie
# input, flattened and passed through dropout.
movie_embedding_mf = Embedding(
    input_dim=n_movies + 1,
    output_dim=n_latent_factors_mf,
    name='Movie-Embedding-MF',
)(movie_input)
movie_vec_mf = Flatten(name='FlattenMovies-MF')(movie_embedding_mf)
movie_vec_mf = Dropout(rate=0.2)(movie_vec_mf)

In [0]:
# User branch for the MLP tower: embed the user index, flatten the embedding
# to a vector and apply dropout.
user_input = Input(shape=(1,), name='User')
user_embedding_mlp = Embedding(
    input_dim=n_users + 1,
    output_dim=n_latent_factors_user,
    name='User-Embedding-MLP',
)(user_input)
user_vec_mlp = Flatten(name='FlattenUsers-MLP')(user_embedding_mlp)
user_vec_mlp = Dropout(rate=0.2)(user_vec_mlp)

In [0]:
# User branch for matrix factorization: a second embedding of the same user
# input, flattened and passed through dropout.
user_embedding_mf = Embedding(
    input_dim=n_users + 1,
    output_dim=n_latent_factors_mf,
    name='User-Embedding-MF',
)(user_input)
user_vec_mf = Flatten(name='FlattenUsers-MF')(user_embedding_mf)
user_vec_mf = Dropout(rate=0.2)(user_vec_mf)

In [0]:
# MLP tower: join the Movie-Embedding-MLP and User-Embedding-MLP vectors and
# pass them through a stack of fully connected layers down to one output.
x = concatenate([movie_vec_mlp, user_vec_mlp], name='Concat')
x = Dropout(rate=0.2)(x)

# First hidden stage: Dense -> BatchNormalization -> Dropout.
x = Dense(units=200, name='FullyConnected')(x)
x = BatchNormalization(name='Batch')(x)
x = Dropout(rate=0.2, name='Dropout-1')(x)

# Second hidden stage with the same layout.
x = Dense(units=100, name='FullyConnected-1')(x)
x = BatchNormalization(name='Batch-2')(x)
x = Dropout(rate=0.2, name='Dropout-2')(x)

# Narrowing layers; only the last two Dense layers specify an activation (ReLU).
x = Dense(units=50, name='FullyConnected-2')(x)
x = Dense(units=20, name='FullyConnected-3', activation='relu')(x)
pred_mlp = Dense(units=1, activation='relu', name='Activation')(x)

In [0]:
# Matrix-factorization prediction: dot product of the Movie-Embedding-MF and
# User-Embedding-MF vectors.
pred_mf = keras.layers.dot(
    [movie_vec_mf, user_vec_mf],
    axes=1,
    name='Dot',
)

In [0]:
# Place the matrix-factorization and MLP predictions side by side.
combine_mlp_mf = concatenate(
    [pred_mf, pred_mlp],
    name='Concat-MF-MLP',
)

In [0]:
# Final MLP: two 100-unit layers on top of the combined predictions, followed
# by the single-unit rating output. None of these layers sets an activation.
x = Dense(units=100, name='Combine-MF-MLP')(combine_mlp_mf)
x = Dense(units=100, name='FullyConnected-4')(x)
result = Dense(units=1, name='Prediction')(x)

In [0]:
# Assemble the model: (user, item) index pair in, predicted rating out.
model = keras.Model(
    inputs=[user_input, movie_input],
    outputs=result,
)

In [0]:
# Compile with the Adam optimizer and mean absolute error as the training loss.
model.compile(
    optimizer='adam',
    loss='mean_absolute_error',
)

In [0]:
# Render the model architecture as an inline SVG using Graphviz `dot`.
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

# rankdir is handed to Graphviz, which only accepts layout directions such as
# 'TB' (top-to-bottom) or 'LR' (left-to-right). The previous value 'HB' is not
# a valid direction, so 'TB' is requested explicitly for a vertical layout.
SVG(
    model_to_dot(
        model,
        show_shapes=False,
        show_layer_names=True,
        rankdir='TB',
    ).create(prog='dot', format='svg')
)

In [0]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [0]:
# Train for 35 epochs on the (user, item) -> rating pairs of the training
# split, holding out 10% of it for validation.
history = model.fit(
    x=[train.user_id, train.item_id],
    y=train.rating,
    epochs=35,
    verbose=1,
    validation_split=0.1,
)

In [0]:
from sklearn.metrics import mean_absolute_error

In [0]:
# Predict a rating for every (user, item) pair in the test split.
prediction = model.predict(x=[test.user_id, test.item_id])

In [0]:
# Shape of the array returned by model.predict.
prediction.shape

In [0]:
# First five raw predictions.
prediction[:5]

In [0]:
# Turn the raw predictions into whole-star ratings.
# np.round with decimals=0 rounds to the nearest integer (exact .5 ties go to
# the nearest even value, not always up). The 'Prediction' layer is an
# unbounded linear output, so a rounded value can fall outside the MovieLens
# 1-5 rating scale (e.g. 0 or 6); np.clip clamps such values back into range.
y_hat = np.clip(np.round(prediction, decimals=0), 1, 5)

In [0]:
# First five rounded predictions.
y_hat[:5]

In [0]:
# Evaluate the rounded predictions against the true test ratings.
print(
    "Mean_Absolute_Error:",
    mean_absolute_error(y_true=y_true, y_pred=y_hat),
)

In [0]:
# Evaluate the raw (unrounded) predictions against the true test ratings.
print(
    "Mean_Absolute_Error:",
    mean_absolute_error(y_true=y_true, y_pred=prediction),
)

In [0]:
# Persist the trained model to an HDF5 file in the working directory.
model.save(filepath='neural_colab_filtering.h5')

In [0]:
ls

In [0]:
# Download the trained model file to the local machine via the Colab helper.
from google.colab import files

files.download('neural_colab_filtering.h5')

In [0]:
ls