In [4]:
# 第 1 部分
# 載入函式庫與資料集
from keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn import metrics

import numpy as np
import pandas as pd

np.random.seed(123456)
data = pd.read_csv('../Data/ratings.csv')

In [5]:
data.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [6]:
# 第 2 部分
# 資料清理
def get_data(data):

    # 刪除時間
    data.drop('timestamp', axis=1, inplace=True)
    
    # 抓出所有使用者跟電影索引
    users = data.userId.unique()
    movies = data.movieId.unique()
 
    # 找出新索引跟原始索引的對應關係
    # 原始索引是moveies[i]，會改成i
    moviemap={}
    for i in range(len(movies)):
        moviemap[movies[i]]=i
    usermap={}
    for i in range(len(users)):
        usermap[users[i]]=i
    
    # 根據對應關係，將原始索引更改成新索(連續整數值)
    data.movieId = data.movieId.apply(lambda x: moviemap[x])    
    data.userId = data.userId.apply(lambda x: usermap[x])    
        
    # 打亂資料
    data = data.sample(frac=1.0).reset_index(drop=True)
    
    # 建立訓練資料、測試資料集
    train, test = train_test_split(data, test_size=0.2)
    
    n_users = len(users)
    n_movies = len(movies)

    return train, test, n_users, n_movies

train, test, n_users, n_movies = get_data(data)

In [7]:
# 第 3 部分
# 建立神經網路結構

fts = 5 # 嵌入層的輸出維度

# 輸入層接收資料
# 嵌入層將資料轉換成5維矩陣
# 展平層將5維矩陣拉直成陣列

# 處理電影索引的嵌入層
movie_in = Input(shape=[1], name="Movie")
mov_embed = Embedding(n_movies, fts, name="Movie_Embed")(movie_in)
flat_movie = Flatten(name="FlattenM")(mov_embed)

# 處理使用者索引的嵌入層
user_in = Input(shape=[1], name="User")
user_inuser_embed = Embedding(n_users, fts, name="User_Embed")(user_in)
flat_user = Flatten(name="FlattenU")(user_inuser_embed)

# 將兩個嵌入層的輸出值串接之後餵入密集層
concat = Concatenate()([flat_movie, flat_user])
dense_1 = Dense(128)(concat)
dense_2 = Dense(32)(dense_1)
out = Dense(1)(dense_2)

# 編譯模型
model = Model([user_in, movie_in], out)
model.compile('adam', 'mean_squared_error')
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Movie (InputLayer)              (None, 1)            0                                            
__________________________________________________________________________________________________
User (InputLayer)               (None, 1)            0                                            
__________________________________________________________________________________________________
Movie_Embed (Embedding)         (None, 1, 5)         48620       Movie[0][0]                      
__________________________________________________________________________________________________
User_Embed (Embedding)          (None, 1, 5)         3050        User[0][0]                       
____________________________________________________________________________________________

In [8]:
# 第 4 部分
# 訓練神經網路
model.fit([train.userId, train.movieId], train.rating, epochs=10, verbose=1)

# 評估神經網路
print("MSE:", 
      metrics.mean_squared_error(test.rating, 
                                 model.predict([test.userId, test.movieId])))


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
MSE: 0.7704167223986362
