In [10]:
# 第 1 部分
# 載入函式庫與資料集
from keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from keras.models import Model
from keras.callbacks import LambdaCallback
from sklearn.model_selection import train_test_split
from sklearn import metrics

import numpy as np
import pandas as pd

np.random.seed(123456)
data = pd.read_csv('../Data/ratings.csv')

In [11]:
data.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [12]:
# 第 2 部分
# 資料清理
def get_data(data):

    # 刪除時間
    data.drop('timestamp', axis=1, inplace=True)
    
    # 抓出所有使用者跟電影索引
    users = data.userId.unique()
    movies = data.movieId.unique()
 
    # 找出新索引跟原始索引的對應關係
    # 原始索引是moveies[i]，會改成i
    moviemap={}
    for i in range(len(movies)):
        moviemap[movies[i]]=i
    usermap={}
    for i in range(len(users)):
        usermap[users[i]]=i
    
    # 根據對應關係，將原始索引更改成新索(連續整數值)
    data.movieId = data.movieId.apply(lambda x: moviemap[x])    
    data.userId = data.userId.apply(lambda x: usermap[x])    
        
    # 打亂資料
    data = data.sample(frac=1.0).reset_index(drop=True)
    
    # 建立訓練資料、測試資料集
    train, test = train_test_split(data, test_size=0.2)
    
    n_users = len(users)
    n_movies = len(movies)

    return train, test, n_users, n_movies

train, test, n_users, n_movies = get_data(data)

## 只留編譯模型的部分於create_model函式

In [13]:
# 第 3 部分
# 定義基學習器的相關函式
def create_model(n_features=5, 
                 train_model=True, 
                 load_weights=False):

    fts = n_features
    
    # 輸入層接收資料
    # 嵌入層將資料轉換成n維矩陣
    # 展平層將n維矩陣拉直成陣列
    
    # 處理電影索引的嵌入層
    movie_in = Input(shape=[1], name="Movie")
    mov_embed = Embedding(n_movies, fts, name="Movie_Embed")(movie_in)
    flat_movie = Flatten(name="FlattenM")(mov_embed)
    
    # 處理使用者索引的嵌入層
    user_in = Input(shape=[1], name="User")
    user_inuser_embed = Embedding(n_users, fts, name="User_Embed")(user_in)
    flat_user = Flatten(name="FlattenU")(user_inuser_embed)
    
    # 將兩個嵌入層的輸出值串接之後餵入密集層
    concat = Concatenate()([flat_movie, flat_user])
    dense_1 = Dense(128)(concat)
    dense_2 = Dense(32)(dense_1)
    out = Dense(1)(dense_2)
    
    # 編譯模型
    model = Model([user_in, movie_in], out)
    model.compile('adam', 'mean_squared_error')
    
    return model

## 定義一個回呼函數(Callback function)可以在訓練基學習器的過程中每次epoch結束時，將基學習器的參數儲存在一個字典裡

In [None]:
# https://stackoverflow.com/questions/53943877/how-to-fix-unsupported-operand-error-in-keras-callbacks-earlystopping

In [35]:
model5 = create_model(5)

In [78]:
# https://keras.io/guides/writing_your_own_callbacks/
class LossAndErrorPrintingCallback(keras.callbacks.Callback):
#     def on_train_batch_end(self, batch, logs=None):
#         print(
#             "Up to batch {}, the average loss is {:7.2f}.".format(batch, logs["loss"])
#         )

#     def on_test_batch_end(self, batch, logs=None):
#         print(
#             "Up to batch {}, the average loss is {:7.2f}.".format(batch, logs["loss"])
#         )

    def on_epoch_end(self, epoch, logs=None):
        print(epoch, logs)


model = create_model()
model.fit([train.userId, train.movieId],
                    train.rating,
          epochs=2,
          verbose=1,
          callbacks=[LossAndErrorPrintingCallback()],
)



Epoch 1/2
0 {'loss': 0.9564280376564607}
Epoch 2/2
1 {'loss': 0.7294657855358513}


<keras.callbacks.callbacks.History at 0x1897e9dcbc8>

In [85]:
# https://keras.io/guides/writing_your_own_callbacks/

class EarlyStoppingAtMinLoss(keras.callbacks.Callback):
    """Stop training when the loss is at its min, i.e. the loss stops decreasing.

  Arguments:
      patience: Number of epochs to wait after min has been hit. After this
      number of no improvement, training stops.
  """

    def __init__(self, patience=0):
        super(EarlyStoppingAtMinLoss, self).__init__()
        self.patience = patience
        # best_weights to store the weights at which the minimum loss occurs.
        self.best_weights = None

#     def on_train_begin(self, logs=None):
#         # The number of epoch it has waited when loss is no longer minimum.
#         self.wait = 0
#         # The epoch the training stops at.
#         self.stopped_epoch = 0
#         # Initialize the best as infinity.
        self.best = np.Inf

    def on_epoch_end(self, epoch, logs=None):
        current = logs.get("loss")
        if np.less(current, self.best):
            self.best = current
            self.wait = 0
            # Record the best weights if current results is better (less).
            self.best_weights = self.model.get_weights()
        else:
            self.wait += 1
            if self.wait >= self.patience:
                self.stopped_epoch = epoch
                self.model.stop_training = True
                print("Restoring model weights from the end of the best epoch.")
                self.model.set_weights(self.best_weights)

#     def on_train_end(self, logs=None):
#         if self.stopped_epoch > 0:
#             print("Epoch %05d: early stopping" % (self.stopped_epoch + 1))


model = create_model()
model.fit([train.userId, train.movieId],
                    train.rating,
          epochs=2,
          verbose=1,
          callbacks=[EarlyStoppingAtMinLoss()],
)



Epoch 1/2
Epoch 2/2


<keras.callbacks.callbacks.History at 0x18900120bc8>

In [None]:
# https://blog.csdn.net/breeze5428/article/details/80875323
# study

In [113]:
# https://stackoverflow.com/questions/51186330/save-model-weights-at-the-end-of-every-n-epochs

mc = keras.callbacks.ModelCheckpoint('weights{epoch:08d}.h5', 
                                     save_weights_only=True,  # 若設置為True，則只保存模型權重，否則將保存整個模型（包括模型結構，配置信息等）
                                     period=5)  # CheckPoint之間的間隔的epoch數
model.fit([train.userId, train.movieId],
                    train.rating,
          epochs=2,
          verbose=1, callbacks=[mc])

Epoch 1/2
Epoch 2/2


<keras.callbacks.callbacks.History at 0x189017d47c8>

In [119]:
mc.model.get_weights()

[array([[ 0.1168268 , -0.08998933,  0.13771272,  0.1354956 ,  0.0913923 ],
        [ 0.03764403,  0.01613329, -0.02355681,  0.02295799, -0.00865662],
        [ 0.09878837, -0.08519192,  0.17101151,  0.11389064,  0.10772514],
        ...,
        [-0.13834769,  0.043893  , -0.06256793, -0.07633436, -0.14659919],
        [-0.01657953,  0.00771682, -0.02444046,  0.00836712, -0.00423916],
        [ 0.01648791, -0.02645889, -0.01764667,  0.02412969, -0.0172171 ]],
       dtype=float32),
 array([[-0.28178084, -0.23485656,  0.18839906,  0.1842154 , -0.00324213],
        [-0.0255482 , -0.04776986, -0.02001251, -0.00028417, -0.02157106],
        [ 0.24473113,  0.27094802, -0.28041205, -0.28138185,  0.04199642],
        ...,
        [ 0.04760585, -0.00594525, -0.04479581,  0.0369049 ,  0.03734703],
        [ 0.07084848,  0.05005903, -0.03281233, -0.04903731,  0.0481046 ],
        [-0.09452103, -0.08846413,  0.03650123,  0.03404885, -0.03009002]],
       dtype=float32),
 array([[ 0.15336093, -0.1

In [223]:
acc=[]
lc = LambdaCallback(  on_epoch_end=lambda epochs,logs:acc.append([logs.get('accuracy'), logs.get('val_loss')]))

avg_weight = []
print_weights = LambdaCallback(on_epoch_end=lambda batch, logs: avg_weight.append([(model.layers[0].get_weights())[0].mean(),logs.get('acc')]))


weights_dict_ = {}
weight_callback = LambdaCallback(on_epoch_end=lambda epoch, logs: weights_dict_.update({epoch:model.layers[0].get_weights()}))

history = model.fit([train.userId, train.movieId],
                    train.rating,
          epochs=2,
          verbose=1, callbacks=[weight_callback])

Epoch 1/2
Epoch 2/2


In [230]:
model.weights

[<tf.Variable 'Movie_Embed_21/embeddings:0' shape=(9724, 5) dtype=float32>,
 <tf.Variable 'User_Embed_21/embeddings:0' shape=(610, 5) dtype=float32>,
 <tf.Variable 'dense_64/kernel:0' shape=(10, 128) dtype=float32>,
 <tf.Variable 'dense_64/bias:0' shape=(128,) dtype=float32>,
 <tf.Variable 'dense_65/kernel:0' shape=(128, 32) dtype=float32>,
 <tf.Variable 'dense_65/bias:0' shape=(32,) dtype=float32>,
 <tf.Variable 'dense_66/kernel:0' shape=(32, 1) dtype=float32>,
 <tf.Variable 'dense_66/bias:0' shape=(1,) dtype=float32>]

In [224]:
weights_dict_

{0: [], 1: []}

In [219]:
history.history.keys()

dict_keys(['loss'])

[array([[ 0.09053749, -0.0654102 ,  0.10844129,  0.1164945 ,  0.05998673],
        [ 0.02963471,  0.01982243, -0.03285991,  0.00555193, -0.01648581],
        [ 0.06897778, -0.06594787,  0.13156101,  0.08413618,  0.06865222],
        ...,
        [-0.13724647,  0.04262668, -0.06070407, -0.07489233, -0.14427912],
        [-0.0101266 , -0.0003538 , -0.017389  ,  0.01370858,  0.00198212],
        [ 0.01648791, -0.02645889, -0.01764667,  0.02412969, -0.0172171 ]],
       dtype=float32),
 array([[-0.2604033 , -0.21135528,  0.20776267,  0.1908926 ,  0.02987204],
        [-0.03994836, -0.03738501, -0.00254506,  0.02436391,  0.00675101],
        [ 0.18919691,  0.20764008, -0.2624756 , -0.22690986,  0.09836693],
        ...,
        [ 0.05900272,  0.00830745,  0.00455583, -0.00056477,  0.01343446],
        [ 0.04520173,  0.02061272, -0.03078715, -0.02565353,  0.0554044 ],
        [-0.07803461, -0.07059213,  0.0554729 ,  0.03982537, -0.07071695]],
       dtype=float32),
 array([[ 0.16305578, -0.1

In [112]:
len(h.model.get_weights()[0])

9724

In [89]:
h.model.get_weights()

[array([[ 0.07276482, -0.06521675,  0.09494565,  0.11796512,  0.04905754],
        [ 0.03763324,  0.00418596, -0.02507049,  0.02304847, -0.00924062],
        [ 0.03872851, -0.03703312,  0.09912799,  0.08059283,  0.03255887],
        ...,
        [-0.1435695 ,  0.04807957, -0.06553981, -0.08638082, -0.14955439],
        [ 0.01101597, -0.0223999 ,  0.00260028,  0.03528346,  0.02119745],
        [ 0.01648791, -0.02645889, -0.01764667,  0.02412969, -0.0172171 ]],
       dtype=float32),
 array([[-2.0675877e-01, -1.6170095e-01,  1.8578410e-01,  1.5414038e-01,
         -5.0105643e-02],
        [-6.2785044e-02, -6.3513145e-02,  3.7298996e-02,  3.3678715e-03,
         -1.7704222e-02],
        [ 1.3654581e-01,  1.5819500e-01, -2.0944935e-01, -2.0608018e-01,
          1.0234569e-01],
        ...,
        [ 4.1206617e-02, -8.6588552e-06,  1.1125995e-02, -5.1906002e-03,
          6.3741938e-03],
        [ 5.0820142e-02,  3.7953772e-02, -3.7302617e-02, -2.5076460e-02,
          1.5977677e-02],
     

In [206]:
# 第 4 部分
# 定義回呼函數並訓練模型
weights_dict = {}
weight_callback = LambdaCallback(on_epoch_end=lambda epoch, logs: weights_dict.update({epoch:model.get_weights()}))


model = create_model(5)

history = model.fit([train.userId, train.movieId],
                    train.rating, 
                    epochs=10, 
                    callbacks=[weight_callback],
                    verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [211]:
history.history.keys()

dict_keys(['loss'])

In [207]:
len(weights_dict) 

10

In [208]:
weights_dict[9] 

[array([[ 0.16831529,  0.04529983, -0.11541461, -0.12261596, -0.10509757],
        [-0.00850782,  0.02272367,  0.03372698,  0.00627651, -0.1120891 ],
        [ 0.16220663,  0.11641676, -0.07183731, -0.10556759, -0.04595497],
        ...,
        [-0.10286915, -0.08096343,  0.11954395,  0.14518508,  0.13354221],
        [ 0.04689776,  0.00739368, -0.00493831, -0.04366955, -0.0677544 ],
        [ 0.0224624 ,  0.04178615,  0.02189574,  0.01911179,  0.04388652]],
       dtype=float32),
 array([[ 0.18446396, -0.32468605, -0.22613357, -0.174428  ,  0.02995373],
        [ 0.19289885, -0.01055067, -0.08803312, -0.15899211, -0.05517348],
        [-0.15938433,  0.36278868,  0.2808494 ,  0.21153304,  0.06805439],
        ...,
        [ 0.01396004,  0.04183837,  0.01552383, -0.05713447, -0.00542847],
        [-0.07549749,  0.07787583,  0.00802237,  0.01766583, -0.04357839],
        [-0.00620779, -0.14974405, -0.09353119, -0.10505418,  0.00469801]],
       dtype=float32),
 array([[ 0.1637671 , -0.1

In [209]:
# weights_dict[1]
history.model.get_weights()

[array([[ 0.16831529,  0.04529983, -0.11541461, -0.12261596, -0.10509757],
        [-0.00850782,  0.02272367,  0.03372698,  0.00627651, -0.1120891 ],
        [ 0.16220663,  0.11641676, -0.07183731, -0.10556759, -0.04595497],
        ...,
        [-0.10286915, -0.08096343,  0.11954395,  0.14518508,  0.13354221],
        [ 0.04689776,  0.00739368, -0.00493831, -0.04366955, -0.0677544 ],
        [ 0.0224624 ,  0.04178615,  0.02189574,  0.01911179,  0.04388652]],
       dtype=float32),
 array([[ 0.18446396, -0.32468605, -0.22613357, -0.174428  ,  0.02995373],
        [ 0.19289885, -0.01055067, -0.08803312, -0.15899211, -0.05517348],
        [-0.15938433,  0.36278868,  0.2808494 ,  0.21153304,  0.06805439],
        ...,
        [ 0.01396004,  0.04183837,  0.01552383, -0.05713447, -0.00542847],
        [-0.07549749,  0.07787583,  0.00802237,  0.01766583, -0.04357839],
        [-0.00620779, -0.14974405, -0.09353119, -0.10505418,  0.00469801]],
       dtype=float32),
 array([[ 0.1637671 , -0.1

In [168]:
# final_weights = (np.array(weights_dict[9]) + 
#                  np.array(weights_dict[8]) + 
#                  np.array(weights_dict[6])) / 3.0
# final_weights.shape # (8,)
# final_weights

  
  This is separate from the ipykernel package so we can avoid doing imports until


array([array([[ 0.10645285,  0.07706374, -0.10491475,  0.08354305,  0.13704465],
              [-0.03561612,  0.03452458,  0.0003613 , -0.00326335,  0.00488208],
              [ 0.12496477,  0.15349808, -0.10352463,  0.05668099,  0.06190388],
              ...,
              [-0.08349016, -0.08113912,  0.09085006, -0.05685028, -0.06027411],
              [-0.02007741, -0.02767142, -0.04079578,  0.05507313,  0.01185412],
              [ 0.03871677,  0.03191639, -0.0022112 , -0.00894641,  0.00093371]],
             dtype=float32)                                                      ,
       array([[-0.27024624,  0.29249737, -0.00961705, -0.20385313,  0.18578483],
              [-0.02951139,  0.04423621,  0.03463923, -0.06193017,  0.11976314],
              [ 0.26488063, -0.31392643,  0.13787119,  0.23301478, -0.25805914],
              ...,
              [ 0.00288111, -0.0584236 , -0.11846658,  0.02821096,  0.07695859],
              [ 0.03346786, -0.06286823, -0.06309313,  0.03132626, -

In [210]:
# 第 5 部分
# 集成模型
print('Base Learner')
print(metrics.mean_squared_error(test.rating, model.predict([test.userId, test.movieId])))

final_weights = (np.array(weights_dict[9]) + 
                 np.array(weights_dict[8]) + 
                 np.array(weights_dict[6])) / 3.0
model.set_weights(final_weights)

print('Ensemble')
print(metrics.mean_squared_error(test.rating, model.predict([test.userId, test.movieId])))

Base Learner
0.7754712384063417


  import sys
  


Ensemble
0.7743860140244611
