In [1]:
import pandas as pd
from nmf_model import NMFModel

In [2]:
train_data=pd.read_csv("../datasets/assignment/train_set.csv")
test_data=pd.read_csv("../datasets/assignment/test_set.csv")

In [3]:
user_train_input=train_data['userId'].values.tolist()
user_test_input=test_data['userId'].values.tolist()

item_train_input=train_data['movieId'].values.tolist()
item_test_input=test_data['movieId'].values.tolist()

label_train_input=train_data['preference'].values.tolist()
label_test_input=test_data['preference'].values.tolist()

In [4]:
num_users=len(train_data.userId.unique())+1
num_items=max(item_train_input)+1

In [5]:
model_all=NMFModel(user_train_input, item_train_input, label_train_input, num_users, num_items)

In [6]:
def cal_result(pred_result, label_test_input):
    ans=0
    for i in range(len(pred_result)):
        if label_test_input==0:
            continue
        elif label_test_input[i]==1 and pred_result[i]>0:
            ans+=1
        elif label_test_input[i]==-1 and pred_result[i]<0:
            ans+=1
        else:
            ans-=1
    return ans

### gmf

In [7]:
gmf_model=model_all.get_gmf_model()

In [8]:
model_all.print_graph('gmf')

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
item_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
user_embedding (Embedding)      (None, 1, 16)        2215904     user_input[0][0]                 
__________________________________________________________________________________________________
item_embedding (Embedding)      (None, 1, 16)        2087856     item_input[0][0]                 
______________________________________________________________________________________________

In [10]:
gmf_hist=model_all.fit_model('gmf', './pretrained/training_2/gmf')

Train on 15971866 samples
Epoch 1/5
Epoch 00001: saving model to ./pretrained/training_2/gmf
Epoch 2/5
Epoch 00002: saving model to ./pretrained/training_2/gmf
Epoch 3/5
Epoch 00003: saving model to ./pretrained/training_2/gmf
Epoch 4/5
Epoch 00004: saving model to ./pretrained/training_2/gmf
Epoch 5/5
Epoch 00005: saving model to ./pretrained/training_2/gmf


In [9]:
gmf_model=model_all.load_weight('gmf', './pretrained/training_2/gmf')

In [None]:
gmf_pred=model_all.predict('gmf', user_test_input, item_test_input)


In [None]:
cal_result(gmf_pred, label_test_input)

928167

### mlp

In [10]:
mlp_model=model_all.get_mlp_model(num_layers=3)

In [11]:
model_all.print_graph('mlp')

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
item_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
user_embedding (Embedding)      (None, 1, 32)        4431808     user_input[0][0]                 
__________________________________________________________________________________________________
item_embedding (Embedding)      (None, 1, 32)        4175712     item_input[0][0]                 
____________________________________________________________________________________________

In [12]:
mlp_model=model_all.load_weight('mlp', './pretrained/training_2/mlp')

In [15]:
mlp_hist=model_all.fit_model('mlp', './pretrained/training_2/mlp')

Train on 15971866 samples
Epoch 1/5
Epoch 00001: saving model to ./pretrained/training_2/mlp
Epoch 2/5
Epoch 00002: saving model to ./pretrained/training_2/mlp
Epoch 3/5
Epoch 00003: saving model to ./pretrained/training_2/mlp
Epoch 4/5
Epoch 00004: saving model to ./pretrained/training_2/mlp
Epoch 5/5
Epoch 00005: saving model to ./pretrained/training_2/mlp


In [None]:
mlp_pred=model_all.predict('mlp', user_test_input, item_test_input)


In [None]:
cal_result(mlp_pred, label_test_input)

902315

### nmf

In [13]:
nmf_model=model_all.get_nmf_model()

In [14]:
model_all.print_graph('nmf')

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
item_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
MLP_user_embedding (Embedding)  (None, 1, 32)        4431808     user_input[0][0]                 
__________________________________________________________________________________________________
MLP_item_embedding (Embedding)  (None, 1, 32)        4175712     item_input[0][0]                 
____________________________________________________________________________________________

In [15]:
nmf_pred=model_all.fit_model('nmf', './pretrained/training_2/nmf')

Train on 15971866 samples
Epoch 1/5
Epoch 00001: saving model to ./pretrained/training_2/nmf
Epoch 2/5
Epoch 00002: saving model to ./pretrained/training_2/nmf
Epoch 3/5
Epoch 00003: saving model to ./pretrained/training_2/nmf
Epoch 4/5
Epoch 00004: saving model to ./pretrained/training_2/nmf
Epoch 5/5
Epoch 00005: saving model to ./pretrained/training_2/nmf


In [None]:
nmf_pred=model_all.predict('nmf', user_test_input, item_test_input)


In [None]:
cal_result(nmf_pred, label_test_input)

871565

In [22]:
nmf_pred

array([0.31575507, 0.6557944 , 0.9454955 , ..., 0.55694944, 0.22265647,
       0.91134447], dtype=float32)

In [23]:
gmf_pred

array([0.28996745, 0.58728915, 0.74631387, ..., 0.31497043, 0.45203266,
       0.8878511 ], dtype=float32)

In [24]:
mlp_pred

array([-0.08990841,  0.5454825 ,  0.82680434, ...,  0.2829082 ,
        0.01542157,  0.8529042 ], dtype=float32)

In [26]:
import numpy as np

In [27]:
x=np.matrix([gmf_pred, mlp_pred, nmf_pred])

In [28]:
s=x.sum(axis=0)

In [38]:
s[0][0]

matrix([[0.5158141, 1.7885661, 2.5186138, ..., 1.1548281, 0.6901107,
         2.6520998]], dtype=float32)

In [42]:
arr=s.getA()

In [44]:
arr[0]

array([0.5158141, 1.7885661, 2.5186138, ..., 1.1548281, 0.6901107,
       2.6520998], dtype=float32)

In [45]:
cal_result(arr[0], label_test_input)

952481

In [48]:
user_test_input

[118853,
 1734,
 65850,
 99208,
 133444,
 36468,
 78722,
 9720,
 120385,
 5348,
 129046,
 57225,
 861,
 99991,
 20054,
 2287,
 10023,
 2724,
 67215,
 37161,
 121723,
 64383,
 96740,
 111949,
 88363,
 115904,
 112093,
 23068,
 55726,
 47335,
 43839,
 30907,
 136250,
 92584,
 84952,
 71804,
 75700,
 22231,
 73213,
 135792,
 93895,
 78289,
 7977,
 97438,
 89032,
 115811,
 30709,
 120274,
 60795,
 2150,
 29500,
 75349,
 30503,
 131655,
 8379,
 115855,
 129169,
 113584,
 62942,
 29099,
 72284,
 123487,
 22739,
 74520,
 15544,
 45529,
 132853,
 66847,
 76292,
 88032,
 71020,
 108507,
 130178,
 93922,
 115928,
 137318,
 34423,
 46112,
 12197,
 39107,
 39405,
 76688,
 28860,
 84910,
 42929,
 121513,
 53758,
 112311,
 127981,
 32146,
 6111,
 79497,
 20028,
 61368,
 81507,
 28847,
 61324,
 117736,
 124195,
 4108,
 94446,
 29296,
 2523,
 170,
 120315,
 68250,
 112282,
 3157,
 271,
 132718,
 108173,
 108106,
 63345,
 37518,
 41370,
 8067,
 71618,
 103709,
 41563,
 17798,
 103109,
 92107,
 136555,


In [49]:
item_test_input

[4232,
 1701,
 535,
 6832,
 318,
 3683,
 1834,
 262,
 296,
 25,
 786,
 1215,
 613,
 5064,
 3107,
 6213,
 673,
 1128,
 77233,
 802,
 1127,
 33672,
 2683,
 97866,
 1537,
 249,
 653,
 57370,
 1411,
 65,
 300,
 31,
 520,
 5881,
 3408,
 4306,
 329,
 2312,
 464,
 520,
 98809,
 47,
 637,
 33166,
 158,
 1917,
 327,
 107627,
 59387,
 3639,
 1221,
 173,
 457,
 380,
 4857,
 1263,
 2797,
 6863,
 915,
 1076,
 1584,
 1747,
 44195,
 539,
 3578,
 356,
 2639,
 2906,
 2571,
 1060,
 95,
 71462,
 1573,
 912,
 1387,
 2312,
 6541,
 3,
 457,
 778,
 3037,
 1234,
 89864,
 4787,
 1196,
 2428,
 1234,
 3791,
 2406,
 7438,
 45081,
 4321,
 34534,
 1552,
 161,
 4191,
 785,
 345,
 589,
 4993,
 1573,
 2701,
 166,
 1078,
 1015,
 4084,
 3129,
 1298,
 86377,
 318,
 5452,
 592,
 2890,
 1952,
 920,
 7323,
 5382,
 252,
 589,
 72356,
 357,
 50,
 7163,
 69908,
 4901,
 2985,
 6323,
 3999,
 541,
 2385,
 7354,
 3704,
 410,
 8341,
 4969,
 1206,
 253,
 56801,
 51255,
 593,
 1221,
 215,
 3489,
 2997,
 6378,
 44058,
 26527,
 8961,
 

In [47]:
label_test_input

[0.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 -1.0,
 1.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 -1.0,
 0.0,
 -1.0,
 1.0,
 0.0,
 -1.0,
 0.0,
 1.0,
 0.0,
 1.0,
 1.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 1.0,
 0.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 -1.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 0.0,
 -1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 1.0,
 -1.0,
 1.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 -1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 0.0,
 -1.0,
 1.0,
 1.0,
 -1.0,
 1.0,
 0.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 -1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 -1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 -1.0,
 1.0,
 0.0,
 0.0,
 1.0,
 1.0,
 -1.0,
 0.0,
 -1.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 -1.0,
 -1.0,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 1.0,
 -1.0,
 0.0,
 