In [3]:
!pip install matrix_factorization

Collecting matrix_factorization
  Using cached matrix_factorization-1.3.tar.gz (12 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: matrix-factorization
  Building wheel for matrix-factorization (setup.py): started
  Building wheel for matrix-factorization (setup.py): finished with status 'done'
  Created wheel for matrix-factorization: filename=matrix_factorization-1.3-py3-none-any.whl size=16467 sha256=f8451f2bdcd9a3b4f09442f6675d0e0ecf19b3b8cd334b5056bcfe5e61cf1326
  Stored in directory: c:\users\taiminh\appdata\local\pip\cache\wheels\f6\34\7b\11dd792f060e9c2d7230855147d3bfadbd2b6bbcf5132dcbbc
Successfully built matrix-factorization
Installing collected packages: matrix-factorization
Successfully installed matrix-factorization-1.3


You should consider upgrading via the 'c:\users\taiminh\anaconda3\python.exe -m pip install --upgrade pip' command.


In [4]:
# Default number of epochs when MF trains a model from scratch.
NUM_EPOCHS = 100
# Default number of epochs when MF.update folds new ratings into a trained model.
UPDATE_N_EPOCHS = 10

In [7]:
# Data manipulation
import numpy as np
import pandas as pd

# Modeling
from matrix_factorization import BaselineModel, KernelMF, train_update_test_split
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Other
import os
import random
import sys
    
# Saving model
import pickle

# Seed NumPy's and the stdlib's global random generators with the same value.
rand_seed = 2
np.random.seed(rand_seed)
random.seed(rand_seed)

# Ratings CSV location and the names given to the three columns read from it.
DATA_PATH = './data/Ratings.csv'
COL_NAMES = ['user_id', 'item_id', 'rating']

def read_data(data_path, col_names):
    """
        Read the first three columns of a CSV file into a dataframe.

        data_path: path of the CSV file to read
        col_names: names assigned to the three columns that are kept;
            the file's own header row is read and replaced by these names
    """
    kept_positions = [0, 1, 2]
    frame = pd.read_csv(
        data_path,
        usecols=kept_positions,
        names=col_names,
        header=0,
    )
    return frame
    
def split_train_test(dataframe, test_size=0.2, random_state=None):
    """
        Split a ratings dataframe into train and test features/targets.

        dataframe: dataframe with the columns user_id, item_id and rating
        test_size: share of rows placed in the test split (default 0.2)
        random_state: forwarded to train_test_split; None keeps the
            original behaviour of drawing from NumPy's global random state

        returns: the tuple (X_train, X_test, y_train, y_test), where the X
            parts hold user_id/item_id and the y parts hold rating
    """
    # Read from the argument itself; the previous body silently used the
    # notebook-level `data` variable and ignored whatever was passed in.
    X = dataframe[['user_id', 'item_id']]
    y = dataframe['rating']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    return (X_train, X_test, y_train, y_test)

class MF:
    """
        Wrapper around a KernelMF model: train, update, evaluate, recommend,
        and save/load through pickle. The KernelMF instance is kept in
        self.model.
    """

    def __init__(self, X_train=None, y_train=None, num_epochs=NUM_EPOCHS, save_model_path=None):
        """
            X_train: training features passed to KernelMF.fit
                (the notebook passes a dataframe with user_id and item_id)
            y_train: training ratings that correspond to X_train
            num_epochs (int): number of training epochs
            save_model_path (str): * do not include ".pkl"
            * if save_model_path is not none, other parameters do not need any values

            raises ValueError when neither training data nor save_model_path is given
        """
        if save_model_path is None:
            if X_train is None or y_train is None:
                raise ValueError(
                    "X_train and y_train are required when save_model_path is None"
                )
            self.model = self.__train(X_train, y_train, num_epochs)
        else:
            # Rebinding `self` would only change a local name and leave this
            # instance empty, so copy the restored object's state instead.
            restored = MF.load_model(save_model_path)
            self.__dict__.update(restored.__dict__)

    def __train(self, X_train, y_train, num_epochs):
        """
            fit a new KernelMF model on the given data and return it
        """
        matrix_fact = KernelMF(n_epochs = num_epochs, n_factors = 100, verbose = 1, lr = 0.001, reg = 0.2)
        matrix_fact.fit(X_train, y_train)
        return matrix_fact

    def update(self, new_X_train: pd.DataFrame, new_y_train: pd.Series, n_epochs=UPDATE_N_EPOCHS):
        """
            new_X_train: a dataframe with two cols: user_id, item_id
            new_y_train: a series of rating corresponds to new_X_train
            n_epochs (int): number of epochs forwarded to update_users
        """
        self.model.update_users(new_X_train, new_y_train, lr=0.001, n_epochs=n_epochs, verbose=1)

    def evaluate(self, X_test, y_test):
        """
            compute RMSE on test data, print it and return it as a float
        """
        pred = self.model.predict(X_test)
        # Take the square root explicitly: the `squared=False` shortcut was
        # deprecated in scikit-learn 1.4 and removed in 1.6.
        rmse = float(np.sqrt(mean_squared_error(y_test, pred)))
        print(f'\nTest RMSE: {rmse:.4f}')
        return rmse

    def recommend(self, user_id, items_known, numMovie: int):
        """
            user_id: user to recommend for
            items_known: items passed to the model as already known for this user
            numMovie (int): passed to the model as the amount of recommendations

            returns whatever self.model.recommend returns
        """
        return self.model.recommend(user=user_id, items_known=items_known, amount=numMovie)

    def save_model(self, save_path):
        """
            pickle this object to save_path + ".pkl"

            save_path (str): * do not include ".pkl"
        """
        target = save_path + ".pkl"
        parent_dir = os.path.dirname(target)
        if parent_dir:
            # Create the destination folder so the first save does not fail.
            os.makedirs(parent_dir, exist_ok=True)
        # The context manager closes the file even when pickling raises.
        with open(target, "wb") as file_to_store:
            pickle.dump(self, file_to_store)

    @staticmethod
    def load_model(save_path):
        """
            load and return the object pickled at save_path + ".pkl"

            save_path (str): * do not include ".pkl"
        """
        # NOTE(security): unpickling can execute arbitrary code; only load
        # files that come from a trusted source.
        with open(save_path + ".pkl", "rb") as file_to_read:
            return pickle.load(file_to_read)

In [8]:
# Load the ratings table, then show it as the cell's output.
data = read_data(DATA_PATH, COL_NAMES)
data

Unnamed: 0,user_id,item_id,rating
0,1,110,1.0
1,1,147,4.5
2,1,858,5.0
3,1,1246,5.0
4,1,1968,4.0
...,...,...,...
996462,23624,1358,3.5
996463,23624,1392,4.0
996464,23624,1394,4.0
996465,23624,1914,3.5


In [23]:
# Split the ratings into train and test features (X) and targets (y).
X_train, X_test, y_train, y_test = split_train_test(data)

In [24]:
# Build the MF wrapper; without a save_model_path it trains a new model
# on the training split for NUM_EPOCHS epochs.
mf = MF(X_train, y_train, num_epochs=NUM_EPOCHS)

Epoch  1 / 100  -  train_rmse: 0.9733020323594145
Epoch  2 / 100  -  train_rmse: 0.9476490603350869
Epoch  3 / 100  -  train_rmse: 0.9327003300254094
Epoch  4 / 100  -  train_rmse: 0.9224235170074027
Epoch  5 / 100  -  train_rmse: 0.9147965722796519
Epoch  6 / 100  -  train_rmse: 0.9087558628610237
Epoch  7 / 100  -  train_rmse: 0.9039270279522201
Epoch  8 / 100  -  train_rmse: 0.8998851152144685
Epoch  9 / 100  -  train_rmse: 0.8963883452123106
Epoch  10 / 100  -  train_rmse: 0.8934248352516264
Epoch  11 / 100  -  train_rmse: 0.8909284630377061
Epoch  12 / 100  -  train_rmse: 0.8885576539211111
Epoch  13 / 100  -  train_rmse: 0.8865483996490582
Epoch  14 / 100  -  train_rmse: 0.8846950696997883
Epoch  15 / 100  -  train_rmse: 0.8830619896066283
Epoch  16 / 100  -  train_rmse: 0.8815863922051093
Epoch  17 / 100  -  train_rmse: 0.8801574367575242
Epoch  18 / 100  -  train_rmse: 0.8790038319474636
Epoch  19 / 100  -  train_rmse: 0.8777577393587419
Epoch  20 / 100  -  train_rmse: 0.876748

In [26]:
# Print the trained model's RMSE on the test split.
mf.evaluate(X_test, y_test)


Test RMSE: 0.8799


In [None]:
# Pick a user, gather the item ids that user has rated in `data`,
# and request 12 recommendations with those items passed as already known.
user = 200
items_known = data.loc[data['user_id'] == user, 'item_id']
recommendation = mf.recommend(user_id=user, items_known=items_known, numMovie=12)

In [None]:
# Display the recommendation result.
recommendation

Unnamed: 0,user_id,item_id,rating_pred
149,200,260,5.0
114,200,318,4.764187
224,200,750,4.658534
104,200,4993,4.597836
258,200,608,4.58967
164,200,4226,4.489451
194,200,858,4.474746
343,200,2019,4.471256
86,200,296,4.464933
338,200,947,4.441132


In [None]:
# A single new rating: user 1 gives item 102899 a rating of 4.3.
user_item = pd.DataFrame({'user_id': [1], 'item_id': [102899]})
rating = pd.Series([4.3])

In [None]:
# Fold the new rating into the trained model (UPDATE_N_EPOCHS epochs by default).
mf.update(user_item, rating)

Epoch  1 / 10  -  train_rmse: 0.8046181550170037
Epoch  2 / 10  -  train_rmse: 0.8030370342550008
Epoch  3 / 10  -  train_rmse: 0.8014590283401906
Epoch  4 / 10  -  train_rmse: 0.7998841311362463
Epoch  5 / 10  -  train_rmse: 0.7983123365189302
Epoch  6 / 10  -  train_rmse: 0.7967436383760695
Epoch  7 / 10  -  train_rmse: 0.7951780306075324
Epoch  8 / 10  -  train_rmse: 0.7936155071252049
Epoch  9 / 10  -  train_rmse: 0.7920560618529668
Epoch  10 / 10  -  train_rmse: 0.7904996887266673


In [None]:
# Pickle the wrapper to './model/mf.pkl' (save_model appends the ".pkl").
mf.save_model('./model/mf')

In [None]:
# Drop the in-memory wrapper; the cells below use the copy loaded from disk.
del mf

In [None]:
# Restore the pickled wrapper from './model/mf.pkl'.
load_mf = MF.load_model('./model/mf') 

In [None]:
# Ask the reloaded model for 10 recommendations for the same user and known items.
load_mf.recommend(user, items_known, 10)

Unnamed: 0,user_id,item_id,rating_pred
149,200,260,5.0
114,200,318,4.764187
224,200,750,4.658534
104,200,4993,4.597836
258,200,608,4.58967
164,200,4226,4.489451
194,200,858,4.474746
343,200,2019,4.471256
86,200,296,4.464933
338,200,947,4.441132


In [None]:
# Apply the same single-rating update to the reloaded model.
load_mf.update(user_item, rating)

Epoch  1 / 10  -  train_rmse: 0.8522550348692248
Epoch  2 / 10  -  train_rmse: 0.8505800682701294
Epoch  3 / 10  -  train_rmse: 0.8489084013968577
Epoch  4 / 10  -  train_rmse: 0.8472400277488679
Epoch  5 / 10  -  train_rmse: 0.8455749408384232
Epoch  6 / 10  -  train_rmse: 0.8439131341905699
Epoch  7 / 10  -  train_rmse: 0.8422546013431091
Epoch  8 / 10  -  train_rmse: 0.8405993358465729
Epoch  9 / 10  -  train_rmse: 0.838947331264198
Epoch  10 / 10  -  train_rmse: 0.8372985811719031
