In [60]:
import sqlalchemy
import sqlite3 as sql
import pandas as pd
import numpy as np
from matrix_factorization import KernelMF, train_update_test_split
import itertools
import pickle

In [31]:
database = 'website/database.db'
connection = sql.connect(database)

In [32]:
query = '''SELECT * FROM subs'''
df = pd.read_sql_query(query, connection)

In [33]:
df

Unnamed: 0,user_id,note_id
0,2,1
1,1,2
2,3,4
3,3,3
4,3,5
5,3,6
6,3,8
7,3,7
8,4,2
9,4,3


In [9]:
df['user_id'].unique(), df['note_id'].unique()

(array([2, 1, 3, 4], dtype=int64),
 array([1, 2, 4, 3, 5, 6, 8, 7], dtype=int64))

In [10]:
ls = list()
for i in df.values:
    print(i)

[2 1]
[1 2]
[3 4]
[3 3]
[3 5]
[3 6]
[3 8]
[3 7]
[4 2]
[4 3]
[4 4]
[4 5]
[4 6]
[2 3]
[2 2]
[2 4]
[2 5]
[2 7]


In [11]:
df_full = pd.DataFrame(itertools.product(df['user_id'].unique(), df['note_id'].unique()), columns=['user_id', 'note_id'])

In [12]:
target = list()
for idx, value in enumerate(df_full[['user_id', 'note_id']].values.tolist()):
    target.append(1) if value in df.values.tolist() else target.append(0)  
df_full['target'] = target
df_full.head(5)

Unnamed: 0,user_id,note_id,target
0,2,1,1
1,2,2,1
2,2,4,1
3,2,3,1
4,2,5,1


In [54]:
df_full = df_full[(df_full['user_id']!=1) | (df_full['target']!=0)]

In [55]:
x_train = df_full.rename(columns = {'note_id': 'item_id'}, inplace = False).drop(['target'], axis=1)
y_train = df_full['target']

In [56]:
matrix_fact = KernelMF(n_epochs = 20, n_factors = 2, verbose = 1, lr = 0.001)
matrix_fact.fit(x_train, y_train)

Epoch  1 / 20  -  train_rmse: 0.44572883968207666
Epoch  2 / 20  -  train_rmse: 0.4452465514162804
Epoch  3 / 20  -  train_rmse: 0.444769836454211
Epoch  4 / 20  -  train_rmse: 0.44429731137134937
Epoch  5 / 20  -  train_rmse: 0.44382881707564265
Epoch  6 / 20  -  train_rmse: 0.4433655445394877
Epoch  7 / 20  -  train_rmse: 0.442906612220748
Epoch  8 / 20  -  train_rmse: 0.442451924518362
Epoch  9 / 20  -  train_rmse: 0.44200237639970397
Epoch  10 / 20  -  train_rmse: 0.4415565699342561
Epoch  11 / 20  -  train_rmse: 0.44111510658154557
Epoch  12 / 20  -  train_rmse: 0.4406780577270475
Epoch  13 / 20  -  train_rmse: 0.4402451430397552
Epoch  14 / 20  -  train_rmse: 0.439816628715258
Epoch  15 / 20  -  train_rmse: 0.4393917130743635
Epoch  16 / 20  -  train_rmse: 0.43897081158623324
Epoch  17 / 20  -  train_rmse: 0.4385542037009738
Epoch  18 / 20  -  train_rmse: 0.4381408270388969
Epoch  19 / 20  -  train_rmse: 0.43773253483861724
Epoch  20 / 20  -  train_rmse: 0.43732698496727956


KernelMF(gamma=0.5, lr=0.001, n_epochs=20, n_factors=2)

In [57]:
user = 1
items_known = x_train.query('user_id == @user')['item_id']
matrix_fact.recommend(user=user, items_known=items_known)

Unnamed: 0,user_id,item_id,rating_pred
6,1,4,0.756113
5,1,7,0.733502
1,1,3,0.728544
0,1,6,0.726745
2,1,5,0.726395
4,1,8,0.721123
3,1,1,0.703677


In [63]:
matrix_fact.recommend(user=1, amount=5)['item_id'].to_list()

[4, 7, 2, 3, 6]