In [14]:
import pandas as pd
import numpy as np

# Hi JJ: load every rating given by user 1 into a DataFrame with typed columns.
jj_columns = [('user_id', 'i4'), ('work_id', 'i4'), ('choice', 'U25')]
jj_triplets = Rating.objects.filter(user=1).values_list('user_id', 'work_id', 'choice')
jj = pd.DataFrame(np.array(jj_triplets, dtype=jj_columns))

In [15]:
# Preview the first rows of JJ's ratings.
jj.head()

Unnamed: 0,user_id,work_id,choice
0,1,1146,like
1,1,1875,willsee
2,1,2078,willsee
3,1,3,wontsee
4,1,3117,willsee


In [16]:
from mangaki.utils.values import rating_values

# Convert each textual choice into a numeric rating through the rating_values mapping
# (the preview below shows e.g. like -> 2.0, willsee -> 0.5, wontsee -> -0.5).
jj['rating'] = jj['choice'].map(rating_values)
jj.head()

Unnamed: 0,user_id,work_id,choice,rating
0,1,1146,like,2.0
1,1,1875,willsee,0.5
2,1,2078,willsee,0.5
3,1,3,wontsee,-0.5
4,1,3117,willsee,0.5


In [17]:
from mangaki.algo.dataset import Dataset

# Ratings of everyone except user 1; they are turned into the anonymized dataset
# the models below are fitted on.
other_users_ratings = Rating.objects.exclude(user_id=1)
triplets = other_users_ratings.values_list('user_id', 'work_id', 'choice')
dataset = Dataset()
anonymized = dataset.make_anonymous_data(triplets)

In [19]:
# Translate raw ids into the ids used by `dataset`.
# NOTE(review): user 1 was excluded from the triplets given to `dataset`, and the
# preview below shows encoded_user_id empty for every displayed row; that column
# is overwritten later in the notebook.
# NOTE(review): a work rated only by user 1 would presumably come out empty in
# encoded_work_id as well — confirm before using it as an index.
jj['encoded_user_id'] = jj['user_id'].map(dataset.encode_user)
jj['encoded_work_id'] = jj['work_id'].map(dataset.encode_work)
jj.head()

Unnamed: 0,user_id,work_id,choice,rating,encoded_user_id,encoded_work_id
0,1,1146,like,2.0,,10341
1,1,1875,willsee,0.5,,6917
2,1,2078,willsee,0.5,,1573
3,1,3,wontsee,-0.5,,7888
4,1,3117,willsee,0.5,,5324


In [20]:
# Inputs and targets of the anonymized dataset, used to fit the models below.
X, y = anonymized.X, anonymized.y

In [21]:
from mangaki.algo.als import MangakiALS

# Fit ALS on the anonymized ratings; the model is sized with the dataset's
# user and work counts before fitting.
als = MangakiALS()
als.set_parameters(anonymized.nb_users, anonymized.nb_works)
als.fit(X, y)

Computing M: (2139 × 11653)


Chrono: fill and center matrix [7q, 1113ms]
Chrono: factor matrix [7q, 8509ms]


Shapes (2139, 20) (20, 11653)


In [56]:
# RMSE on the very same (X, y) the model was fitted on, i.e. a training error.
y_pred = als.predict(X)
als.compute_rmse(y, y_pred)
# Recorded result: about 0.924

0.92413970001334411

In [28]:
# (Not displayed: only the last expression of a cell is shown.)
als.U.shape
# Add a user: append one all-zero factor row. The row width is read from als.U
# instead of being hard-coded, so the cell survives a change in the number of
# components.
# NOTE: this cell is not idempotent — every execution appends another row.
als.U = np.vstack((als.U, np.zeros(als.U.shape[1])))
als.U.shape

(2141, 20)

In [69]:
# Append a zero entry to the per-user means so it has a slot for an added user row.
# NOTE: like the als.U growth cell, every execution appends one more entry.
als.means = np.concatenate((als.means, [0]))

In [82]:
def _ensure_user_slot(model, user_id):
    """Grow ``model.U`` and ``model.means`` with zeros so index ``user_id`` exists."""
    missing_rows = user_id + 1 - model.U.shape[0]
    if missing_rows > 0:
        padding = np.zeros((missing_rows, model.U.shape[1]))
        model.U = np.vstack((model.U, padding))
    missing_means = user_id + 1 - len(model.means)
    if missing_means > 0:
        model.means = np.concatenate((model.means, np.zeros(missing_means)))


def train_user(user_id, work_ids, ratings, model=None):
    """Fit the factor row of one user from a handful of ratings, item factors fixed.

    Solves the ridge-regularized least-squares system
    ``(Vu Vu^T + lambda_ * n * I) u = Vu r`` where ``Vu`` holds the columns of
    ``model.VT`` for ``work_ids``, ``r`` is the mean-centered ratings and ``n`` is
    the number of ratings. The result is stored in ``model.U[user_id]`` and the
    user's mean rating in ``model.means[user_id]``.

    Parameters
    ----------
    user_id : int
        Row of ``model.U`` / entry of ``model.means`` to write. If it lies past the
        end of either array, the array is padded with zeros first.
    work_ids : array-like of int, shape (n,)
        Encoded work ids (column indices of ``model.VT``).
    ratings : array-like of float, shape (n,)
        Ratings given by the user to ``work_ids``, in the same order.
    model : object, optional
        Fitted model exposing ``U``, ``VT``, ``means``, ``lambda_`` and
        ``nb_components``. Defaults to the notebook-level ``als``.

    Raises
    ------
    ValueError
        If the inputs are empty, not one-dimensional, or of different lengths.
        Raised before the model is modified.
    """
    if model is None:
        model = als  # notebook-level fitted model

    work_ids = np.asarray(work_ids, dtype=np.int64)
    ratings = np.asarray(ratings, dtype=np.float64)
    if work_ids.ndim != 1 or work_ids.shape != ratings.shape:
        raise ValueError('work_ids and ratings must be 1-D and of the same length')
    if ratings.size == 0:
        raise ValueError('at least one rating is required to train a user')

    mean_rating = ratings.mean()
    centered = ratings - mean_rating
    item_factors = model.VT[:, work_ids]
    ridge = model.lambda_ * len(centered) * np.eye(model.nb_components)
    user_factors = np.linalg.solve(item_factors.dot(item_factors.T) + ridge,
                                   item_factors.dot(centered))

    # Only touch the model once the solve has succeeded, so a failure cannot
    # leave a mean without its matching factor row.
    _ensure_user_slot(model, user_id)
    model.means[user_id] = mean_rating
    model.U[user_id, :] = user_factors

In [83]:
# Fit factors for JJ from all of JJ's ratings into row 2139 (the fitted factor
# matrix had 2139 user rows per the "Shapes" output above, so 2139 is the first
# appended row), then show the learned vector.
train_user(2139, jj['encoded_work_id'], jj['rating'])
als.U[2139]

array([-0.14303548, -0.1629117 , -0.09923869, -0.19760937,  0.11942109,
       -0.32526121,  0.18492868, -0.02383607,  0.1816412 ,  0.03937937,
        0.02771371,  0.02618218,  0.0556854 ,  0.10054771, -0.02409849,
       -0.00325536, -0.02481059,  0.114783  ,  0.21336958,  0.03092004])

In [68]:
# Rebuild als.M from the updated user factors.
# NOTE(review): presumably als.predict reads als.M — confirm in MangakiALS.
als.M = als.U.dot(als.VT)

In [61]:
# Tag every row of jj with the row index that was trained for JJ. A scalar is
# broadcast to the whole column, so no length has to be computed (Ru is local to
# train_user and is not available at notebook level).
jj['encoded_user_id'] = 2139

In [62]:
# Build the (encoded user, encoded work) pairs for JJ and store the model's
# prediction for each of them.
jj_pairs = jj[['encoded_user_id', 'encoded_work_id']]
X_jj = np.array(jj_pairs).astype(np.int64)
jj['pred'] = als.predict(X_jj)

In [63]:
# Preview JJ's ratings next to the predictions.
jj.head()

Unnamed: 0,user_id,work_id,choice,rating,encoded_user_id,encoded_work_id,pred
0,1,1146,like,2.0,2139,10341,0.735778
1,1,1875,willsee,0.5,2139,6917,1.06824
2,1,2078,willsee,0.5,2139,1573,0.763262
3,1,3,wontsee,-0.5,2139,7888,0.339925
4,1,3117,willsee,0.5,2139,5324,0.540148


In [64]:
# RMSE of the predictions for JJ against JJ's actual ratings.
als.compute_rmse(jj['rating'], jj['pred'])
# NOTE(review): this comment used to read 0.952, but the recorded output below is
# about 1.083 — the old figure presumably came from an earlier run.

1.0834690333964536

In [65]:
# Attach human-readable titles to JJ's rows.
titles = dict(Work.objects.values_list('id', 'title'))
jj['title'] = jj['work_id'].map(titles)

# List the rows from lowest to highest prediction. DataFrame.sort is deprecated
# and was later removed from pandas; sort_values is its replacement and likewise
# returns a new sorted frame without modifying jj.
jj.sort_values('pred')

  after removing the cwd from sys.path.


Unnamed: 0,user_id,work_id,choice,rating,encoded_user_id,encoded_work_id,pred,title
395,1,126,willsee,0.5,2139,9183,0.180609,Dragon Ball GT
251,1,18,wontsee,-0.5,2139,4305,0.196237,Fairy Tail
243,1,21,willsee,0.5,2139,11104,0.208779,Highschool of the Dead
270,1,75,wontsee,-0.5,2139,9468,0.209681,High School DxD
323,1,5307,wontsee,-0.5,2139,11263,0.220937,Fairy Tail
29,1,6,dislike,-2.0,2139,1055,0.225559,Bleach
292,1,143,willsee,0.5,2139,7200,0.236263,To LOVE-Ru
315,1,1126,wontsee,-0.5,2139,4619,0.240745,Boku wa Tomodachi ga Sukunai Episode 0
383,1,10365,willsee,0.5,2139,6848,0.254406,Kiss x Sis
608,1,3476,wontsee,-0.5,2139,2843,0.268238,Ikkitousen: Dragon Destiny


In [355]:
# Number of works sampled as "questions" by each selection strategy below.
NB_QUESTIONS = 10

In [368]:
# Random
import random

# Baseline strategy: ask about NB_QUESTIONS of JJ's works, drawn uniformly
# without replacement.
jj_work_pool = np.array(jj['encoded_work_id'])
all_ratings = {work_id: rating for work_id, rating in zip(jj['encoded_work_id'], jj['rating'])}
subset = np.random.choice(jj_work_pool, NB_QUESTIONS, replace=False)
y_test = np.array([all_ratings[chosen] for chosen in subset])
print(subset)
# Train a separate simulated user (row 2140) on those answers only.
train_user(2140, subset, y_test)

[1937 4801 2824 2623  715 9177 4034 7087 8724 5547]


In [369]:
# Show JJ's rows for the sampled works.
jj.query('encoded_work_id in @subset')

Unnamed: 0,user_id,work_id,choice,rating,encoded_user_id,encoded_work_id,pred,title
65,1,419,dislike,-2.0,2139,2824,1.081679,Mawaru Penguindrum
121,1,1563,like,2.0,2139,7087,1.192337,Ponyo sur la falaise
128,1,746,willsee,0.5,2139,1937,0.669549,Ghost in the Shell: Stand Alone Complex - Tach...
157,1,1306,willsee,0.5,2139,4801,0.808459,Natsume's Book of Friends Four
176,1,781,wontsee,-0.5,2139,2623,0.784534,Ghost in the Shell: Arise - Border:3 Ghost Tears
194,1,6492,willsee,0.5,2139,715,1.244463,Monster
266,1,130,wontsee,-0.5,2139,5547,0.357207,Air Gear
411,1,9092,willsee,0.5,2139,9177,0.396992,Rail Wars!
488,1,7511,willsee,0.5,2139,4034,0.322943,Sword Art Online - Aincrad
631,1,14416,like,2.0,2139,8724,1.285106,Spirited Away


In [370]:
# Rebuild als.M from the updated user factors, then show the vector learned for
# row 2140.
# NOTE(review): presumably als.predict reads als.M — confirm in MangakiALS.
als.M = als.U.dot(als.VT)
als.U[2140]

array([-0.88114882, -0.15880069, -0.28525917, -0.36149601, -0.03939813,
        0.25915399,  0.3786149 ,  0.20925865, -0.14391705, -0.26987842,
       -0.03866946,  0.32678425,  0.32016272, -0.08749871, -0.196268  ,
       -0.20794798, -0.25496892,  0.01392694, -0.01202279, -0.95187918])

In [371]:
# Error on the questions themselves, i.e. the works row 2140 was trained on.
questioned_user = [2140] * len(subset)
X_test = np.column_stack((questioned_user, subset))
y_pred = als.predict(X_test)
als.compute_rmse(y_test, y_pred)

0.56242319752559311

In [372]:
# Error over every work JJ rated, predicted from the factors of row 2140.
jj_works = jj['encoded_work_id']
X_test = np.column_stack(([2140] * len(jj_works), jj_works))
y_pred = als.predict(X_test)
als.compute_rmse(jj['rating'], y_pred)

1.2861712207134659

In [361]:
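# DPP: choose the questions with a determinantal point process (sample_k from dpplib) instead of uniformly at random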

In [270]:
# Size of the target vector the models are fitted on.
y.shape

(346210,)

In [271]:
from mangaki.algo.knn import MangakiKNN
# rbf_kernel is called in this cell, so it is imported here rather than relying
# on a later cell having been run first.
from sklearn.metrics.pairwise import rbf_kernel

# Fit KNN on the same anonymized ratings as the ALS model.
knn = MangakiKNN()
knn.set_parameters(anonymized.nb_users, anonymized.nb_works)
knn.fit(X, y)
# RBF kernel over the rows of knn.M.T selected by JJ's encoded work ids.
# NOTE(review): L is reassigned from the ALS factors in the next cell, so this
# KNN-based kernel is only used if that cell is skipped.
L = rbf_kernel(knn.M.T[jj['encoded_work_id']])

In [210]:
from mangaki.utils.dpplib import decompose_kernel, sample_k
from sklearn.metrics.pairwise import rbf_kernel

# RBF kernel between the ALS factor vectors of the works JJ rated; rows and
# columns follow the row order of jj.
jj_work_factors = als.VT.T[jj['encoded_work_id']]
L = rbf_kernel(jj_work_factors)

In [242]:
# One kernel row and column per work JJ rated.
L.shape

(649, 649)

In [243]:
# Decompose the kernel once; D and V are passed to sample_k below.
# NOTE(review): presumably eigenvalues and eigenvectors of L — confirm in dpplib.
D, V = decompose_kernel(L)

In [373]:
# From here
# NOTE(review): presumably marks the cells to re-run for each new draw — confirm.
# Draw NB_QUESTIONS indices with sample_k from the decomposed kernel; the result
# is flattened to one dimension and cast to int64 so it can be used for indexing.
subset_pos = sample_k(NB_QUESTIONS, D, V).reshape(NB_QUESTIONS).astype(np.int64)

In [374]:
# Make room for one more simulated user: a zero mean and a zero factor row.
# The row width is read from als.U instead of being hard-coded.
# NOTE: this cell is not idempotent — every execution appends to both arrays.
als.means = np.concatenate((als.means, [0]))
als.U = np.vstack((als.U, np.zeros(als.U.shape[1])))

In [375]:
# subset_pos holds row positions within jj (the kernel was built from jj's rows
# in order), so select by position with .iloc rather than by index label.
subset = jj['encoded_work_id'].iloc[subset_pos]
y_test = np.array([all_ratings[work_id] for work_id in subset])
# Train another simulated user (row 2141) on the DPP-selected answers only.
train_user(2141, subset, y_test)

In [376]:
# Show JJ's rows for the works selected by the DPP draw.
jj.query('encoded_work_id in @subset')

Unnamed: 0,user_id,work_id,choice,rating,encoded_user_id,encoded_work_id,pred,title
37,1,134,like,2.0,2139,9655,1.157036,Summer Wars
80,1,410,favorite,4.0,2139,8265,1.310537,Porco Rosso
178,1,9936,willsee,0.5,2139,1776,0.974245,Cencoroll
183,1,4369,willsee,0.5,2139,3410,0.739514,Attaque Des Titans (l')
273,1,87,willsee,0.5,2139,1946,0.325259,Sword Art Online II
354,1,148,willsee,0.5,2139,11142,0.746681,Black Lagoon: The Second Barrage
392,1,877,willsee,0.5,2139,8063,0.803639,Goodbye Mr. Despair OAD
411,1,9092,willsee,0.5,2139,9177,0.396992,Rail Wars!
476,1,14293,willsee,0.5,2139,6702,0.876363,UTAMONOGATARI ♫
483,1,8242,willsee,0.5,2139,8352,0.591582,Yona of the Dawn


In [377]:
# Rebuild als.M from the updated user factors, then show the vector learned for
# row 2141.
# NOTE(review): presumably als.predict reads als.M — confirm in MangakiALS.
als.M = als.U.dot(als.VT)
als.U[2141]

array([-0.01710628, -0.13274005, -0.30822378, -0.22863072,  0.16678263,
       -0.35646804,  0.5714343 ,  0.21015158,  0.03017136,  0.23297129,
       -0.05941062,  0.52798474,  0.64742897, -0.19957709, -0.11503318,
       -0.15056268, -0.39358746,  0.0151542 ,  0.83685526,  0.07839983])

In [378]:
# Error over every work JJ rated, predicted from the factors of row 2141.
jj_works = jj['encoded_work_id']
X_test = np.column_stack(([2141] * len(jj_works), jj_works))
y_pred = als.predict(X_test)
als.compute_rmse(jj['rating'], y_pred)

1.1563861569837124