In [None]:
import pandas as pd
import numpy as np
from scipy.sparse import coo_matrix

import surprise

from surprise import Dataset
from surprise import Reader
from surprise import SVD, SVDpp
from surprise.model_selection import GridSearchCV

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so use whichever name this install
# actually ships instead of hard-coding the legacy one.
plt.style.use(
    'seaborn-v0_8-whitegrid'
    if 'seaborn-v0_8-whitegrid' in plt.style.available
    else 'seaborn-whitegrid'
)

In [None]:
# Notebook-wide tunables.

# Rating given to every observed interaction; also the top of the rating scale.
MAX_RATING = 1.0

# Number of decimal places kept when predicted scores are rounded.
PRECISION = 5

# NOTE(review): not referenced by the cells visible here -- presumably a
# cut-off applied to predicted scores; confirm before relying on it.
FILTER_THRESHOLD = 0.6

# NOTE(review): not referenced by the cells visible here -- presumably a cap
# on the number of predictions kept; confirm before relying on it.
MAX_PREDICTION = 50

In [None]:
# Column names for the header-less event export, in file order.
headers = [
    'event',
    'entity_type',
    'entity_id',
    'target_entity_type',
    'target_entity_id',
    'timestamp',
    'properties',
]

# Parse both id columns and the timestamp as 32-bit integers.
header_types = {
    'entity_id': np.int32,
    'target_entity_id': np.int32,
    'timestamp': np.int32,
}

# The file carries no header row, so the names are supplied explicitly.
raw_df = pd.read_csv(
    'eval_dataset.csv',
    header=None,
    names=headers,
    dtype=header_types,
)

In [None]:
# Keep only the (entity, target entity) pairs and give every observed
# interaction the same constant rating.
df = raw_df[['entity_id', 'target_entity_id']].assign(rating=MAX_RATING)

In [None]:
# Surprise expects the frame's columns in (user, item, rating) order.
rating_columns = ['entity_id', 'target_entity_id', 'rating']

reader = Reader(rating_scale=(0, MAX_RATING))
data = Dataset.load_from_df(df[rating_columns], reader)

# Trainset built from every row of `data`, with nothing held out.
training_data = data.build_full_trainset()

In [None]:
# Hyper-parameter candidates explored by the grid search (2 x 2 x 2 combinations).
param_grid = {
    'n_epochs': [10, 30],
    'lr_all': [0.002, 0.005],
    'reg_all': [0.2, 0.6],
}

# 3-fold cross-validated search over SVD++, scored on RMSE and MAE.
gs = GridSearchCV(SVDpp, param_grid, measures=['rmse', 'mae'], cv=3)
gs.fit(data)

# Report the best RMSE first, then the parameter set that achieved it.
for report in (gs.best_score, gs.best_params):
    print(report['rmse'])

In [None]:
# Pick the estimator the grid search built with the best-RMSE parameters,
# then train it on the full trainset (no hold-out).
model = gs.best_estimator['rmse']
model.fit(training_data)

In [None]:
# Build a dense (entity x target entity) table of predicted ratings.
unique_entity = np.unique(df.entity_id.values)
unique_target_entity = np.unique(df.target_entity_id.values)

# Start every cell at -1.0; only the pairs in the anti-testset are
# overwritten below, so the remaining -1.0 cells are pairs that already
# have a rating in the trainset.
px = pd.DataFrame(-1.0, index=unique_entity, columns=unique_target_entity, dtype=np.float64)

# Every (raw user id, raw item id, fill) triple that is NOT in the trainset.
predx = training_data.build_anti_testset(fill=0)

for raw_uid, raw_iid, _ in predx:
    # predict() takes *raw* ids and maps them to inner ids itself. Converting
    # them to inner ids first (as this cell used to) makes it look up the
    # wrong -- or an unknown -- user/item.
    pred = model.predict(raw_uid, raw_iid)
    px.at[raw_uid, raw_iid] = round(pred.est, PRECISION)

In [None]:
# Leave the frame as the cell's last expression so Jupyter renders it as a
# rich (truncated) table instead of print()'s plain-text dump.
px

In [None]:
# DataFrame.as_matrix() was removed in pandas 1.0; `.values` is available in
# every pandas version. Copy explicitly so the masking below never writes
# through to `px`, which keeps this cell safe to re-run.
m1 = px.values.copy()

# Blank out the -1.0 placeholders (cells that never received a prediction).
m1[m1 < 0] = 0
# Also blank out scores equal to exactly 1.
# NOTE(review): presumably meant to hide saturated predictions -- confirm intent.
m1[m1 == 1] = 0

In [None]:
# Heat-map of the predicted scores: one row per entity, one column per
# target entity.
# figsize is (width, height) while ndarray.shape is (rows, cols), so the two
# must be swapped for the canvas to match the matrix orientation.
n_rows, n_cols = m1.shape
fig, ax = plt.subplots(figsize=(n_cols, n_rows))
ax.matshow(m1, cmap=plt.cm.Reds)
ax.set_xlabel('target_entity_id')
ax.set_ylabel('entity_id')
ax.set_title('Predicted rating per (entity, target entity) pair');