In [1]:
import json
import numpy as np
import pandas as pd
from app import dataset_word2vec
from sklearn.metrics import precision_score, recall_score

In [3]:
df_movies, _ = dataset_word2vec(['genres', 'rating', 'runtimes', 'year'], op='sum', n_features=200)

Time to build vocab: 0.02 mins
Time to train the model: 2.13 mins
Time to compute vectors: 0.62 mins


In [37]:
(df_movies['runtimes'].min() + df_movies['runtimes'])/(df_movies['runtimes'].max() - df_movies['runtimes'].min())

0        0.148673
1        0.189381
2        0.184071
3        0.224779
4        0.192920
           ...   
27273    0.155752
27274    0.152212
27275    0.235398
27276    0.185841
27277    0.175221
Name: runtimes, Length: 26585, dtype: float64

In [38]:
df_movies['runtimes']

0        0.145133
1        0.185841
2        0.180531
3        0.221239
4        0.189381
           ...   
27273    0.152212
27274    0.148673
27275    0.231858
27276    0.182301
27277    0.171681
Name: runtimes, Length: 26585, dtype: float64

In [31]:
data = {}
with open('./app/recomendacoes/cb-moea-recomendacoes.txt') as json_file:
    data = json.load(json_file)
data['4169']

[318, 1203, 356, 953, 260, 2859, 593, 1270, 922, 2329, 2920, 3578]

In [4]:
df_ratings = pd.read_table('./app/datasets/ml-1m/ratings.dat', delimiter='::', names=['userId', 'movieId', 'rating', 'timestamp'], engine='python')

In [5]:
index = {}
with open('./app/datasets/index.txt') as json_file:
    index = json.load(json_file)

In [6]:
from sklearn.metrics.pairwise import cosine_similarity
def get_diversity(solutions):
    sim = cosine_similarity(solutions, solutions)
    np.fill_diagonal(sim, 0)
    return (1 - sim).sum(axis=1) * 1/(solutions.shape[0] - 1)

def get_novelty(solutions, data):
    sim = cosine_similarity(solutions, data)
    
    return (1-sim).max(axis=1) #* 1/(data.shape[0] - 1)
def evaluate(user, df_ratings, index, data):
    df_ratings_u = df_ratings[df_ratings['userId'] == user].set_index('movieId')['rating']
    test = df_ratings_u[index[str(user)]['test']]
    y_true = test.copy()
    y_true[y_true <= 3] = 0
    y_true[y_true > 3] = 1
    y_pred = pd.Series(np.zeros(test.shape[0]), index=test.index, dtype=int)
    y_pred[data[str(user)]] = 1
    solutions = df_movies.iloc[data[str(user)]].drop(columns=['title'])
    train_data  = df_movies.iloc[index[str(user)]['train']].drop(columns=['title'])
    res = {}
    res['precision'] = precision_score(y_true, y_pred)
    res['recall'] = recall_score(y_true, y_pred)
    res['diversity'] = get_diversity(solutions).mean()
    res['novelty'] = get_novelty(solutions, train_data).mean()
    return res

In [8]:
user = 4169
data = {}
with open('./app/recomendacoes/cb-moea-recomendacoes.txt') as json_file:
    data = json.load(json_file)
print('cb-moea:',evaluate(user, df_ratings, index, data))
with open('./app/recomendacoes/cf-moea-recomendacoes.txt') as json_file:
    data = json.load(json_file)
print('cf-moea:',evaluate(user, df_ratings, index, data))
with open('./app/recomendacoes/cf-recomendacoes.txt') as json_file:
    data = json.load(json_file)
print('cf:',evaluate(user, df_ratings, index, data))
with open('./app/recomendacoes/cb-recomendacoes.txt') as json_file:
    data = json.load(json_file)
print('cb:',evaluate(user, df_ratings, index, data))

cb-moea: {'precision': 0.8333333333333334, 'recall': 0.029239766081871343, 'diversity': 0.09168883407394605, 'novelty': 0.8676707293725322}
cf-moea: {'precision': 0.4, 'recall': 0.017543859649122806, 'diversity': 0.07552305118282152, 'novelty': 0.8592950294129974}
cf: {'precision': 0.8666666666666667, 'recall': 0.038011695906432746, 'diversity': 0.10509113135185758, 'novelty': 0.8296733471712128}
cb: {'precision': 0.6, 'recall': 0.02631578947368421, 'diversity': 0.21347588484022867, 'novelty': 0.6452433625963792}


In [56]:
#df_movies = df_movies.set_index('movieId')
solutions = df_movies.iloc[data[str(999)]].drop(columns=['title'])
train_data  = df_movies.iloc[index[str(999)]['train']].drop(columns=['title'])

In [57]:
solutions

Unnamed: 0_level_0,rating,runtimes,year,Action,Adult,Adventure,Animation,Biography,Comedy,Crime,...,feat_290,feat_291,feat_292,feat_293,feat_294,feat_295,feat_296,feat_297,feat_298,feat_299
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
35,6.8,121.0,1995.0,0,0,0,0,1,0,0,...,-0.0017,-0.004471,0.019554,-0.001688,-0.075222,-0.01306,0.008169,-0.012303,-0.001459,-0.024078
3512,6.9,115.0,2000.0,0,0,0,0,0,1,0,...,-0.003035,-0.003192,0.016915,-0.005017,-0.077969,-0.012303,0.007987,-0.013169,-0.007194,-0.01849
3831,6.9,93.0,2000.0,0,0,0,0,0,1,1,...,-0.002845,-0.001395,0.017219,-0.003349,-0.075506,-0.014003,0.001497,-0.021751,-0.00679,-0.017634
3265,7.8,128.0,1992.0,1,0,0,0,0,0,1,...,-0.00248,-0.001735,0.015155,-0.00789,-0.083386,-0.019744,0.007353,-0.018834,-0.005896,-0.018688
597,7.0,119.0,1990.0,0,0,0,0,0,1,0,...,-0.003484,-0.003245,0.018294,-0.00416,-0.082838,-0.013017,0.006942,-0.017003,-0.008542,-0.021584
3179,7.3,145.0,1999.0,0,0,0,0,0,0,0,...,-0.002769,-0.00323,0.018024,0.002612,-0.076723,-0.00979,0.008173,-0.018404,-0.008153,-0.025684
3565,6.8,120.0,2000.0,0,0,0,0,0,1,0,...,-0.00367,-0.001135,0.01941,0.004516,-0.07437,-0.01131,0.004973,-0.016334,-0.011647,-0.019636
2919,7.1,115.0,1982.0,0,0,0,0,0,0,0,...,-0.002082,-0.001524,0.018143,0.001928,-0.073051,-0.013489,0.007492,-0.015895,-0.006718,-0.020632
112,6.7,104.0,1995.0,1,0,0,0,0,1,1,...,-0.003463,0.000168,0.017553,0.004356,-0.079028,-0.018459,0.003971,-0.021428,-0.011288,-0.019425
887,6.0,96.0,1998.0,0,0,0,0,0,0,0,...,-0.002549,-0.004481,0.019473,-0.00519,-0.071634,-0.006919,0.006293,-0.010795,-0.004615,-0.016011


In [59]:
get_diversity(solutions).mean()

0.0910301013763604

In [60]:
get_novelty(solutions, train_data).mean()

0.0038916873988841063