In [233]:
import pandas as pd
import numpy as np
import math
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform
from scipy.stats import hmean
from itertools import combinations, product

In [188]:
PATH = '../../data/processed/'

In [189]:
train = pd.read_csv(PATH + 'train_lol.csv')[['userid', 'shop', 'rating']]
shop_metadata = pd.read_csv(PATH + 'coffee_shops_context.csv')
num_user = len(train.userid.unique())
num_shop = len(train.shop.unique())

In [190]:
PATH = '../../results/outputs/'

In [191]:
with open(PATH + 'MostPop_lol.txt_recommendations.txt', encoding = 'utf8') as f:
    most_pop = f.read()
most_pop = most_pop.split(' ')

mf_rec = pd.read_csv(PATH + 'mf_lol_recs.csv').drop('Unnamed: 0', axis=1).iloc[:,:6]
fm_rec = pd.read_csv(PATH + 'lightFM_LOL_recommendations_improved_features.csv').iloc[:,:6]
most_pop_rec = most_pop.copy()[:mf_rec.shape[1]-1]
shop_metadata = shop_metadata[shop_metadata['alias'].isin(train['shop'])].set_index('alias')

d = {k:most_pop_rec for k in fm_rec.iloc[:,0].values}
most_pop_all_users = pd.DataFrame.from_dict(d, orient='index').reset_index()


In [212]:
def coverage_item(topn_rec, num_item):
    num_rec = len(set(topn_rec.stack().tolist()))
    return num_rec / num_item

def novelty(topn_rec, pop_rank, num_item):
    # http://www.cs.ucl.ac.uk/fileadmin/UCL-CS/research/Research_Notes/RN_11_21.pdf
    pop_rank = {k: v for v, k in enumerate(pop_rank, start=1)}
    # prob_item = {k:(num_item-v)/(num_item-1) for k,v in pop_rank.items()}

    novelties = []
    for u in range(len(topn_rec)):
        novelty = 0
        for item in topn_rec.iloc[u].values:
            novelty += (math.log2(pop_rank[item]) / num_item)
        novelties.append(novelty)

    return np.mean(novelties)

def item_sim(item_content):
    df = pd.DataFrame(
        squareform(1 - pdist(item_content, 'cosine')),
        columns=item_content.index,
        index=item_content.index,
    )
    return df

def diversity(topn_rec, item_sim):
    # http://files.grouplens.org/papers/ziegler-www05.pdf
    all_sims = []
    for u in range(len(topn_rec)):
        sim_user = 0
        items = topn_rec.iloc[u].values
        combi = list(combinations(items,2))
        for c in combi:
            sim_user += item_sim.loc[c[0], c[1]]
        sim_user /= len(combi)
        all_sims.append(sim_user)
    
    return 1-np.mean(all_sims)

def serendipity(topn_rec, user_history, item_sim):
    # http://www.cs.ucl.ac.uk/fileadmin/UCL-CS/research/Research_Notes/RN_11_21.pdf
    all_serendipity = []
    for u in range(len(topn_rec)):
        serendipity_user = 0
        user = topn_rec.iloc[u,0]
        rec_items = topn_rec.iloc[u,1:].values
        his_items = user_history[user_history['userid']==user]['shop'].values
        combi = list(product(rec_items, his_items))
        for c in combi:
            serendipity_user += item_sim.loc[c[0], c[1]]
        serendipity_user /= len(combi)
        all_serendipity.append(serendipity_user)
    
    return 1-np.mean(all_serendipity)


        

In [234]:
pop_c = coverage_item(most_pop_all_users.iloc[:,1:], num_shop)
pop_c

0.006675567423230975

In [235]:
mf_c = coverage_item(mf_rec.iloc[:,1:], num_shop)
mf_c

0.32710280373831774

In [236]:
fm_c = coverage_item(fm_rec.iloc[:,1:], num_shop)
fm_c

0.14419225634178906

In [237]:
pop_n = novelty(most_pop_all_users.iloc[:,1:], most_pop, num_shop)
pop_n

0.009221482771172922

In [238]:
mf_n = novelty(mf_rec.iloc[:,1:], most_pop, num_shop)
mf_n

0.021954235939160207

In [239]:
fm_n = novelty(fm_rec.iloc[:,1:], most_pop, num_shop)
fm_n

0.025514817159287148

In [240]:
item_cossim = item_sim(shop_metadata)
pop_d = diversity(most_pop_all_users.iloc[:,1:], item_cossim)
pop_d

0.479049134831923

In [241]:
mf_d = diversity(mf_rec.iloc[:,1:], item_cossim)
mf_d


0.4459147296066155

In [242]:
fm_d = diversity(fm_rec.iloc[:,1:], item_cossim)
fm_d

0.46875653806688555

In [243]:
pop_s = serendipity(most_pop_all_users, train, item_cossim)
pop_s

0.5010841989845591

In [244]:
mf_s = serendipity(mf_rec, train, item_cossim)
mf_s

0.5849567304857871

In [246]:
fm_s = serendipity(fm_rec, train, item_cossim)
fm_s

0.5076022122155002

In [248]:
hmean([pop_c, pop_n, pop_d, pop_s])

0.015248227546382056

In [249]:
hmean([mf_c, mf_n, mf_d, mf_s])

0.0761056023034301

In [252]:
hmean([fm_c, fm_n, fm_d, fm_s])

0.07963138951320822