## POLIMI RECSYS CHALLENGE 2018
#### (SIMONE'S ALGORITHMS)

#### IMPORTS

In [1]:
import similaripy as sim
import scipy.sparse as sps
import numpy as np
import pandas as pd
from tqdm import tqdm
import utils.pre_processing as pre
from datareader import Datareader
from evaluator import Evaluator

#### COMMON DATA IN ALL ALGORITHMS

In [2]:
# Shared state reused by every algorithm cell below.
verbose = False              # forwarded as `verbose=` to the similaripy calls
dr = Datareader()
ev = Evaluator()
urm = dr.get_urm()
t_ids = dr.target_playlists  # passed as `target_rows=` when scoring

### LISBON

In [3]:
### LISBON ###
### CF IB
# Tversky similarity between the two pre-weighted views of the URM.
lisbon_rows = pre.bm25_row(urm.T)
lisbon_cols = pre.bm25_col(urm)
s = sim.tversky(
    lisbon_rows,
    lisbon_cols,
    k=1000,
    alpha=0.50,
    beta=0.40,
    verbose=verbose,
)
# Element-wise power applied to the stored similarity values.
s.data = np.power(s.data, 0.85)

# Score the target rows, then evaluate and save the result under the name 'lisbon'.
r_cop = sim.dot_product(urm, s, target_rows=t_ids, k=500, verbose=verbose)
score = ev.evaluation(r_cop, urm, dr, save=True, name='lisbon')
print('LISBON MAP@10 --> %.5f' % score)

LISBON MAP@10 --> 0.08374


### COPENHAGEN

#### BEST CF IB

In [4]:
### CF IB
# Tversky similarity on the pre-weighted URM views, requested in CSR format.
cfib_rows = pre.bm25_row(urm.T)
cfib_cols = pre.bm25_col(urm)
s = sim.tversky(
    cfib_rows,
    cfib_cols,
    k=5000,
    alpha=0.30,
    beta=0.50,
    verbose=verbose,
    format_output='csr',
)
# Element-wise power applied to the stored similarity values.
s.data = np.power(s.data, 0.75)

# Scores for the target rows use the transposed similarity matrix.
r_cfib = sim.dot_product(urm, s.T, target_rows=t_ids, k=500, verbose=verbose)
score = ev.evaluation(r_cfib, urm, dr, save=False, name='best_cf_ib')
print('%.5f' % score)

0.08394


#### BEST CF UB

In [5]:
### CF UB
# Tversky similarity computed only for the target rows (`target_rows=t_ids`).
cfub_rows = pre.bm25_row(urm)
cfub_cols = pre.bm25_col(urm.T)
s = sim.tversky(
    cfub_rows,
    cfub_cols,
    alpha=1,
    beta=1,
    k=70,
    shrink=0,
    target_rows=t_ids,
    verbose=verbose,
)
# Element-wise power applied to the stored similarity values.
s.data = np.power(s.data, 2.1)

# Here the similarity matrix is the left operand of the product with the URM.
r_cfub = sim.dot_product(s, urm, k=500, verbose=verbose)
score = ev.evaluation(r_cfub, urm, dr, save=False, name='best_cf_ub')
print('%.5f' % score)

0.08316


#### ENSEMBLE CF_IB + CF_UB 

In [6]:
# Weighted sum of the two CF score matrices; the CF UB scores are scaled by 3.15.
r_cf = r_cfib + 3.15 * r_cfub
score = ev.evaluation(r_cf, urm, dr, save=False, name='ensemble_cf')
print('%.5f' % score)

0.08713


#### CB AR (NOT USED)

In [7]:
# ICM requested with arid=True and alid=False.
icm = dr.get_icm(alid=False, arid=True)

# Similarity as the product of the pre-weighted ICM with its pre-weighted transpose.
ar_left = pre.bm25_col(icm)
ar_right = pre.bm25_col(icm.T)
s = sim.dot_product(ar_left, ar_right, k=55, verbose=verbose, format_output='csr')
# Element-wise power applied to the stored similarity values.
s.data = np.power(s.data, 0.7)

# Scores for the target rows use the transposed similarity matrix.
r_cb_ar = sim.dot_product(urm, s.T, target_rows=t_ids, k=500, verbose=verbose)
score = ev.evaluation(r_cb_ar, urm, dr, save=False, name='best_cb_ar')
print('%.5f' % score)

0.02206


#### CB AL (NOT USED)

In [8]:
# ICM requested with alid=True and arid=False.
icm = dr.get_icm(alid=True, arid=False)

# Similarity as the product of the pre-weighted ICM with its pre-weighted transpose.
# Both weightings go through the `pre` module: the notebook only imports
# `utils.pre_processing as pre`, so a bare `bm25_col` raises NameError on a fresh kernel.
s = sim.dot_product(pre.bm25_col(icm), pre.bm25_col(icm.T), k=15, verbose=verbose, format_output='csr')
# Element-wise power applied to the stored similarity values.
s.data = np.power(s.data, 0.8)

# Scores for the target rows use the transposed similarity matrix.
r_cb_al = sim.dot_product(urm, s.T, target_rows=t_ids, k=500, verbose=verbose)
score = ev.evaluation(r_cb_al, urm, dr, save=False, name='best_cb.csv')
print('%.5f' % score)

0.03899


#### CB AL AR

In [9]:
# Two ICMs: one requested with alid=True, one with arid=True.
icm_al = dr.get_icm(alid=True, arid=False)
icm_ar = dr.get_icm(alid=False, arid=True)
# Stack them side by side, scaling the arid block by 0.4 relative to the alid block.
icm = sps.hstack([icm_al*1, icm_ar*0.4])

# Similarity as the product of the pre-weighted ICM with its pre-weighted transpose.
# Both weightings go through the `pre` module: the notebook only imports
# `utils.pre_processing as pre`, so a bare `bm25_col` raises NameError on a fresh kernel.
s = sim.dot_product(pre.bm25_col(icm), pre.bm25_col(icm.T), k=31, verbose=verbose, format_output='csr')
# Element-wise power applied to the stored similarity values.
s.data = np.power(s.data, 0.8)

# Scores for the target rows use the transposed similarity matrix.
r_cb = sim.dot_product(urm, s.T, target_rows=t_ids, k=500, verbose=verbose)
score = ev.evaluation(r_cb, urm, dr, save=False, name='best_cb.csv')
print('%.5f' % score)

0.04113


#### ENSEMBLES

In [10]:
# Best ensemble found without normalizing the score matrices first:
# the CF scores plus the CB scores scaled by 0.0133.
r_tot = r_cf + 0.0133 * r_cb
score = ev.evaluation(r_tot, urm, dr, save=False, name='not_used')
print('%.5f' % score)

0.08959


In [11]:
# Best ensemble found when each score matrix first goes through pre.norm_l1_row.
r1 = pre.norm_l1_row(r_cf.tocsr())
r2 = pre.norm_l1_row(r_cb.tocsr())

# Normalized CF scores plus the normalized CB scores scaled by 0.04127;
# the evaluation is saved under the name 'copenhagen'.
r_tot = r1 + 0.04127 * r2
score = ev.evaluation(r_tot, urm, dr, save=True, name='copenhagen')
print('COPENHAGEN MAP@10 --> %.5f' % score)

COPENHAGEN MAP@10 --> 0.08980


### TUNING UTILITIES

In [None]:
# Apply pre.norm_max_row to each score matrix before ensembling.
# Requires r_cf, r_cb, r_cb_al and r_cb_ar from the cells above.
r1 = pre.norm_max_row(r_cf.tocsr())      # CF ensemble
r2 = pre.norm_max_row(r_cb.tocsr())      # CB AL AR
r3 = pre.norm_max_row(r_cb_al.tocsr())   # CB AL
r4 = pre.norm_max_row(r_cb_ar.tocsr())   # CB AR

In [None]:
# Sweep the weight applied to r2 in the r1 + a*r2 ensemble and print one score per weight.
ensemble_weights = np.arange(0.04, 0.05, 0.001)
for a in ensemble_weights:
    r = r1 + a * r2
    score = ev.evaluation(r, urm, dr, save=False, name='ensemble')
    print('%.5f --> a: %.5f' % (score, a))

In [None]:
# Grid search over the Tversky alpha/beta parameters on the raw URM.
# The same grid of values is used for both parameters.
ab_grid = np.arange(0.0, 1.1, 0.1)
for a in ab_grid:
    for b in ab_grid:
        # Similarity restricted to the target rows, then scored against the URM.
        s = sim.tversky(
            urm,
            k=250,
            alpha=a,
            beta=b,
            verbose=verbose,
            target_rows=t_ids,
        )
        eurm = sim.dot_product(s, urm, k=500, verbose=verbose)
        score = ev.evaluation(eurm, urm, dr)
        print('%.5f --> a: %.2f   b: %.2f' % (score, a, b))