## 0. Import packages

In [2]:
import pandas as pd
import gzip
import math
from tqdm import tqdm
from itertools import chain, combinations
from Association import Association
import numpy as np
tqdm.pandas() #for progres_apply etc.

## 1. Load data from previous step

In [3]:
import pickle
rules = pickle.load(open("pickle_dumps/article_rules.p", "rb"))
user_rules = pickle.load(open("pickle_dumps/user_rules.p", "rb"))

In [4]:
df_test = pickle.load(open("pickle_dumps/test_df.p", "rb"))
df_test = df_test.set_index("user_id").to_dict()["test_split"]

In [5]:
df_train = pickle.load(open("pickle_dumps/train_df.p", "rb"))
df_train = df_train.set_index("user_id").to_dict()["item_id"]

In [6]:
class Recommender():
    def __init__(self, assos):
        pass
    
    def recommend(self, user_items, k, user_id=None, split=None):
        pass        

In [7]:
def hitrate(rc, k):
    hits = 0
    ndcg = []
    counts = 0
    avg_len = 0
    
    idcg = sum(1/np.log2(i+1) for i in range(1,k+1))
    
    for user in tqdm(df_test):
        split_user = df_test[user]
        for split_id, split in enumerate(split_user):
            r = rc.recommend(split[0], k, user, split_id)
        
            avg_len += len(r)/len(split_user)
            
            for item in split[1]:
                if item in r:
                    hits += (item in r) / len(split_user)
                    break
            
            dcg = 0
            for i, rec in enumerate(r):
                good = 1 if rec in split[1] else 0
                dcg += (2**good - 1)/(np.log2(i+2))
            ndcg.append(dcg/idcg)
            
                
                
        counts += 1
    
    print("avg len: {:f}".format(avg_len / len(df_test)))
    if k == -1:
        print("HR\t{:.5f}".format((hits / counts)))
        print("EHR\t{:.5f}".format((hits / counts) / (avg_len /  len(df_test)) * 10))
    else:
        print("HR@{}\t{:.5f}".format(k, (hits / counts)))
        print("EHR@{}\t{:.5f}".format(k, (hits / counts) / (avg_len /  len(df_test)) * 10))
    print("nDCG@{}\t{:.5f}".format(k, (sum(ndcg) / len(ndcg))))
    return hits / counts

In [8]:
class Assos_Recommender(Recommender):
    def __init__(self, assos):
        # use popular items to fill unused space
        # TODO (df_train is used for this)
        
        self.assos = sorted(assos, key=lambda x: x.s, reverse=True)
        
#     def get_items(self, user_id):
#         return self.df_test[user_id]
    
    def recommend(self, user_items, k, user_id=None, split=None):
        recommendation = set()
        for a in self.assos:
            for l in a.left:
                # break if any element is not in the user session
                if l not in user_items:
                    break
                    
                # all elements are in the user session --> add RHS of rule to the recommendation
                r = list(a.right)[0]
                if r not in user_items:
                    recommendation.add(r)
                        
            if len(recommendation) >= k and k != -1:
                break
                
        return recommendation

# Results
rule.s --> 18.317 <br>
rule.s * rule.c --> 20.545

In [9]:
ar = Assos_Recommender(rules)
hr = hitrate(ar, 10)

100%|██████████| 101/101 [00:39<00:00,  2.56it/s]

avg len: 10.000000
HR@10	0.21040
EHR@10	0.21040
nDCG@10	0.02383





In [10]:
class User_Recommender(Recommender):
    def __init__(self, assos):
        self.assos = assos
    
    def recommend(self, user_items, k, user_id, split):
        if len(user_items) <= 0:
            return set()
        
        user_items = set(user_items)
        rdict = dict()
        for a in self.assos[split]:
            if user_id in a.right:
                item_list = list()
                for l in a.left:
                    if l in df_test:
                        item_list.append(set(df_test[l][split][0]))
                    if l in df_train:
                        item_list.append(set(df_train[l]))
                        
                if item_list:
                    intersection = set.intersection(*item_list)
                    for item in intersection - user_items:
                        if item in rdict:
                            rdict[item] += (a.c * a.s)
                        else:
                            rdict[item] = (a.c * a.s)

                            
#         if rdict.values():   
#             if max(rdict.values()) < 50:
#                 return set()
            
        rlist = sorted(rdict.keys(), key=lambda x: rdict[x], reverse=True)

        recommendation = set()
        for i in rlist:
            recommendation.add(i)
            if len(recommendation) >= k and k != -1:
                break
        return recommendation

# Results
sum(rule.s * rule.c) --> 23.020

In [11]:
ur = User_Recommender(user_rules)
hr = hitrate(ur, 10)

100%|██████████| 101/101 [00:28<00:00,  3.55it/s]

avg len: 2.574257
HR@10	0.21287
EHR@10	0.82692
nDCG@10	0.06986





In [12]:
class Combo_Recommender(Recommender):
    def __init__(self, ar, ur):
        self.ar = ar
        self.ur = ur
        
    def recommend(self, user_items, k, user_id, split):
        recommendation = self.ur.recommend(user_items, k, user_id, split)
        if len(recommendation) < k:
            recommendation.update(self.ar.recommend(user_items, k, user_id, split))
        return recommendation

no filter: 36139; 08554 <br>
25: idem<br>

In [13]:
cr = Combo_Recommender(ar, ur)
hr = hitrate(cr, 10)

100%|██████████| 101/101 [00:59<00:00,  1.70it/s]

avg len: 10.000000
HR@10	0.36139
EHR@10	0.36139
nDCG@10	0.08554





In [14]:
class Combo_Recommender2(Recommender):
    def __init__(self, ar, ur):
        self.ar = ar
        self.ur = ur
        
    def recommend(self, user_items, k, user_id, split):
        recommendation = self.ur.recommend(user_items, 8, user_id, split)
        if len(recommendation) < k:
            recommendation.update(self.ar.recommend(user_items, k - len(recommendation), user_id, split))
        return recommendation

In [15]:
cr = Combo_Recommender2(ar, ur)
hr = hitrate(cr, 10)

100%|██████████| 101/101 [01:00<00:00,  1.67it/s]

avg len: 9.997525
HR@10	0.35149
EHR@10	0.35157
nDCG@10	0.07773





In [16]:
class Combo_Recommender3(Recommender):
    def __init__(self, ar, ur):
        self.ar = ar
        self.ur = ur
        
    def recommend(self, user_items, k, user_id, split):
        recommendation = self.ar.recommend(user_items, 5, user_id, split)
        if len(recommendation) < k:
            recommendation.update(self.ur.recommend(user_items, k - len(recommendation), user_id, split))
        return recommendation

In [17]:
cr = Combo_Recommender3(ar, ur)
hr = hitrate(cr, 10)

100%|██████████| 101/101 [00:36<00:00,  2.79it/s]

avg len: 6.274752
HR@10	0.26980
EHR@10	0.42998
nDCG@10	0.05957



