## 0. Import packages

In [1]:
import pandas as pd
import gzip
import math
from tqdm import tqdm
from itertools import chain, combinations
from Association import Association
import numpy as np
import random
tqdm.pandas()  # enables pandas progress_apply etc.

In [2]:
# Index of the train/test split to evaluate: it selects which
# pickle_dumps/train_df_<seed_index>.p and test_df_<seed_index>.p are loaded below.
seed_index = 0

## 1. Load data from previous step

In [3]:
import pickle

# NOTE: pickle.load can execute arbitrary code, so only load dumps that were
# produced by the previous step of this pipeline.
# The context managers make sure both file handles are closed after loading.
with open("pickle_dumps/article_rules.p", "rb") as rules_file:
    rules = pickle.load(rules_file)
with open("pickle_dumps/user_rules.p", "rb") as user_rules_file:
    user_rules = pickle.load(user_rules_file)

In [4]:
# Load the test split for the selected seed; the file is closed as soon as the
# frame has been unpickled.
with open(f"pickle_dumps/test_df_{seed_index}.p", "rb") as test_file:
    test_frame = pickle.load(test_file)
# Plain dict {user_id: value of the "item_id" column}; hitrate() reads the
# per-user targets from it.
df_test = test_frame.set_index("user_id").to_dict()["item_id"]

In [5]:
# Load the train split for the selected seed; the file is closed as soon as the
# frame has been unpickled.
with open(f"pickle_dumps/train_df_{seed_index}.p", "rb") as train_file:
    train_frame = pickle.load(train_file)
# Plain dict {user_id: value of the "item_id" column}; hitrate() passes the
# per-user entry to the recommender as the user's known items.
df_train = train_frame.set_index("user_id").to_dict()["item_id"]

In [6]:
class Recommender:
    """Common base class of the recommenders defined in this notebook."""

    def __init__(self, assos):
        """Accept the rules argument; the base class itself stores nothing."""

    def recommend(self, user_items, k, user_id=None, split=None):
        """Placeholder meant to be overridden; the base version returns None."""
        return None

In [7]:
def hitrate(rc, k, nr_users=1.0):
    """Evaluate a recommender on a random sample of the test users.

    Reads the module-level dicts ``df_test`` (target items per user) and
    ``df_train`` (known items per user), prints the metrics and returns the
    hit rate.

    Args:
        rc: object exposing ``recommend(user_items, k, user_id)`` that returns
            a sized collection of items.
        k: cut-off handed to the recommender; ``-1`` only switches the printed
            labels to the variant without ``@k``.
        nr_users: fraction of the test users to evaluate (1 means all).

    Returns:
        Fraction of evaluated users for whom at least one target item was
        recommended, or ``0.0`` when the sample contains no user.
    """
    user_ids = list(df_test.keys())
    sampled_users = random.sample(user_ids, int(len(user_ids) * nr_users))
    counts = len(sampled_users)
    if counts == 0:
        # Every metric below is an average over the evaluated users; without
        # this guard the divisions would raise ZeroDivisionError.
        print("no users to evaluate")
        return 0.0

    hits = 0
    ndcg = []
    avg_len = 0
    n_recall = 0

    for user in tqdm(sampled_users):
        user_target = df_test[user]
        user_items = df_train[user]
        # Set copy for O(1) membership tests in the DCG computation.
        target_set = set(user_target)

        r = rc.recommend(user_items, k, user)
        avg_len += len(r)

        # Every target entry found among the recommendations counts once.
        correct = sum(1 for item in user_target if item in r)
        if correct:
            hits += 1
        n_recall += correct / max(1, min(len(r), len(user_target)))

        # The ideal DCG assumes every returned slot holds a relevant item.
        idcg = sum(1 / np.log2(i + 1) for i in range(1, max(1, len(r)) + 1))
        # NOTE(review): positions come from the iteration order of `r`. For an
        # unordered collection such as a set they do not reflect a ranking, so
        # the nDCG value is only as meaningful as that order.
        dcg = sum(
            1 / np.log2(position + 2)
            for position, rec in enumerate(r)
            if rec in target_set
        )
        ndcg.append(dcg / idcg)

    print("avg len: {:f}".format(avg_len / counts))
    if k == -1:
        print("HR\t{:.5f}".format((hits / counts)))
        print("EHR\t{:.5f}".format((hits / counts) * (avg_len / counts)))
    else:
        print("HR@{}\t{:.5f}".format(k, (hits / counts)))
        print("normalised recall@{}\t{:.5f}".format(k, (n_recall / counts)))
        print("EHR@{}\t{:.5f}".format(k, (hits / counts) * (avg_len / counts)))
    print("nDCG@{}\t{:.5f}".format(k, (sum(ndcg) / len(ndcg))))
    return hits / counts

In [8]:
class Assos_Recommender(Recommender):
    """Recommend the right-hand sides of rules whose left-hand side is owned.

    Each rule must expose ``left`` and ``right`` (iterables of items) and the
    numeric attributes ``s`` and ``c`` (presumably support and confidence --
    confirm against the Association class).
    """

    def __init__(self, assos, max_rules=1000000):
        """Keep the ``max_rules`` rules with the highest ``s * c`` product.

        Args:
            assos: iterable of rules.
            max_rules: number of top-ranked rules to retain.
        """
        # TODO: use popular items to fill unused space (df_train is used for this)
        ranked = sorted(assos, key=lambda rule: rule.s * rule.c, reverse=True)
        self.assos = ranked[:max_rules]

    def recommend(self, user_items, k, user_id=None, split=None):
        """Collect up to ``k`` new items from the matching rules.

        Args:
            user_items: items the user already has.
            k: maximum number of items to return; ``-1`` means no limit.
            user_id: unused, accepted for interface compatibility.
            split: unused, accepted for interface compatibility.

        Returns:
            Set of recommended items, none of which is in ``user_items``.
        """
        unlimited = k == -1
        recommendation = set()
        if not unlimited and k <= 0:
            # Nothing was asked for; do not let the first matching rule
            # slip an item in before the size check runs.
            return recommendation

        user_items = set(user_items)
        for a in self.assos:
            # A rule fires only if its whole left-hand side is owned.
            if not user_items.issuperset(a.left):
                continue
            for item in a.right:
                if item in user_items:
                    continue
                recommendation.add(item)
                if not unlimited and len(recommendation) >= k:
                    return recommendation
        return recommendation

# Results
Score by rule sort key (the key passed to `sorted` in `Assos_Recommender.__init__`): <br>
`rule.s` --> 18.317 <br>
`rule.s * rule.c` --> 20.545

In [11]:
# Evaluate the item-rule recommender at k=10 on all test users (nr_users=1).
ar = Assos_Recommender(rules)
hr = hitrate(ar, 10, 1)

100%|██████████| 62982/62982 [00:52<00:00, 1209.02it/s]

avg len: 9.968594
HR@10	0.66378
normalised recall@10	0.25712
EHR@10	6.61692
nDCG@10	0.12942





In [12]:
class User_Recommender(Recommender):
    """Recommend items shared by the users on the left-hand side of user rules.

    ``assos`` maps a user id to the rules of that user. Each rule exposes
    ``left`` (user ids) and the numeric attributes ``s`` and ``c`` (presumably
    support and confidence -- confirm against the Association class).
    """

    def __init__(self, assos):
        self.assos = assos

    def recommend(self, user_items, k, user_id=None, split=None):
        """Return up to ``k`` items ranked by their summed ``c * s`` score.

        Args:
            user_items: items the user already has; they are never returned.
            k: maximum number of items to return; ``-1`` means no limit.
            user_id: key into the rule mapping; unknown ids yield an empty set.
            split: unused, accepted so the signature matches ``Recommender``.

        Returns:
            Set with the highest-scoring candidate items.
        """
        if len(user_items) <= 0 or user_id not in self.assos:
            return set()

        user_items = set(user_items)
        scores = dict()

        for a in self.assos[user_id]:
            # One item set per left-hand user, merged from both splits. A user
            # missing from a split contributes no items instead of a KeyError.
            # NOTE(review): this also draws candidates from the df_test entries
            # of the left-hand users -- confirm this is intended.
            item_sets = [
                set(df_test.get(l, ())) | set(df_train.get(l, ()))
                for l in a.left
            ]
            if not item_sets:
                continue

            weight = a.c * a.s
            # Only items that every left-hand user has are candidates.
            for item in set.intersection(*item_sets) - user_items:
                scores[item] = scores.get(item, 0) + weight

        # An experimental filter that returned nothing when the best score was
        # below a threshold is disabled.
        ranked = sorted(scores, key=scores.get, reverse=True)
        if k != -1:
            # Slicing returns nothing for k == 0 rather than one item.
            ranked = ranked[:max(k, 0)]
        return set(ranked)

# Results
`sum(rule.s * rule.c)` over the matching rules (the item score in `User_Recommender.recommend`) --> 23.020

In [13]:
# Evaluate the user-rule recommender at k=10 on all test users (nr_users=1).
ur = User_Recommender(user_rules)
hr = hitrate(ur, 10, 1)

100%|██████████| 62982/62982 [03:54<00:00, 268.64it/s] 

avg len: 0.222762
HR@10	0.02156
normalised recall@10	0.00733
EHR@10	0.00480
nDCG@10	0.00741





In [None]:
class Combo_Recommender(Recommender):
    """Use the user-based recommender first and fill up with item rules."""

    def __init__(self, ar, ur):
        """Store the item-rule recommender ``ar`` and the user-rule one ``ur``."""
        self.ar = ar
        self.ur = ur

    def recommend(self, user_items, k, user_id=None, split=None):
        """Return the ``ur`` result, topped up from ``ar`` when it is short.

        Args:
            user_items: items the user already has.
            k: number of items wanted. With ``-1`` the size check below is
                never true, so only the ``ur`` result is returned.
            user_id: forwarded to both recommenders.
            split: optional, forwarded to ``ar`` only; ``ur`` is called with
                the three-argument form.

        Returns:
            Set of recommended items; for ``k >= 1`` it never holds more than
            ``k`` items as long as both recommenders respect their limit.
        """
        recommendation = set(self.ur.recommend(user_items, k, user_id))
        if len(recommendation) < k:
            # Ask only for the missing amount so the union cannot exceed k.
            missing = k - len(recommendation)
            recommendation.update(
                self.ar.recommend(user_items, missing, user_id, split)
            )
        return recommendation

No filter: 36139; 08554 <br>
25: same as above<br>

In [None]:
# Evaluate the combined recommender at k=10; nr_users keeps its default.
cr = Combo_Recommender(ar, ur)
hr = hitrate(cr, 10)

In [None]:
class Combo_Recommender2(Recommender):
    """Take a fixed quota from the user recommender, then fill with item rules."""

    def __init__(self, ar, ur, ur_quota=8):
        """Store both recommenders and the number of items requested from ``ur``.

        Args:
            ar: item-rule recommender used to fill the remaining slots.
            ur: user-rule recommender queried first.
            ur_quota: how many items to request from ``ur``.
        """
        self.ar = ar
        self.ur = ur
        self.ur_quota = ur_quota

    def recommend(self, user_items, k, user_id=None, split=None):
        """Return up to ``ur_quota`` items from ``ur`` plus a fill from ``ar``.

        Args:
            user_items: items the user already has.
            k: number of items wanted. With ``-1`` the size check below is
                never true, so only the ``ur`` result is returned.
            user_id: forwarded to both recommenders.
            split: optional, forwarded to ``ar`` only; ``ur`` is called with
                the three-argument form.

        Returns:
            Set of recommended items.
        """
        # Never request more than k from the first stage, otherwise a small k
        # would be exceeded by the quota alone.
        first_stage = self.ur_quota if k == -1 else min(self.ur_quota, k)
        recommendation = set(self.ur.recommend(user_items, first_stage, user_id))
        if len(recommendation) < k:
            missing = k - len(recommendation)
            recommendation.update(
                self.ar.recommend(user_items, missing, user_id, split)
            )
        return recommendation

In [None]:
# Evaluate the user-first quota variant at k=10; nr_users keeps its default.
cr = Combo_Recommender2(ar, ur)
hr = hitrate(cr, 10)

In [None]:
class Combo_Recommender3(Recommender):
    """Take a fixed quota from the item rules, then fill with the user recommender."""

    def __init__(self, ar, ur, ar_quota=5):
        """Store both recommenders and the number of items requested from ``ar``.

        Args:
            ar: item-rule recommender queried first.
            ur: user-rule recommender used to fill the remaining slots.
            ar_quota: how many items to request from ``ar``.
        """
        self.ar = ar
        self.ur = ur
        self.ar_quota = ar_quota

    def recommend(self, user_items, k, user_id=None, split=None):
        """Return up to ``ar_quota`` items from ``ar`` plus a fill from ``ur``.

        Args:
            user_items: items the user already has.
            k: number of items wanted. With ``-1`` the size check below is
                never true, so only the ``ar`` result is returned.
            user_id: forwarded to both recommenders.
            split: optional, forwarded to ``ar`` only; ``ur`` is called with
                the three-argument form.

        Returns:
            Set of recommended items.
        """
        # Never request more than k from the first stage, otherwise a small k
        # would be exceeded by the quota alone.
        first_stage = self.ar_quota if k == -1 else min(self.ar_quota, k)
        recommendation = set(
            self.ar.recommend(user_items, first_stage, user_id, split)
        )
        if len(recommendation) < k:
            missing = k - len(recommendation)
            recommendation.update(self.ur.recommend(user_items, missing, user_id))
        return recommendation

In [None]:
# Evaluate the item-first quota variant at k=10; nr_users keeps its default.
cr = Combo_Recommender3(ar, ur)
hr = hitrate(cr, 10)