## 0. Import packages

In [1]:
import pandas as pd
import gzip
import math
from tqdm import tqdm
from itertools import chain, combinations
from Association import Association
import numpy as np
import random
tqdm.pandas() # registers pandas' progress_apply etc.

In [2]:
# Index of the train/test split to evaluate; it is interpolated into the
# pickle file names loaded below (test_df_<seed_index>.p / train_df_<seed_index>.p).
seed_index = 0

## 1. Load data from previous step

In [3]:
import pickle

# NOTE(security): unpickling executes code stored in the file, so only load
# dumps written by the previous step of this pipeline.
# NOTE(review): presumably the dumps contain `Association` objects, which is
# why that class is imported at the top — confirm.
# `with` closes each file handle as soon as its dump has been read.
with open("pickle_dumps/article_rules.p", "rb") as rules_file:
    rules = pickle.load(rules_file)
with open("pickle_dumps/user_rules.p", "rb") as user_rules_file:
    user_rules = pickle.load(user_rules_file)

In [4]:
# Load the held-out split and reshape it into a {user_id: item_id} dict.
# `with` closes the file handle once the dump has been read.
with open(f"pickle_dumps/test_df_{seed_index}.p", "rb") as test_file:
    df_test = pickle.load(test_file)
df_test = df_test.set_index("user_id").to_dict()["item_id"]

In [5]:
# Load the training split and reshape it into a {user_id: item_id} dict.
# `with` closes the file handle once the dump has been read.
with open(f"pickle_dumps/train_df_{seed_index}.p", "rb") as train_file:
    df_train = pickle.load(train_file)
df_train = df_train.set_index("user_id").to_dict()["item_id"]

In [6]:
class Recommender:
    """Common interface of the recommenders defined in this notebook.

    This base class is only a stub: the constructor ignores its argument and
    `recommend` produces nothing. Subclasses override both methods.
    """

    def __init__(self, assos):
        """Accept a rule collection; the stub does not store it."""
        return None

    def recommend(self, user_items, k, user_id=None, split=None):
        """Stub implementation: always returns None.

        Parameters mirror what subclasses receive: the user's items, the
        cut-off `k`, and the optional `user_id` and `split`.
        """
        return None

In [7]:
def hitrate(rc, k, nr_users = float("1"), seed=None):
    """Evaluate a recommender on a random sample of the test users.

    For every sampled user the recommender is given the user's training items
    (`df_train[user]`) and its output is compared with the user's held-out
    items (`df_test[user]`). The metrics are printed and returned.

    Parameters
    ----------
    rc : object
        Recommender exposing `recommend(user_items, k, user_id)`.
    k : int
        Cut-off forwarded to the recommender. `-1` selects the report format
        without the "@k" suffix on HR/EHR and without the recall line.
    nr_users : float
        Fraction of the test users to sample (1 evaluates all of them).
    seed : optional
        When given, the user sample is drawn from `random.Random(seed)` so a
        run can be repeated; when None the global `random` state is used.

    Returns
    -------
    dict
        Keys "HR" (share of users with at least one hit), "EHR" (HR times the
        average recommendation length), "normalised recall" and "nDCG".

    Raises
    ------
    ValueError
        If `nr_users` selects no user at all (the averages would be undefined).
    """
    user_ids = list(df_test.keys())
    sample_size = int(len(user_ids) * nr_users)
    if sample_size <= 0:
        raise ValueError(
            "hitrate: nr_users={!r} selects no users out of {}".format(nr_users, len(user_ids))
        )
    rng = random if seed is None else random.Random(seed)

    hits = 0
    ndcg = []
    counts = 0
    avg_len = 0
    n_recall = 0

    for user in tqdm(rng.sample(user_ids, sample_size)):
        user_target = df_test[user]
        user_items = df_train[user]

        r = rc.recommend(user_items, k, user)
        avg_len += len(r)

        # Number of held-out items that were recommended; one is enough for a hit.
        correct = sum(1 for item in user_target if item in r)
        if correct:
            hits += 1

        # Recall is normalised by the best achievable count: at most k items can
        # be recommended, and at most len(user_target) can be correct.
        # (k is an int, so it is compared directly; -1 means "no cut-off".)
        reachable = len(user_target) if k == -1 else min(k, len(user_target))
        n_recall += correct / max(1, reachable)

        # The ideal DCG assumes every returned position is relevant.
        # NOTE(review): the recommenders in this notebook return sets, so the
        # position `i` of an item (and thus its discount) follows the set's
        # arbitrary iteration order — confirm this is acceptable for nDCG.
        idcg = sum(1/np.log2(i+1) for i in range(1, max(1, len(r)) + 1))
        dcg = sum(1/np.log2(i+2) for i, rec in enumerate(r) if rec in user_target)
        ndcg.append(dcg/idcg)

        counts += 1

    print("avg len: {:f}".format(avg_len / counts))
    if k == -1:
        print("HR\t{:.5f}".format((hits / counts)))
        print("EHR\t{:.5f}".format((hits / counts) * (avg_len /  counts)))
    else:
        print("HR@{}\t{:.5f}".format(k, (hits / counts)))
        print("normalised recall@{}\t{:.5f}".format(k, (n_recall / counts)))
        print("EHR@{}\t{:.5f}".format(k, (hits / counts) * (avg_len /  counts)))
    print("nDCG@{}\t{:.5f}".format(k, (sum(ndcg) / len(ndcg))))
    return {"HR": hits /counts,
           "EHR": (hits / counts) * (avg_len /  counts),
           "normalised recall": (n_recall / counts),
           "nDCG": sum(ndcg) / len(ndcg)}

In [8]:
class Assos_Recommender(Recommender):
    """Recommends the right-hand sides of rules whose left-hand side the user satisfies."""

    def __init__(self, assos, sorter=lambda x: x.s * x.c):
        """Rank the rules by `sorter` (highest first) and keep the first 100000.

        TODO: fill unused recommendation slots with popular items
        (df_train would be needed for that).
        """
        self.sorter = sorter
        ranked = sorted(assos, key=sorter, reverse=True)
        self.assos = ranked[:100000]

    def recommend(self, user_items, k, user_id=None, split=None):
        """Collect unseen right-hand-side items of matching rules, best rule first.

        A rule matches when every element of its `left` is among the user's
        items. Collection stops once `k` items are gathered; `k == -1`
        disables the limit. `user_id` and `split` are accepted but unused.
        Returns a set.
        """
        seen = set(user_items)
        picked = set()
        unlimited = (k == -1)

        for rule in self.assos:
            if all(antecedent in seen for antecedent in rule.left):
                for candidate in rule.right:
                    if candidate in seen:
                        continue
                    picked.add(candidate)
                    if not unlimited and len(picked) >= k:
                        break

            # Checked after every rule, whether or not it matched.
            if not unlimited and len(picked) >= k:
                break

        return picked

# Results
rule.s --> 18.317 <br>
rule.s * rule.c --> 20.545

In [9]:
# Collects one (label, metrics dict) tuple per evaluated sorter.
results = []
# Passed to hitrate as `nr_users`: the fraction of test users sampled per run.
partition_size = 0.01
# Recommendation cut-off passed to hitrate.
k = 10

In [10]:
# Rank rules by their `z` attribute alone.
ar = Assos_Recommender(rules, lambda rule: rule.z)
results.append(
    ("lambda x: x.z", hitrate(ar, k, partition_size))
)

100%|██████████| 629/629 [00:26<00:00, 23.49it/s]

avg len: 3.950715
HR@10	0.26391
normalised recall@10	0.08178
EHR@10	1.04264
nDCG@10	0.07166





In [11]:
# Rank rules by the product of their `s` and `c` attributes.
ar = Assos_Recommender(rules, lambda rule: rule.s * rule.c)
results.append(
    ("lambda x: x.s * x.c", hitrate(ar, k, partition_size))
)

100%|██████████| 629/629 [00:00<00:00, 1283.68it/s]

avg len: 9.850556
HR@10	0.67250
normalised recall@10	0.28095
EHR@10	6.62446
nDCG@10	0.13309





In [12]:
# Rank rules by the sum of their `s` and `c` attributes.
ar = Assos_Recommender(rules, lambda rule: rule.s + rule.c)
results.append(
    ("lambda x: x.s + x.c", hitrate(ar, k, partition_size))
)

100%|██████████| 629/629 [00:00<00:00, 1776.85it/s]

avg len: 9.888712
HR@10	0.68839
normalised recall@10	0.27784
EHR@10	6.80733
nDCG@10	0.13936





In [13]:
# Rank rules by their `s` attribute alone.
ar = Assos_Recommender(rules, lambda rule: rule.s)
results.append(
    ("lambda x: x.s", hitrate(ar, k, partition_size))
)

100%|██████████| 629/629 [00:12<00:00, 50.01it/s]

avg len: 4.290938
HR@10	0.39428
normalised recall@10	0.12967
EHR@10	1.69182
nDCG@10	0.10903





In [14]:
# Rank rules by their `c` attribute alone.
ar = Assos_Recommender(rules, lambda rule: rule.c)
results.append(
    ("lambda x: x.c", hitrate(ar, k, partition_size))
)

100%|██████████| 629/629 [00:00<00:00, 2536.29it/s]

avg len: 9.945946
HR@10	0.68680
normalised recall@10	0.26959
EHR@10	6.83092
nDCG@10	0.13367





In [15]:
# Rank rules by their `lift` attribute, highest first.
ar = Assos_Recommender(rules, lambda rule: rule.lift)
results.append(
    ("lambda x: x.lift", hitrate(ar, k, partition_size))
)

100%|██████████| 629/629 [00:26<00:00, 23.79it/s]

avg len: 3.696343
HR@10	0.30207
normalised recall@10	0.09762
EHR@10	1.11654
nDCG@10	0.09027





In [16]:
# Rank rules by negated `lift`, i.e. lowest lift first.
ar = Assos_Recommender(rules, lambda rule: -rule.lift)
results.append(
    ("lambda x: -x.lift", hitrate(ar, k, partition_size))
)

100%|██████████| 629/629 [00:01<00:00, 605.39it/s]

avg len: 9.869634
HR@10	0.20986
normalised recall@10	0.04946
EHR@10	2.07121
nDCG@10	0.03475





In [17]:
# Rank rules by `2 * s + c`.
ar = Assos_Recommender(rules, lambda rule: 2 * rule.s + rule.c)
results.append(
    ("lambda x: 2 * x.s + x.c", hitrate(ar, k, partition_size))
)

100%|██████████| 629/629 [00:00<00:00, 2117.84it/s]

avg len: 9.922099
HR@10	0.65978
normalised recall@10	0.26304
EHR@10	6.54638
nDCG@10	0.13331





In [18]:
# Rank rules by `s * s * c`.
ar = Assos_Recommender(rules, lambda rule: rule.s * rule.s * rule.c)
results.append(
    ("lambda x: x.s * x.s * x.c", hitrate(ar, k, partition_size))
)

100%|██████████| 629/629 [00:02<00:00, 250.60it/s]

avg len: 9.562798
HR@10	0.65024
normalised recall@10	0.24249
EHR@10	6.21810
nDCG@10	0.13129





In [19]:
# Rank rules by the Euclidean norm of (s, c).
ar = Assos_Recommender(
    rules,
    lambda rule: np.sqrt(np.power(rule.s, 2) + np.power(rule.c, 2)),
)
results.append(
    ("lambda x: np.sqrt(np.power(x.s, 2) + np.power(x.c, 2))", hitrate(ar, k, partition_size))
)

100%|██████████| 629/629 [00:00<00:00, 2222.61it/s]

avg len: 9.917329
HR@10	0.65501
normalised recall@10	0.23408
EHR@10	6.49593
nDCG@10	0.12971





In [21]:
# Rank rules by sqrt(2 * s^2 + c^2).
ar = Assos_Recommender(
    rules,
    lambda rule: np.sqrt(2 * np.power(rule.s, 2) + np.power(rule.c, 2)),
)
results.append(
    ("lambda x: np.sqrt(2 * np.power(x.s, 2) + np.power(x.c, 2))", hitrate(ar, k, partition_size))
)

100%|██████████| 629/629 [00:00<00:00, 2114.11it/s]

avg len: 9.910970
HR@10	0.66296
normalised recall@10	0.25574
EHR@10	6.57055
nDCG@10	0.12606





In [22]:
# Rank rules by -|lift - 1|, i.e. lift closest to 1 first.
ar = Assos_Recommender(rules, lambda rule: -np.abs(rule.lift - 1))
results.append(
    ("lambda x: -np.abs(x.lift - 1)", hitrate(ar, k, partition_size))
)

100%|██████████| 629/629 [00:01<00:00, 622.16it/s]

avg len: 9.871224
HR@10	0.26073
normalised recall@10	0.06562
EHR@10	2.57374
nDCG@10	0.04275





In [23]:
# Rank rules by |lift - 1|, i.e. lift furthest from 1 first.
ar = Assos_Recommender(rules, lambda rule: np.abs(rule.lift - 1))
results.append(
    ("lambda x: np.abs(x.lift - 1)", hitrate(ar, k, partition_size))
)

100%|██████████| 629/629 [00:26<00:00, 24.06it/s]

avg len: 3.885533
HR@10	0.29889
normalised recall@10	0.10006
EHR@10	1.16134
nDCG@10	0.08596





In [24]:
# Rank rules by `lift * s * c`.
ar = Assos_Recommender(rules, lambda rule: rule.lift * rule.s * rule.c)
results.append(
    ("lambda x: x.lift * x.s * x.c", hitrate(ar, k, partition_size))
)

100%|██████████| 629/629 [00:12<00:00, 51.84it/s]

avg len: 9.709062
HR@10	0.71383
normalised recall@10	0.29420
EHR@10	6.93063
nDCG@10	0.14416





In [25]:
# Rank rules by `z * s`.
ar = Assos_Recommender(rules, lambda rule: rule.z * rule.s)
results.append(
    ("lambda x: x.z * x.s", hitrate(ar, k, partition_size))
)

100%|██████████| 629/629 [00:28<00:00, 22.26it/s]

avg len: 2.406995
HR@10	0.25437
normalised recall@10	0.09782
EHR@10	0.61227
nDCG@10	0.08587





In [26]:
# Rank rules by `z * s * c`.
ar = Assos_Recommender(rules, lambda rule: rule.z * rule.s * rule.c)
results.append(
    ("lambda x: x.z * x.s * x.c", hitrate(ar, k, partition_size))
)

100%|██████████| 629/629 [00:12<00:00, 50.91it/s]

avg len: 9.618442
HR@10	0.69634
normalised recall@10	0.28730
EHR@10	6.69774
nDCG@10	0.14411





In [31]:
# Rank rules by `z * s * s * c`.
ar = Assos_Recommender(rules, lambda rule: rule.z * rule.s * rule.s * rule.c)
results.append(
    ("lambda x: x.z * x.s * x.s * x.c", hitrate(ar, k, partition_size))
)

100%|██████████| 629/629 [00:27<00:00, 22.92it/s]

avg len: 4.143084
HR@10	0.44356
normalised recall@10	0.18344
EHR@10	1.83771
nDCG@10	0.15634





In [39]:
# Rank rules by the root of the summed squares of z, lift, s (counted twice) and c.
ar = Assos_Recommender(
    rules,
    lambda rule: np.sqrt(
        np.power(rule.z, 2)
        + np.power(rule.lift, 2)
        + np.power(rule.s, 2)
        + np.power(rule.s, 2)
        + np.power(rule.c, 2)
    ),
)
results.append(
    ("lambda x: np.sqrt(np.power(x.z, 2) + np.power(x.lift, 2) + np.power(x.s, 2) + np.power(x.s, 2) + np.power(x.c, 2))", hitrate(ar, k, partition_size))
)

100%|██████████| 629/629 [00:00<00:00, 1872.02it/s]

avg len: 9.942766
HR@10	0.65978
normalised recall@10	0.25565
EHR@10	6.56001
nDCG@10	0.12741





In [None]:
# Rank rules by the root of the summed squares of z, lift, s (counted twice) and c.
# NOTE(review): same sorter and label as the preceding cell; running both
# appends two entries with an identical label to `results`.
ar = Assos_Recommender(
    rules,
    lambda rule: np.sqrt(
        np.power(rule.z, 2)
        + np.power(rule.lift, 2)
        + np.power(rule.s, 2)
        + np.power(rule.s, 2)
        + np.power(rule.c, 2)
    ),
)
results.append(
    ("lambda x: np.sqrt(np.power(x.z, 2) + np.power(x.lift, 2) + np.power(x.s, 2) + np.power(x.s, 2) + np.power(x.c, 2))", hitrate(ar, k, partition_size))
)

In [36]:
# Print every evaluated sorter with its metrics, highest hit rate first.
for label, metrics in sorted(results, key=lambda entry: entry[1]["HR"], reverse=True):
    print(label)
    print(metrics)
    print("-"*5, '\n')

lambda x: x.lift * x.s * x.c
{'HR': 0.7138314785373608, 'EHR': 6.93063408494064, 'normalised recall': 0.29419713831478517, 'nDCG': 0.14416404550826728}
----- 

lambda x: x.z * x.s * x.c
{'HR': 0.6963434022257552, 'EHR': 6.697738606463941, 'normalised recall': 0.28729843288666823, 'nDCG': 0.14411251821811266}
----- 

lambda x: x.s + x.c
{'HR': 0.6883942766295708, 'EHR': 6.807332910390986, 'normalised recall': 0.27784402049107904, 'nDCG': 0.13935831334654492}
----- 

lambda x: x.c
{'HR': 0.6868044515103339, 'EHR': 6.830919950156834, 'normalised recall': 0.2695889166477401, 'nDCG': 0.13367345037753606}
----- 

lambda x: x.s * x.c
{'HR': 0.6724960254372019, 'EHR': 6.624460053432278, 'normalised recall': 0.2809504883034294, 'nDCG': 0.13308973025642926}
----- 

lambda x: np.sqrt(2 * np.power(x.s, 2) + np.power(x.c, 2))
{'HR': 0.6629570747217806, 'EHR': 6.57054754183717, 'normalised recall': 0.25574166603578363, 'nDCG': 0.12606155819652637}
----- 

lambda x: 2 * x.s + x.c
{'HR': 0.65977742448

In [11]:
class User_Recommender(Recommender):
    """Recommends items shared by the users named on the left side of a user's rules.

    `assos` is indexed by user id and yields that user's rules. For each rule,
    the items common to all users in `rule.left` are scored with
    `rule.c * rule.s`, and scores are summed across rules.
    """

    def __init__(self, assos):
        """Store the per-user rule lookup (supports `in` and indexing by user id)."""
        self.assos = assos

    def recommend(self, user_items, k, user_id=None, split=None):
        """Return the `k` best-scoring unseen items for `user_id` as a set.

        Parameters
        ----------
        user_items : sized iterable
            Items the user already has; they are never recommended.
        k : int
            Maximum number of items; `-1` disables the limit and any other
            value below 1 yields an empty set.
        user_id : hashable, optional
            Key into the rule lookup. Unknown ids (including None) yield an
            empty set.
        split : optional
            Accepted for signature compatibility with `Recommender.recommend`;
            not used.

        Returns
        -------
        set
            Recommended items (empty when the user has no items or no rules).
        """
        if len(user_items) <= 0 or user_id not in self.assos:
            return set()

        user_items = set(user_items)
        scores = dict()

        for a in self.assos[user_id]:
            # One item set per user on the rule's left side, merging both splits.
            # `.get` tolerates a neighbour that is missing from one of the splits.
            # NOTE(review): this reads other users' held-out items from df_test;
            # confirm that using the test split here is intended.
            neighbour_items = [
                set(df_test.get(l, ())) | set(df_train.get(l, ()))
                for l in a.left
            ]
            if not neighbour_items:
                continue

            weight = a.c * a.s
            for item in set.intersection(*neighbour_items) - user_items:
                scores[item] = scores.get(item, 0) + weight

        ranked = sorted(scores, key=scores.get, reverse=True)
        if k != -1:
            ranked = ranked[:max(0, k)]
        return set(ranked)

# Results
sum(rule.s * rule.c) --> 23.020

In [12]:
# Evaluate the user-rule recommender at cut-off 10 on a 0.01 fraction of the test users.
ur = User_Recommender(user_rules)
hr = hitrate(ur, k=10, nr_users=0.01)

100%|██████████| 629/629 [00:05<00:00, 110.89it/s]

avg len: 5.580286
HR@10	0.34340
normalised recall@10	0.07201
EHR@10	1.91628
nDCG@10	0.06338





In [13]:
class Combo_Recommender(Recommender):
    """Uses the user-rule recommender first and tops up with the article-rule one."""

    def __init__(self, ar, ur):
        """Store the article-rule recommender `ar` and the user-rule recommender `ur`."""
        self.ar = ar
        self.ur = ur

    def recommend(self, user_items, k, user_id=None, split=None):
        """Return at most `k` items: `ur`'s picks, topped up by `ar` if short.

        `ar` is only asked for the slots `ur` left empty, so the merged set
        never exceeds `k`. With `k == -1` only `ur`'s result is returned,
        because the top-up condition can then never hold.
        `split` is forwarded to both recommenders only when it is given.
        """
        extra = {} if split is None else {"split": split}

        recommendation = set(self.ur.recommend(user_items, k, user_id, **extra))
        missing = k - len(recommendation)
        if missing > 0:
            recommendation.update(
                self.ar.recommend(user_items, missing, user_id, **extra)
            )
        return recommendation

no filter: 36139; 08554 <br>
25: idem<br>

In [14]:
# Evaluate the combined recommender at cut-off 10 on a 0.01 fraction of the test users.
cr = Combo_Recommender(ar, ur)
hr = hitrate(cr, k=10, nr_users=0.01)

100%|██████████| 629/629 [00:19<00:00, 31.46it/s]

avg len: 5.561208
HR@10	0.34976
normalised recall@10	0.08395
EHR@10	1.94510
nDCG@10	0.06983





In [None]:
class Combo_Recommender2(Recommender):
    """Takes a fixed share from the user-rule recommender, then fills with article rules."""

    def __init__(self, ar, ur, ur_quota=8):
        """Store both recommenders and the number of slots `ur` is asked for first."""
        self.ar = ar
        self.ur = ur
        self.ur_quota = ur_quota

    def recommend(self, user_items, k, user_id=None, split=None):
        """Return at most `k` items: up to `ur_quota` from `ur`, the rest from `ar`.

        The quota is capped at `k` so a small cut-off is never exceeded. With
        `k == -1` the quota is used as is and no top-up takes place.
        `split` is optional and forwarded to both recommenders only when given.
        """
        extra = {} if split is None else {"split": split}

        first_k = self.ur_quota if k == -1 else min(self.ur_quota, k)
        recommendation = set(self.ur.recommend(user_items, first_k, user_id, **extra))
        missing = k - len(recommendation)
        if missing > 0:
            recommendation.update(
                self.ar.recommend(user_items, missing, user_id, **extra)
            )
        return recommendation

In [None]:
# Evaluate the second combination at cut-off 10 on all test users (default nr_users).
cr = Combo_Recommender2(ar, ur)
hr = hitrate(cr, k=10)

In [None]:
class Combo_Recommender3(Recommender):
    """Takes a fixed share from the article-rule recommender, then fills with user rules."""

    def __init__(self, ar, ur, ar_quota=5):
        """Store both recommenders and the number of slots `ar` is asked for first."""
        self.ar = ar
        self.ur = ur
        self.ar_quota = ar_quota

    def recommend(self, user_items, k, user_id=None, split=None):
        """Return at most `k` items: up to `ar_quota` from `ar`, the rest from `ur`.

        The quota is capped at `k` so a small cut-off is never exceeded. With
        `k == -1` the quota is used as is and no top-up takes place.
        `split` is optional and forwarded to both recommenders only when given.
        """
        extra = {} if split is None else {"split": split}

        first_k = self.ar_quota if k == -1 else min(self.ar_quota, k)
        recommendation = set(self.ar.recommend(user_items, first_k, user_id, **extra))
        missing = k - len(recommendation)
        if missing > 0:
            recommendation.update(
                self.ur.recommend(user_items, missing, user_id, **extra)
            )
        return recommendation

In [None]:
# Evaluate the third combination at cut-off 10 on all test users (default nr_users).
cr = Combo_Recommender3(ar, ur)
hr = hitrate(cr, k=10)