## 0. Import packages

In [1]:
import pandas as pd
import gzip
import math
from tqdm import tqdm
from itertools import chain, combinations
from Association import Association
import numpy as np
import random
tqdm.pandas() #for progres_apply etc.

In [2]:
# Suffix of the pickled artefacts loaded below (files are named `..._{seed_index}.p`).
seed_index = 0

## 1. Load data from previous step

In [3]:
import pickle
# Load the article rules and the per-user rules dumped by the previous step.
# NOTE: unpickling can execute arbitrary code; only load dumps you produced yourself.
# `with` closes each file handle as soon as its content has been read.
with open(f"pickle_dumps/article_rules_{seed_index}.p", "rb") as fh:
    rules = pickle.load(fh)
with open(f"pickle_dumps/user_rules_{seed_index}.p", "rb") as fh:
    user_rules = pickle.load(fh)

In [4]:
# Load the test split and reshape it into a dict: user_id -> that user's `item_id` value.
# `with` closes the file handle as soon as the dump has been read.
with open(f"pickle_dumps/test_df_{seed_index}.p", "rb") as fh:
    df_test = pickle.load(fh)
df_test = df_test.set_index("user_id").to_dict()["item_id"]

In [5]:
# Load the train split and reshape it into a dict: user_id -> that user's `item_id` value.
# `with` closes the file handle as soon as the dump has been read.
with open(f"pickle_dumps/train_df_{seed_index}.p", "rb") as fh:
    df_train = pickle.load(fh)
df_train = df_train.set_index("user_id").to_dict()["item_id"]

In [6]:
class Recommender:
    """Common interface of the recommenders defined in this notebook.

    The base class keeps no state and produces no recommendations; the
    subclasses provide the actual behaviour.
    """

    def __init__(self, assos):
        """Accept the association rules; the base class ignores them."""

    def recommend(self, user_items, k, user_id=None, split=None):
        """Recommend items for a user; the base class always returns ``None``."""
        return None

In [7]:
class Assos_Recommender(Recommender):
    """Recommend the right-hand sides of association rules the user satisfies.

    Rules are ranked once, at construction time, by ``sorter`` (highest score
    first); only the ``max_rules`` best-ranked rules are kept.
    """

    def __init__(self, assos, sorter=lambda x: x.s * x.c, max_rules=100000):
        """Rank the rules and keep the strongest ones.

        Args:
            assos: iterable of rule objects exposing iterable ``left`` and
                ``right`` attributes plus whatever ``sorter`` reads.
            sorter: maps a rule to a sortable score; higher scores are
                tried first.
            max_rules: how many top-ranked rules to keep; ``None`` keeps
                every rule.
        """
        # use popular items to fill unused space
        # TODO (df_train is used for this)

        self.sorter = sorter
        self.assos = sorted(assos, key=self.sorter, reverse=True)[:max_rules]

    def recommend(self, user_items, k, user_id=None, split=None):
        """Return up to ``k`` new items, in rule-rank order.

        A rule fires when every item of its ``left`` side is in
        ``user_items``; the items of its ``right`` side that the user does
        not have yet are then recommended (each item at most once).

        Args:
            user_items: items the user already has.
            k: maximum number of recommendations; ``-1`` means no limit.
                Any other non-positive value yields an empty list.
            user_id: unused; accepted for interface compatibility.
            split: unused; accepted for interface compatibility.

        Returns:
            list of recommended items, best-ranked rule first.
        """
        unlimited = k == -1
        recommendation = []
        if not unlimited and k <= 0:
            # check the limit *before* adding anything, so k == 0 stays empty
            return recommendation

        owned = set(user_items)
        seen = set()  # mirrors `recommendation` for O(1) duplicate checks
        for rule in self.assos:
            # skip the rule unless its whole left-hand side is in the session
            if not all(item in owned for item in rule.left):
                continue

            for item in rule.right:
                if item in owned or item in seen:
                    continue
                seen.add(item)
                recommendation.append(item)
                if not unlimited and len(recommendation) >= k:
                    return recommendation

        return recommendation

In [8]:
class User_Recommender(Recommender):
    """Recommend items shared by the users named on the left side of a user's rules.

    ``assos`` maps a user id to that user's rules. For every rule, the items
    of all users listed in ``rule.left`` are looked up in the module-level
    ``df_train`` and ``df_test`` dicts; items common to all of them are
    scored with ``value(rule)`` and the scores are summed over the rules.
    """

    def __init__(self, assos, value=lambda x: x.s * x.c):
        """Store the per-user rules and the function that weights a rule."""
        self.assos = assos
        self.value = value

    def recommend(self, user_items, k, user_id=None, split=None):
        """Return up to ``k`` items for ``user_id``, highest summed score first.

        Args:
            user_items: items the user already has; they are never recommended.
            k: maximum number of recommendations; ``-1`` means no limit.
                Any other non-positive value yields an empty list.
            user_id: key into ``assos``; unknown ids yield an empty list.
            split: unused; accepted for consistency with ``Recommender``.

        Returns:
            list of recommended items.
        """
        if len(user_items) <= 0 or user_id not in self.assos:
            return []

        owned = set(user_items)
        scores = {}

        for rule in self.assos[user_id]:
            # NOTE(review): this also reads the *test* items of the other
            # users - confirm that is intended for the evaluation protocol.
            # `.get` keeps a user that is missing from one split from
            # raising KeyError; such a user simply adds nothing from it.
            baskets = [
                set(df_test.get(other, ())) | set(df_train.get(other, ()))
                for other in rule.left
            ]
            if not baskets:
                continue

            candidates = set.intersection(*baskets) - owned
            if not candidates:
                continue

            weight = self.value(rule)
            for item in candidates:
                scores[item] = scores.get(item, 0) + weight

        ranked = sorted(scores, key=scores.get, reverse=True)

        # Dict keys are already unique, so only the length cap is needed.
        # Checking before appending keeps k == 0 empty and also works when
        # k arrives as a float.
        recommendation = []
        for item in ranked:
            if k != -1 and len(recommendation) >= k:
                break
            recommendation.append(item)
        return recommendation

In [9]:
class Combo_Recommender(Recommender):
    """Combine a user-rule recommender with an article-rule recommender.

    The user recommender ``ur`` supplies the first third of the list
    (rounded up); the article recommender ``ar`` fills the remaining slots.
    """

    def __init__(self, ar, ur):
        """Store the article-rule (``ar``) and user-rule (``ur``) recommenders."""
        self.ar = ar
        self.ur = ur

    def recommend(self, user_items, k, user_id=None, split=None):
        """Return up to ``k`` items: user-rule picks first, then article-rule picks.

        Args:
            user_items: items the user already has.
            k: maximum number of recommendations; ``-1`` means no limit and
                is forwarded unchanged to both recommenders.
            user_id: forwarded to both recommenders.
            split: unused; accepted for consistency with ``Recommender``.

        Returns:
            list of recommended items.
        """
        unlimited = k == -1
        # np.ceil returns a float, and ceil(-1 / 3) is -0.0: without this
        # branch the "no limit" sentinel degrades into a zero-length request.
        user_quota = -1 if unlimited else int(np.ceil(k / 3))
        recommendation = self.ur.recommend(user_items, user_quota, user_id)

        if unlimited or len(recommendation) < k:
            # treat the items picked so far as owned, so `ar` does not repeat them
            known = list(user_items) + recommendation
            remaining = -1 if unlimited else k - len(recommendation)
            recommendation = recommendation + self.ar.recommend(known, remaining, user_id)
        return recommendation

In [10]:
def hitrate(rc, k, nr_users=1.0, qualitative=False, df=df_test):
    """Evaluate a recommender on a random sample of users and print the metrics.

    For every sampled user the recommender receives that user's entry of the
    module-level ``df_train`` and its output is compared with the user's
    entry in ``df``.

    Args:
        rc: recommender exposing ``recommend(user_items, k, user_id)``.
        k: cut-off handed to the recommender; ``-1`` means no limit.
        nr_users: fraction of the users in ``df`` to sample.
        qualitative: when true, the result also holds the per-user
            ``(user_items, user_target, recommendation)`` triples.
        df: dict mapping a user to the target items to be predicted.

    Returns:
        dict with the keys ``"HR"``, ``"EHR"``, ``"normalised recall"`` and
        ``"nDCG"``, plus ``"predictions"`` when ``qualitative`` is true.
        All metrics are 0.0 when the sample is empty.
    """
    user_ids = list(df.keys())
    sampled = random.sample(user_ids, int(len(user_ids) * nr_users))

    hits = 0          # users with at least one correct recommendation
    total_len = 0     # summed length of all recommendation lists
    n_recall = 0
    ndcg = []
    preds = []

    for user in tqdm(sampled):
        user_target = df[user]
        user_items = df_train[user]

        r = rc.recommend(user_items, k, user)
        total_len += len(r)

        # sets make the membership tests O(1); counts still use the raw lists
        recommended = set(r)
        targets = set(user_target)

        correct = sum(1 for item in user_target if item in recommended)
        if correct:
            hits += 1

        # number of positions an ideal list could fill (at least one)
        cutoff = max(1, min(k, len(user_target)))
        n_recall += correct / cutoff

        idcg = sum(1 / np.log2(i + 1) for i in range(1, cutoff + 1))
        dcg = sum(
            1 / np.log2(rank + 2)
            for rank, rec in enumerate(r)
            if rec in targets
        )
        ndcg.append(dcg / idcg)

        if qualitative:
            preds.append((user_items, user_target, r))

    counts = len(sampled)
    if counts == 0:
        # nothing to average over: report zeros instead of dividing by zero
        print("no users sampled; all metrics reported as 0")
        hit_rate = expected_hits = recall = mean_ndcg = 0.0
    else:
        hit_rate = hits / counts
        expected_hits = (hits / counts) * (total_len / counts)
        recall = n_recall / counts
        mean_ndcg = sum(ndcg) / len(ndcg)

        print("avg len: {:f}".format(total_len / counts))
        if k == -1:
            print("HR\t{:.5f}".format(hit_rate))
            print("EHR\t{:.5f}".format(expected_hits))
        else:
            print("HR@{}\t{:.5f}".format(k, hit_rate))
            print("normalised recall@{}\t{:.5f}".format(k, recall))
            print("EHR@{}\t{:.5f}".format(k, expected_hits))
        print("nDCG@{}\t{:.5f}".format(k, mean_ndcg))

    metrics = {"HR": hit_rate,
               "EHR": expected_hits,
               "normalised recall": recall,
               "nDCG": mean_ndcg}
    if qualitative:
        metrics["predictions"] = preds
    return metrics

# Results
rule.s --> 18.317 <br>
rule.s * rule.c --> 20.545

In [11]:
# Collected (label, metrics) pairs, one per scoring function evaluated below.
results = []
# Passed to hitrate() as `nr_users`: the fraction of test users to sample.
partition_size = 0.1
# Cut-off passed to the recommenders and to hitrate().
k = 10

In [12]:
# Experiment: rank the article rules by their `z` attribute.
ar = Assos_Recommender(rules, sorter=lambda rule: rule.z)
results.append(("lambda x: x.z", hitrate(ar, k, nr_users=partition_size)))

100%|██████████| 5419/5419 [00:21<00:00, 250.05it/s]

avg len: 9.499723
HR@10	0.18306
normalised recall@10	0.05046
EHR@10	1.73902
nDCG@10	0.05562





In [13]:
# Experiment: rank the article rules by the product `s * c`.
ar = Assos_Recommender(rules, sorter=lambda rule: rule.s * rule.c)
results.append(("lambda x: x.s * x.c", hitrate(ar, k, nr_users=partition_size)))

100%|██████████| 5419/5419 [00:14<00:00, 368.12it/s]

avg len: 9.672818
HR@10	0.43384
normalised recall@10	0.18027
EHR@10	4.19649
nDCG@10	0.15185





In [14]:
# Experiment: rank the article rules by the sum `s + c`.
ar = Assos_Recommender(rules, sorter=lambda rule: rule.s + rule.c)
results.append(("lambda x: x.s + x.c", hitrate(ar, k, nr_users=partition_size)))

100%|██████████| 5419/5419 [01:24<00:00, 63.89it/s]

avg len: 8.552500
HR@10	0.44768
normalised recall@10	0.18523
EHR@10	3.82882
nDCG@10	0.16982





In [15]:
# Experiment: rank the article rules by `s` alone.
ar = Assos_Recommender(rules, sorter=lambda rule: rule.s)
results.append(("lambda x: x.s", hitrate(ar, k, nr_users=partition_size)))

100%|██████████| 5419/5419 [00:10<00:00, 498.00it/s]

avg len: 9.681676
HR@10	0.37184
normalised recall@10	0.14491
EHR@10	3.60003
nDCG@10	0.12024





In [16]:
# Experiment: rank the article rules by `c` alone.
ar = Assos_Recommender(rules, sorter=lambda rule: rule.c)
results.append(("lambda x: x.c", hitrate(ar, k, nr_users=partition_size)))

100%|██████████| 5419/5419 [01:24<00:00, 64.21it/s]

avg len: 8.462816
HR@10	0.44842
normalised recall@10	0.18576
EHR@10	3.79491
nDCG@10	0.17467





In [17]:
# Experiment: rank the article rules by `lift`, highest first.
ar = Assos_Recommender(rules, sorter=lambda rule: rule.lift)
results.append(("lambda x: x.lift", hitrate(ar, k, nr_users=partition_size)))

100%|██████████| 5419/5419 [00:21<00:00, 247.77it/s]

avg len: 9.482377
HR@10	0.18306
normalised recall@10	0.05146
EHR@10	1.73584
nDCG@10	0.05685





In [18]:
# Experiment: rank the article rules by negated `lift`, i.e. lowest lift first.
ar = Assos_Recommender(rules, sorter=lambda rule: -rule.lift)
results.append(("lambda x: -x.lift", hitrate(ar, k, nr_users=partition_size)))

100%|██████████| 5419/5419 [00:11<00:00, 477.44it/s]

avg len: 9.488836
HR@10	0.00148
normalised recall@10	0.00050
EHR@10	0.01401
nDCG@10	0.00026





In [19]:
# Experiment: rank the article rules by `2 * s + c`.
ar = Assos_Recommender(rules, sorter=lambda rule: 2 * rule.s + rule.c)
results.append(("lambda x: 2 * x.s + x.c", hitrate(ar, k, nr_users=partition_size)))

100%|██████████| 5419/5419 [01:23<00:00, 64.81it/s]

avg len: 8.630190
HR@10	0.45525
normalised recall@10	0.19185
EHR@10	3.92889
nDCG@10	0.17639





In [20]:
# Experiment: rank the article rules by `s * s * c`.
ar = Assos_Recommender(rules, sorter=lambda rule: rule.s * rule.s * rule.c)
results.append(("lambda x: x.s * x.s * x.c", hitrate(ar, k, nr_users=partition_size)))

100%|██████████| 5419/5419 [00:13<00:00, 414.49it/s]

avg len: 9.693117
HR@10	0.40949
normalised recall@10	0.16241
EHR@10	3.96919
nDCG@10	0.13770





In [21]:
# Experiment: rank the article rules by the Euclidean norm of (s, c).
ar = Assos_Recommender(
    rules,
    sorter=lambda rule: np.sqrt(np.power(rule.s, 2) + np.power(rule.c, 2)),
)
results.append(("lambda x: np.sqrt(np.power(x.s, 2) + np.power(x.c, 2))", hitrate(ar, k, nr_users=partition_size)))

100%|██████████| 5419/5419 [01:26<00:00, 62.97it/s]

avg len: 8.427385
HR@10	0.44824
normalised recall@10	0.18794
EHR@10	3.77747
nDCG@10	0.17326





In [None]:
# Experiment: as the Euclidean norm of (s, c), but with s squared counted twice.
ar = Assos_Recommender(
    rules,
    sorter=lambda rule: np.sqrt(2 * np.power(rule.s, 2) + np.power(rule.c, 2)),
)
results.append(("lambda x: np.sqrt(2 * np.power(x.s, 2) + np.power(x.c, 2))", hitrate(ar, k, nr_users=partition_size)))

 17%|█▋        | 907/5419 [00:14<00:56, 80.28it/s]

In [None]:
# Experiment: prefer rules whose `lift` is closest to 1.
ar = Assos_Recommender(rules, sorter=lambda rule: -np.abs(rule.lift - 1))
results.append(("lambda x: -np.abs(x.lift - 1)", hitrate(ar, k, nr_users=partition_size)))

In [None]:
# Experiment: prefer rules whose `lift` is farthest from 1.
ar = Assos_Recommender(rules, sorter=lambda rule: np.abs(rule.lift - 1))
results.append(("lambda x: np.abs(x.lift - 1)", hitrate(ar, k, nr_users=partition_size)))

In [None]:
# Experiment: rank the article rules by `lift * s * c`.
ar = Assos_Recommender(rules, sorter=lambda rule: rule.lift * rule.s * rule.c)
results.append(("lambda x: x.lift * x.s * x.c", hitrate(ar, k, nr_users=partition_size)))

In [None]:
# Experiment: rank the article rules by `z * s`.
ar = Assos_Recommender(rules, sorter=lambda rule: rule.z * rule.s)
results.append(("lambda x: x.z * x.s", hitrate(ar, k, nr_users=partition_size)))

In [None]:
# Experiment: rank the article rules by `z * s * c`.
ar = Assos_Recommender(rules, sorter=lambda rule: rule.z * rule.s * rule.c)
results.append(("lambda x: x.z * x.s * x.c", hitrate(ar, k, nr_users=partition_size)))

In [None]:
# Experiment: rank the article rules by `z * s * s * c`.
ar = Assos_Recommender(rules, sorter=lambda rule: rule.z * rule.s * rule.s * rule.c)
results.append(("lambda x: x.z * x.s * x.s * x.c", hitrate(ar, k, nr_users=partition_size)))

In [None]:
# Experiment: rank the article rules by the root of the summed squares of z, lift, s, s and c.
# NOTE(review): `s` squared is added twice - confirm the double weight is intended.
ar = Assos_Recommender(
    rules,
    sorter=lambda rule: np.sqrt(
        np.power(rule.z, 2)
        + np.power(rule.lift, 2)
        + np.power(rule.s, 2)
        + np.power(rule.s, 2)
        + np.power(rule.c, 2)
    ),
)
results.append(("lambda x: np.sqrt(np.power(x.z, 2) + np.power(x.lift, 2) + np.power(x.s, 2) + np.power(x.s, 2) + np.power(x.c, 2))", hitrate(ar, k, nr_users=partition_size)))

In [None]:
# Experiment: rank the article rules by the root of the summed squares of z, lift, s, s and c.
# NOTE(review): scoring function and label are identical to the preceding cell's,
# so `results` gets a second entry under the same label - confirm whether a
# different variant was meant here.
ar = Assos_Recommender(
    rules,
    sorter=lambda rule: np.sqrt(
        np.power(rule.z, 2)
        + np.power(rule.lift, 2)
        + np.power(rule.s, 2)
        + np.power(rule.s, 2)
        + np.power(rule.c, 2)
    ),
)
results.append(("lambda x: np.sqrt(np.power(x.z, 2) + np.power(x.lift, 2) + np.power(x.s, 2) + np.power(x.s, 2) + np.power(x.c, 2))", hitrate(ar, k, nr_users=partition_size)))

In [None]:
# Print every experiment (label, then metrics), best hit rate first.
ranked_results = sorted(results, key=lambda entry: entry[1]["HR"], reverse=True)
for label, metrics in ranked_results:
    print(label)
    print(metrics)
    print("-" * 5, '\n')

# Results
sum(rule.s * rule.c) --> 23.020

In [None]:
# Evaluate the user-rule recommender on its own, with its default rule weight.
ur = User_Recommender(assos=user_rules)
hr = hitrate(ur, k, nr_users=partition_size)

no filter: 36139; 08554 <br>
25: idem<br>

In [None]:
def score_function(x):
    """Score an article rule as ``lift * s * c``."""
    return x.lift * x.s * x.c


# Evaluate the combination: user-rule picks first, article-rule picks as filler.
ar = Assos_Recommender(rules, sorter=score_function)
ur = User_Recommender(assos=user_rules)
cr = Combo_Recommender(ar=ar, ur=ur)
hr = hitrate(cr, k, nr_users=partition_size)

In [None]:
# Load a second copy of the test split as a dict: user_id -> that user's `item_id` value.
# `with` closes the file handle as soon as the dump has been read.
with open(f"pickle_dumps/test_df_{seed_index}.p", "rb") as fh:
    df_test2 = pickle.load(fh)
df_test2 = df_test2.set_index("user_id").to_dict()["item_id"]

In [None]:
# Keep only three hand-picked users; raises KeyError if one of them is absent.
df_test2 = {user_id: df_test2[user_id] for user_id in (69, 420, 42069)}

In [None]:
# Evaluate all selected users and keep their individual predictions.
# NOTE: this rebinds `results`, which the experiment cells above use as a list.
results = hitrate(ar, k, nr_users=1, qualitative=True, df=df_test2)

In [None]:
# Show, per user, the target items followed by the recommended items.
for items, target, recommended in results["predictions"]:
    # `items` (the user's input items) is unpacked but deliberately not printed
    print(target)
    print(recommended)
    print("-" * 5)