## 0. Import packages

In [1]:
import pandas as pd
import gzip
import math
from tqdm import tqdm
from itertools import chain, combinations
from Association import Association
import numpy as np
tqdm.pandas() #for progres_apply etc.

## 1. Load data from previous step

In [2]:
import pickle
rules = pickle.load(open("pickle_dumps/article_rules.p", "rb"))
user_rules = pickle.load(open("pickle_dumps/user_rules.p", "rb"))

In [18]:
df_test = pickle.load(open("pickle_dumps/test_df.p", "rb"))
df_test = df_test.set_index("user_id").to_dict()["test_split"]

In [19]:
df_test

{1528: [([1155,
    1688,
    454,
    4267,
    183,
    5117,
    159,
    468,
    187,
    82,
    479,
    482,
    491,
    497,
    1295,
    1439,
    1907,
    2005,
    1037,
    1159,
    2482,
    2007,
    2484,
    2614,
    1910,
    2009,
    3129,
    2010,
    762,
    1912,
    1705,
    1304,
    1305,
    1306,
    3227,
    3965,
    1309,
    1445,
    1310,
    2249,
    198,
    199,
    1314,
    1079,
    1622,
    1081,
    336,
    337,
    1080,
    1625,
    1082,
    5825,
    1576,
    1577,
    1578,
    548,
    557,
    207,
    212,
    213,
    566,
    240,
    3757,
    3043,
    1730,
    2461,
    219,
    1731,
    1733,
    4646,
    2135,
    612,
    3364,
    3686,
    2943,
    585,
    231,
    2018,
    1334,
    667,
    2253,
    449,
    1795,
    677,
    3134,
    697,
    2167,
    699,
    1555,
    2153,
    711,
    5240,
    2435,
    3763,
    2256,
    2019,
    2257,
    3313,
    2955,
    2382,
    2303,
    2260,
    230

In [4]:
df_train = pickle.load(open("pickle_dumps/train_df.p", "rb"))
df_train = df_train.set_index("user_id").to_dict()["item_id"]

In [5]:
class Recommender():
    def __init__(self, assos):
        pass
    
    def recommend(self, user_items, k, user_id=None, split=None):
        pass        

In [10]:
def hitrate(rc, k):
    hits = 0
    ndcg = []
    counts = 0
    avg_len = 0
    
    idcg = sum(1/np.log2(i+1) for i in range(1,k+1))
    
    for user in tqdm(df_test):
        split_user = df_test[user]
        for split_id, split in enumerate(split_user):
            r = rc.recommend(split[0], k, user, split_id)
        
            avg_len += len(r)/len(split_user)
            
            for item in split[1]:
                if item in r:
                    hits += (item in r) / len(split_user)
                    break
            
            dcg = 0
            for i, rec in enumerate(r):
                good = 1 if rec in split[1] else 0
                dcg += (2**good - 1)/(np.log2(i+2))
            ndcg.append(dcg/idcg)
            
                
                
        counts += 1
    
    print("avg len: {:f}".format(avg_len / len(df_test)))
    if k == -1:
        print("HR\t{:.5f}".format((hits / counts)))
    else:
        print("HR@{}\t{:.5f}".format(k, (hits / counts)))
    print("nDCG@{}\t{:.5f}".format(k, (sum(ndcg) / len(ndcg))))
    return hits / counts

In [8]:
class Assos_Recommender(Recommender):
    def __init__(self, assos):
        # use popular items to fill unused space
        # TODO (df_train is used for this)
        
        self.assos = sorted(assos, key=lambda x: x.s, reverse=True)
        
#     def get_items(self, user_id):
#         return self.df_test[user_id]
    
    def recommend(self, user_items, k, user_id=None, split=None):
        recommendation = set()
        for a in self.assos:
            for l in a.left:
                # break if any element is not in the user session
                if l not in user_items:
                    break
                    
                # all elements are in the user session --> add RHS of rule to the recommendation
                r = list(a.right)[0]
                if r not in user_items:
                    recommendation.add(r)
                        
            if len(recommendation) >= k and k != -1:
                break
                
        return recommendation

# Results
rule.s --> 18.317 <br>
rule.s * rule.c --> 20.545

In [11]:
ar = Assos_Recommender(rules)
hr = hitrate(ar, 10)

100%|█████████████████████████████████████████| 101/101 [00:40<00:00,  2.49it/s]

avg len: 10.000000
HR@10	0.24257
nDCG@10	0.03086





In [22]:
class User_Recommender(Recommender):
    def __init__(self, assos):
        self.assos = assos
    
    def recommend(self, user_items, k, user_id, split):
        user_items = set(user_items)
        rdict = dict()
        for a in self.assos[split]:
            if user_id in a.right:
                item_list = list()
                for l in a.left:
                    if l in df_test:
                        item_list.append(set(df_test[l][split][0]))
                    if l in df_train:
                        item_list.append(set(df_train[l]))
                        
                if item_list:
                    intersection = set.intersection(*item_list)
                    for item in intersection - user_items:
                        if item in rdict:
                            rdict[item] += (a.c * a.s)
                        else:
                            rdict[item] = (a.c * a.s)

                            
#         if rdict.values():   
#             if max(rdict.values()) < 50:
#                 return set()
            
        rlist = sorted(rdict.keys(), key=lambda x: rdict[x], reverse=True)

        recommendation = set()
        for i in rlist:
            recommendation.add(i)
            if len(recommendation) >= k and k != -1:
                break
        return recommendation

# Results
sum(rule.s * rule.c) --> 23.020

In [23]:
ur = User_Recommender(user_rules)
hr = hitrate(ur, 10)

100%|█████████████████████████████████████████| 101/101 [00:39<00:00,  2.53it/s]

avg len: 2.648515
HR@10	0.19059
nDCG@10	0.05762





In [24]:
class Combo_Recommender(Recommender):
    def __init__(self, ar, ur):
        self.ar = ar
        self.ur = ur
        
    def recommend(self, user_items, k, user_id, split):
        recommendation = self.ur.recommend(user_items, k, user_id, split)
        if len(recommendation) < k:
            recommendation.update(self.ar.recommend(user_items, k, user_id, split))
        return recommendation

In [25]:
cr = Combo_Recommender(ar, ur)
hr = hitrate(cr, 10)

100%|█████████████████████████████████████████| 101/101 [01:09<00:00,  1.45it/s]

avg len: 10.000000
HR@10	0.38119
nDCG@10	0.08085





In [26]:
class Combo_Recommender2(Recommender):
    def __init__(self, ar, ur):
        self.ar = ar
        self.ur = ur
        
    def recommend(self, user_items, k, user_id, split):
        recommendation = self.ur.recommend(user_items, 8, user_id, split)
        if len(recommendation) < k:
            recommendation.update(self.ar.recommend(user_items, k - len(recommendation), user_id, split))
        return recommendation

In [27]:
cr = Combo_Recommender2(ar, ur)
hr = hitrate(cr, 10)

100%|█████████████████████████████████████████| 101/101 [01:10<00:00,  1.43it/s]

avg len: 10.000000
HR@10	0.38614
nDCG@10	0.07772



