# `get_rec_item` Function

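Use this function when the recommendations are item *clusters* and you need concrete items: it expands each user's ranked item clusters into a top-K list of individual items.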

In [1]:
import numpy as np
import pandas as pd

from random_gen import *
from get_rec import *

def get_rec_item(df_rec, top_k, ic_assignment):

    """Returns the top K item recommendations for each user in the user list.

    For every user the recommended item clusters are visited in rank order.
    A cluster with fewer items than the slots still open is taken in full
    (in ascending item index); the first cluster that can fill all remaining
    slots is sampled randomly without replacement via ``np.random.choice``
    (global NumPy random state). Items are identified by their position in
    ``ic_assignment``.

    If a user's recommended clusters together contain fewer than ``top_k``
    items, the unfilled trailing ranks are set to NaN.

            Parameters:
                    df_rec (pandas.DataFrame): Table containing the top N item cluster recommendations for each user in the user list.
                        Must have a 'user_id' column; every column after the first is read as a ranked cluster id (best first)
                    top_k (int): Number of items to recommend per user
                    ic_assignment (array-like): List containing the cluster assignment of each item

            Returns:
                    df_rec_item (pandas.DataFrame): Table containing the top K item recommendations for each user in the user list,
                        with columns 'user_id', 'rank_1', ..., 'rank_K' (rank columns are float) and the same index as df_rec

    """

    # A plain list compared with == yields a single bool rather than an
    # element-wise mask, so coerce to an array before matching cluster ids.
    ic_assignment = np.asarray(ic_assignment)

    n_slots = max(top_k, 0)

    # Pull the ranked cluster ids out once instead of calling .iloc per lookup.
    ranked_clusters = df_rec.iloc[:, 1:].to_numpy()
    n_users, n_ranked = ranked_clusters.shape

    # Get items
    rows = []
    for j in range(n_users):
        item_rec = []
        rank = 0
        # Stop when the list is full or the user's ranked clusters are exhausted.
        while len(item_rec) < n_slots and rank < n_ranked:
            item_list = np.where(ic_assignment == ranked_clusters[j, rank])[0]
            remaining = n_slots - len(item_rec)
            if remaining > len(item_list):
                # Cluster is too small to finish the list: take all of it and cascade.
                item_rec.extend(item_list)
                rank += 1
            else:
                # Cluster can fill every open slot: draw the remainder at random.
                item_rec.extend(np.random.choice(item_list, size=remaining, replace=False))
        # Not enough items across all ranked clusters: pad instead of failing.
        item_rec.extend([np.nan] * (n_slots - len(item_rec)))
        rows.append(item_rec)

    # reshape keeps the (n_users, n_slots) layout even when either dimension is 0.
    items = np.asarray(rows, dtype=float).reshape(n_users, n_slots)

    # Create recommendation table in one go (avoids column-by-column insertion).
    data = {'user_id': df_rec['user_id'].to_numpy()}
    for i in range(n_slots):
        data['rank_' + str(i + 1)] = items[:, i]
    df_rec_item = pd.DataFrame(data, index=df_rec.index)

    return df_rec_item

## Example

In [2]:
# Demo configuration: problem sizes, number of clusters, how many clusters to
# recommend per user (top_n) and the seed passed to the random generators.
n_user = 100
n_item = 50
sample_size = 10
n_user_cluster = 5
n_item_cluster = 5
top_n = 3
random_seed = 1

# Build random demo inputs and the cluster-level recommendation table.
# NOTE(review): random_user_list / random_user_cluster / random_utility_matrix /
# get_rec are not defined in this notebook — presumably they come from the
# star imports of random_gen and get_rec; confirm.
user_id_list = list(range(n_user))
user_list = random_user_list(n_user, sample_size, random_seed)
uc_assignment = random_user_cluster(n_user, n_user_cluster, random_seed)
utility_matrix_o, utility_matrix = random_utility_matrix(n_user_cluster, n_item_cluster, random_seed)
df_rec = get_rec(utility_matrix, utility_matrix_o, user_list, top_n, uc_assignment)
# Display the table: one row per sampled user, columns user_id and rank_1..rank_<top_n>.
df_rec

Unnamed: 0,user_id,rank_1,rank_2,rank_3
0,80,1.0,2.0,4.0
1,84,3.0,4.0,1.0
2,33,3.0,4.0,1.0
3,81,0.0,3.0,1.0
4,93,3.0,1.0,0.0
5,17,0.0,3.0,1.0
6,36,3.0,1.0,0.0
7,82,1.0,2.0,4.0
8,69,4.0,2.0,0.0
9,65,3.0,1.0,0.0


In [3]:
# Assign each of the n_item items to one of n_item_cluster clusters.
# NOTE(review): this reuses random_user_cluster for items — presumably it only
# depends on the counts and the seed; confirm against random_gen.
ic_assignment = random_user_cluster(n_item, n_item_cluster, random_seed=2)
ic_assignment

array([0, 0, 3, 2, 3, 0, 2, 1, 3, 2, 4, 4, 4, 3, 4, 2, 3, 3, 2, 1, 2, 4,
       3, 0, 4, 3, 1, 2, 0, 4, 4, 2, 4, 2, 1, 0, 2, 2, 1, 0, 1, 0, 2, 1,
       1, 1, 4, 2, 3, 0])

In [4]:
# Expand each user's ranked item clusters into top_k concrete item indices.
# get_rec_item draws from NumPy's global random state, so the displayed result
# depends on the state left behind by the preceding cells.
top_k = 10
df_rec_item = get_rec_item(df_rec, top_k, ic_assignment)
df_rec_item

Unnamed: 0,user_id,rank_1,rank_2,rank_3,rank_4,rank_5,rank_6,rank_7,rank_8,rank_9,rank_10
0,80,7.0,19.0,26.0,34.0,38.0,40.0,43.0,44.0,45.0,15.0
1,84,2.0,4.0,8.0,13.0,16.0,17.0,22.0,25.0,48.0,32.0
2,33,2.0,4.0,8.0,13.0,16.0,17.0,22.0,25.0,48.0,21.0
3,81,0.0,1.0,5.0,23.0,28.0,35.0,39.0,41.0,49.0,48.0
4,93,2.0,4.0,8.0,13.0,16.0,17.0,22.0,25.0,48.0,38.0
5,17,0.0,1.0,5.0,23.0,28.0,35.0,39.0,41.0,49.0,2.0
6,36,2.0,4.0,8.0,13.0,16.0,17.0,22.0,25.0,48.0,43.0
7,82,7.0,19.0,26.0,34.0,38.0,40.0,43.0,44.0,45.0,47.0
8,69,21.0,46.0,29.0,24.0,32.0,10.0,14.0,30.0,11.0,12.0
9,65,2.0,4.0,8.0,13.0,16.0,17.0,22.0,25.0,48.0,43.0


In [5]:
# Same table as a plain NumPy array (user_id in column 0, then rank_1..rank_K).
df_rec_item.to_numpy()

array([[80.,  7., 19., 26., 34., 38., 40., 43., 44., 45., 15.],
       [84.,  2.,  4.,  8., 13., 16., 17., 22., 25., 48., 32.],
       [33.,  2.,  4.,  8., 13., 16., 17., 22., 25., 48., 21.],
       [81.,  0.,  1.,  5., 23., 28., 35., 39., 41., 49., 48.],
       [93.,  2.,  4.,  8., 13., 16., 17., 22., 25., 48., 38.],
       [17.,  0.,  1.,  5., 23., 28., 35., 39., 41., 49.,  2.],
       [36.,  2.,  4.,  8., 13., 16., 17., 22., 25., 48., 43.],
       [82.,  7., 19., 26., 34., 38., 40., 43., 44., 45., 47.],
       [69., 21., 46., 29., 24., 32., 10., 14., 30., 11., 12.],
       [65.,  2.,  4.,  8., 13., 16., 17., 22., 25., 48., 43.]])

## Unit Test

In [None]:
import unittest

class TestGetRecItem(unittest.TestCase):
    """Regression tests pinned to the outputs shown in the Example section."""

    def test_1(self):
        """get_rec_item reproduces the item table from the Example section."""

        # Set-up
        n_user = 100
        n_item = 50
        sample_size = 10
        n_user_cluster = 5
        n_item_cluster = 5
        random_seed = 1
        top_n = 3
        top_k = 10

        user_list = random_user_list(n_user, sample_size, random_seed)
        uc_assignment = random_user_cluster(n_user, n_user_cluster, random_seed)
        utility_matrix_o, utility_matrix = random_utility_matrix(n_user_cluster, n_item_cluster, random_seed)
        df_rec = get_rec(utility_matrix, utility_matrix_o, user_list, top_n, uc_assignment)

        # NOTE(review): get_rec_item uses NumPy's global random state; the expected
        # rows assume the call below leaves that state as it was in the Example
        # section (presumably random_user_cluster seeds it) — confirm.
        ic_assignment = random_user_cluster(n_item, n_item_cluster, random_seed=2)

        df_rec_item = get_rec_item(df_rec, top_k, ic_assignment)

        # Only `np` is imported in this notebook, so the constructor must be
        # qualified; a bare `array(...)` raises NameError.
        test_case = np.array([
            [80.,  7., 19., 26., 34., 38., 40., 43., 44., 45., 15.],
            [84.,  2.,  4.,  8., 13., 16., 17., 22., 25., 48., 32.],
            [33.,  2.,  4.,  8., 13., 16., 17., 22., 25., 48., 21.],
            [81.,  0.,  1.,  5., 23., 28., 35., 39., 41., 49., 48.],
            [93.,  2.,  4.,  8., 13., 16., 17., 22., 25., 48., 38.],
            [17.,  0.,  1.,  5., 23., 28., 35., 39., 41., 49.,  2.],
            [36.,  2.,  4.,  8., 13., 16., 17., 22., 25., 48., 43.],
            [82.,  7., 19., 26., 34., 38., 40., 43., 44., 45., 47.],
            [69., 21., 46., 29., 24., 32., 10., 14., 30., 11., 12.],
            [65.,  2.,  4.,  8., 13., 16., 17., 22., 25., 48., 43.]
        ])

        self.assertEqual(df_rec_item.to_numpy().tolist(), test_case.tolist())

    def test_2(self):
        """The cluster-level table fed into get_rec_item matches the Example section."""

        # Set-up
        n_user = 100
        sample_size = 10
        n_user_cluster = 5
        n_item_cluster = 5
        # The expected rows below are the df_rec output displayed in the Example
        # section, which was generated with seed 1 (the test previously used 2).
        random_seed = 1
        top_n = 3

        user_list = random_user_list(n_user, sample_size, random_seed)
        uc_assignment = random_user_cluster(n_user, n_user_cluster, random_seed)
        utility_matrix_o, utility_matrix = random_utility_matrix(n_user_cluster, n_item_cluster, random_seed)

        df_rec = get_rec(utility_matrix, utility_matrix_o, user_list, top_n, uc_assignment)

        test_case = np.array([
            [80.,  1.,  2.,  4.],
            [84.,  3.,  4.,  1.],
            [33.,  3.,  4.,  1.],
            [81.,  0.,  3.,  1.],
            [93.,  3.,  1.,  0.],
            [17.,  0.,  3.,  1.],
            [36.,  3.,  1.,  0.],
            [82.,  1.,  2.,  4.],
            [69.,  4.,  2.,  0.],
            [65.,  3.,  1.,  0.]
        ])

        self.assertEqual(df_rec.to_numpy().tolist(), test_case.tolist())
        
# Run the tests inside the notebook: argv=[''] keeps unittest from parsing the
# kernel's own command-line arguments, and exit=False stops it from calling
# sys.exit() when the run finishes.
unittest.main(argv=[''], verbosity=2, exit=False)