In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import sys
import pandas as pd
import numpy as np
import json
from tqdm import tqdm
import matplotlib.pyplot as plt
from pprint import pprint
sys.path.append('..')

from model import BoxRec, BoxRecConditional
from model import MatrixFactorization, MatrixFactorizationWithBias
from data_loaders.data_processing import JointDataProcessing
from model.box.utils import log1mexp

# Load Models

## Functionalities

In [None]:
def load_model(model_dir):
    """Rebuild a trained model from ``model_dir`` and load its best-NDCG weights.

    Args:
        model_dir: Directory containing ``args.json`` (the saved arguments
            used to construct the model) and ``model_best_ndcg.pth`` (the
            state dict to load).

    Returns:
        Tuple ``(model, arg_dict)``: the constructed model with its state dict
        loaded onto CPU, and the dictionary parsed from ``args.json``.

    Raises:
        ValueError: If ``arg_dict['model']`` is not one of ``'box'``,
            ``'box_conditional'``, ``'mf_bias'`` or ``'mf'``.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(model_dir + '/args.json', 'r') as args_file:
        arg_dict = json.load(args_file)

    if arg_dict['model'] == 'box':
        model = BoxRec(
                n_users=arg_dict['num_users'],
                n_items=arg_dict['num_items'],
                embedding_dim=arg_dict['embedding_dim'],
                volume_temp=arg_dict['volume_temp'],
                intersection_temp=arg_dict['intersection_temp']
        )
    elif arg_dict['model'] == 'box_conditional':
        model = BoxRecConditional(
                n_users=arg_dict['num_users'],
                n_items=arg_dict['num_items'],
                embedding_dim=arg_dict['embedding_dim'],
                volume_temp=arg_dict['volume_temp'],
                intersection_temp=arg_dict['intersection_temp']
        )
    elif arg_dict['model'] == 'mf_bias':
        model = MatrixFactorizationWithBias(
                    n_users=arg_dict['num_users'],
                    n_items=arg_dict['num_items'],
                    embedding_dim=arg_dict['embedding_dim'],
        )
    elif arg_dict['model'] == 'mf':
        model = MatrixFactorization(
                    n_users=arg_dict['num_users'],
                    n_items=arg_dict['num_items'],
                    embedding_dim=arg_dict['embedding_dim'],
        )
    else:
        raise ValueError('Invalid model type')

    # NOTE(review): torch.load unpickles the checkpoint; only point this at
    # checkpoints from a trusted source.
    model.load_state_dict(torch.load(model_dir + '/model_best_ndcg.pth',
                                     map_location=torch.device('cpu')))
    return model, arg_dict

## Load Box Model Genre

In [None]:
# Checkpoint directory of the conditional box model; load_model returns the
# model together with the arguments it was trained with.
box_cond_model_dir = "../model/ml1m/box_conditional/user_genre_movie/dim_64-negs_20"
model_box_mg, arg_dict_box_mg = load_model(box_cond_model_dir)

## Load Vector Model Genre

In [None]:
# Checkpoint directory of the matrix-factorization-with-bias (vector) model;
# load_model returns the model together with the arguments it was trained with.
vector_model_dir = "../model/ml1m/mf_bias/user_genre_movie/dim_128-negs_10"
model_vector_mg, arg_dict_vector_mg = load_model(vector_model_dir)

# Load dataloader

## functionalities

In [None]:
def gt_df_to_matrix(dataset, gt_df):
    """Convert a two-column (row id, movie id) DataFrame into a boolean matrix.

    Args:
        dataset: Object exposing ``n_movies``, used as the number of columns.
        gt_df: DataFrame whose first column holds row ids and whose second
            column holds movie ids. Any further columns are ignored.

    Returns:
        ``torch.bool`` tensor of shape ``(max_row_id + 1, dataset.n_movies)``
        that is True at every ``(row id, movie id)`` pair present in ``gt_df``.
        An empty ``gt_df`` yields a ``(0, dataset.n_movies)`` tensor.
    """
    columns = gt_df.columns
    # Explicit long tensors avoid relying on torch's implicit handling of
    # pandas Series as index objects.
    row_ids = torch.tensor(gt_df[columns[0]].tolist(), dtype=torch.long)
    col_ids = torch.tensor(gt_df[columns[1]].tolist(), dtype=torch.long)

    # Size the matrix by the largest id rather than the number of distinct
    # ids: with contiguous ids 0..n-1 both agree, but with gaps the distinct
    # count is too small and the assignment below would index out of bounds.
    n_rows = int(row_ids.max()) + 1 if row_ids.numel() > 0 else 0
    n_cols = dataset.n_movies

    gt_matrix = torch.zeros((n_rows, n_cols), dtype=torch.bool)
    gt_matrix[row_ids, col_ids] = True
    return gt_matrix

def get_hr_ndcg_101(scores):
    """Compute HR@10 and NDCG when the target item sits in the last column.

    Args:
        scores: 2-D tensor of candidate scores, one row per query. The target
            item is the last column of every row (presumably 100 negatives
            followed by 1 positive, given the "101" in the name — confirm
            against the data files).

    Returns:
        Tuple ``(hr, ndcg)`` of Python floats: the fraction of rows whose
        target is ranked in the top 10, and the mean of ``1 / log2(rank + 1)``
        with 1-indexed ranks.
    """
    target_idx = scores.shape[1] - 1
    pred_order = torch.argsort(scores, dim=-1, descending=True)
    # Column position of the target in the sorted order, converted to a 1-indexed rank.
    rank = torch.where(pred_order == target_idx)[1] + 1
    # Tensor reductions replace the builtin sum(), which iterated over the
    # tensor one element at a time in Python.
    hr_101 = (rank <= 10).float().mean()
    ndcg_101 = (1.0 / torch.log2(rank + 1)).mean()
    return hr_101.item(), ndcg_101.item()

def get_hr_ndcg_at_k(rank_list):
    """Print and return HR@{10, 20, 50} and NDCG for 0-indexed ranks.

    Args:
        rank_list: Sequence of 0-indexed ranks (position of the target item in
            the sorted candidate list; 0 is the best rank).

    Returns:
        Tuple ``(hr_list, ndcg)`` where ``hr_list`` holds HR@10, HR@20 and
        HR@50 in that order and ``ndcg`` is the mean of ``1 / log2(rank + 2)``.
    """
    ranks = np.asarray(rank_list)
    hr_list = []
    for k in [10, 20, 50]:
        # Ranks are 0-indexed (the NDCG below adds 2 for that reason), so the
        # top-k items are exactly those with rank < k. The former `<= k`
        # counted the (k+1)-th item as a hit.
        hr = np.mean(ranks < k)
        hr_list.append(hr)
        print(f'HR@{k}: {hr}')
    ndcg = np.mean(1.0 / np.log2(ranks + 2))
    print(f'NDCG: {ndcg}')
    return hr_list, ndcg

## Dataloader class

In [None]:
def get_dataset(arg_dict):
    """Instantiate ``JointDataProcessing`` from a model's saved arguments.

    Args:
        arg_dict: Mapping providing ``'data_dir'`` (resolved relative to the
            parent directory), ``'dataset'`` and ``'batch_size'``.

    Returns:
        The constructed ``JointDataProcessing`` instance.
    """
    return JointDataProcessing(
        data_dir='../' + arg_dict['data_dir'],
        dataset_type=arg_dict['dataset'],
        batch_size=arg_dict['batch_size'],
    )


## Get test and validation numbers
- This is to see if the model loading has worked and if there is any anomaly in the model.
- Also the test numbers will be reported.

#### Functionalities

In [None]:
def check_test_results(dataset, model, mode='valid'):
    """Score the held-out candidate lists and report HR / NDCG.

    Args:
        dataset: Object exposing ``val_neg_user_movie``,
            ``val_neg_attribute_movie``, ``test_neg_user_movie``,
            ``test_neg_attribute_movie`` (DataFrames whose column labels are
            entity ids and whose columns hold candidate item ids) and
            ``n_users``.
        model: Callable as ``model(entity_ids, candidate_items)`` returning a
            score matrix accepted by ``get_hr_ndcg_101``.
        mode: ``'valid'`` or ``'test'``; selects which split is evaluated.

    Returns:
        Dict with keys ``'hr'``, ``'ndcg'`` (user queries) and ``'hr_attr'``,
        ``'ndcg_attr'`` (attribute queries).

    Raises:
        ValueError: If ``mode`` is neither ``'valid'`` nor ``'test'``.
    """
    if mode == 'valid':
        user_movie_dict = dataset.val_neg_user_movie.to_dict('list')
        attribute_movie_dict = dataset.val_neg_attribute_movie.to_dict('list')
    elif mode == 'test':
        user_movie_dict = dataset.test_neg_user_movie.to_dict('list')
        attribute_movie_dict = dataset.test_neg_attribute_movie.to_dict('list')
    else:
        # Previously an unknown mode surfaced as an UnboundLocalError below.
        raise ValueError(f"mode must be 'valid' or 'test', got {mode!r}")

    def _evaluate(entity_to_items, id_offset):
        """Score one {entity id: candidate items} mapping and return (hr, ndcg)."""
        entity_ids = torch.tensor([int(x) + id_offset for x in entity_to_items.keys()])
        candidates = torch.tensor([x for x in entity_to_items.values()])
        return get_hr_ndcg_101(model(entity_ids, candidates))

    valid_hr, valid_ndcg = _evaluate(user_movie_dict, 0)
    # Attribute ids are shifted by n_users before being passed to the model,
    # matching the offset used by the ranking functions in this notebook.
    # NOTE(review): the original author flagged this offset as "might be wrong" — confirm.
    valid_hr_attr, valid_ndcg_attr = _evaluate(attribute_movie_dict, dataset.n_users)

    return {'hr': valid_hr,
            'ndcg': valid_ndcg,
            'hr_attr': valid_hr_attr,
            'ndcg_attr': valid_ndcg_attr
            }


# Inference

#### Utility Functions

In [None]:
def vector_predict(combination_vector, model):
    """Score every item by its dot product with ``combination_vector``.

    Args:
        combination_vector: Query vector, broadcast against the item embeddings.
        model: Object exposing ``n_items`` and a callable ``item_embeddings``.

    Returns:
        Tensor of scores obtained by summing the element-wise product over the
        last dimension.
    """
    item_ids = torch.arange(model.n_items)
    return torch.sum(combination_vector * model.item_embeddings(item_ids), dim=-1)

def box_predict(combination_box, model):
    """Score every item box by its intersection log-volume with ``combination_box``.

    Args:
        combination_box: Query box exposing ``intersection_log_soft_volume``
            and ``gumbel_intersection_log_volume``.
        model: Object exposing ``n_items``, ``item_embeddings``,
            ``volume_temp`` and ``intersection_temp``.

    Returns:
        Whatever the selected intersection method returns: the hard
        intersection when ``model.intersection_temp`` is 0.0, otherwise the
        Gumbel intersection.
    """
    item_boxes = model.item_embeddings(torch.arange(model.n_items))
    if model.intersection_temp != 0.0:
        return combination_box.gumbel_intersection_log_volume(
            item_boxes,
            volume_temp=model.volume_temp,
            intersection_temp=model.intersection_temp,
        )
    return combination_box.intersection_log_soft_volume(
        item_boxes, volume_temp=model.volume_temp
    )

def box_predict_conditional(combination_box, model):
    """Score every item by intersection log-volume minus the item's own log-volume.

    Args:
        combination_box: Query box exposing ``intersection_log_soft_volume``
            and ``gumbel_intersection_log_volume``.
        model: Object exposing ``n_items``, ``item_embeddings``,
            ``volume_temp`` and ``intersection_temp``.

    Returns:
        Tensor of conditional log-scores (intersection log-volume minus item
        log-volume), one per item.

    Raises:
        AssertionError: If any resulting value is positive.
    """
    item_boxes = model.item_embeddings(torch.arange(model.n_items))

    # Hard intersection when the temperature is exactly zero, Gumbel otherwise.
    if model.intersection_temp != 0.0:
        joint_log_volume = combination_box.gumbel_intersection_log_volume(
            item_boxes,
            volume_temp=model.volume_temp,
            intersection_temp=model.intersection_temp,
        )
    else:
        joint_log_volume = combination_box.intersection_log_soft_volume(
            item_boxes, volume_temp=model.volume_temp
        )

    item_log_volume = item_boxes.log_soft_volume_adjusted(
        volume_temp=model.volume_temp,
        intersection_temp=model.intersection_temp,
    )
    conditional_prob = joint_log_volume - item_log_volume
    assert (conditional_prob <= 0).all(), "Log probability can not be positive"
    return conditional_prob
 

## $U \cap A$

In [None]:
def get_ranking_metrices(query_df,
                         model,
                         arg_dict,
                         gt_matrix,
                         predicted_a_m_matrix,
                         apply_mask = True):
    """Rank the query movie for every (user, attribute) query with three strategies.

    Strategies:
        * score multiplication: sum of the user's and the attribute's
          (log-)scores over all items;
        * score intersection: score items against the combined user/attribute
          representation (box intersection, or vector sum for ``mf_bias``);
        * post-hoc filtering: rank by user score only among the items that
          ``predicted_a_m_matrix`` marks for the attribute.

    Relies on the notebook-global ``n_users`` as the offset of attribute ids
    in the model's user embedding table.

    Args:
        query_df: DataFrame with ``'user_id'``, ``'attribute_id'`` and
            ``'movie_id'`` columns.
        model: Trained model exposing ``predict_item``, ``user_embeddings``,
            ``n_items`` (and the box attributes used by
            ``box_predict_conditional`` for box models).
        arg_dict: Saved arguments; ``arg_dict['model']`` must be
            ``'box_conditional'`` or ``'mf_bias'``.
        gt_matrix: Boolean tensor indexed as ``[user_id, attribute_id]`` giving
            the ground-truth items of that pair.
        predicted_a_m_matrix: Boolean tensor indexed by ``attribute_id`` giving
            the items predicted to carry that attribute.
        apply_mask: If True, every ground-truth item other than the query
            movie is excluded from the ranking (its score is set to ``-inf``).

    Returns:
        Dict with HR lists and NDCG values for the three strategies under the
        keys ``hr_/ndcg_u_mult_a``, ``hr_/ndcg_u_int_a`` and ``hr_/ndcg_a_and_u``.

    Raises:
        ValueError: If ``arg_dict['model']`` is not supported.
    """
    model_type = arg_dict['model']
    if model_type not in ('box_conditional', 'mf_bias'):
        # Previously surfaced as a NameError on the first use of an unset score.
        raise ValueError(f"Unsupported model type for ranking: {model_type!r}")

    rank_u_mult_a = []
    rank_u_int_a = []
    rank_a_and_u = []
    for user_id, attribute_id, movie_id in tqdm(zip(query_df['user_id'], query_df['attribute_id'], query_df['movie_id'])):
        u = torch.tensor([user_id])
        a = torch.tensor([attribute_id + n_users])
        m = torch.tensor([movie_id])
        if apply_mask:
            # Size the mask from the model being evaluated rather than from
            # the global vector model.
            mask = torch.ones(model.n_items, dtype=torch.bool)
            gt_m = torch.where(gt_matrix[user_id, attribute_id] == 1)[0]
            mask[gt_m] = False
            mask[m] = True  # the query movie itself must stay rankable

        score_u = model.predict_item(u)
        score_a = model.predict_item(a)

        ## Score multiplication
        if model_type == 'box_conditional':
            score_u_mult_a = score_u + score_a
        else:  # 'mf_bias'
            score_u_mult_a = F.logsigmoid(score_u) + F.logsigmoid(score_a)
        if apply_mask:
            score_u_mult_a[~mask] = -torch.inf
        order_u_mult_a = torch.argsort(score_u_mult_a, dim=-1, descending=True)

        ## Matrix completion
        a_m = torch.where(predicted_a_m_matrix[attribute_id])[0]
        if m not in a_m:
            # Query movie was filtered out: record the miss sentinel n_items.
            rank_a_and_u.append(model.n_items)
        else:
            if apply_mask:
                score_u[~mask] = -torch.inf
            order_u = torch.argsort(score_u, dim=-1, descending=True)
            # Positions in the user's ranking that hold attribute items, in rank order.
            a_in_u = (order_u[:, None] == a_m).nonzero()[:, 0]
            order_a_in_u = order_u[a_in_u]
            rank_a_and_u.append(torch.where(order_a_in_u == m)[0].item())

        ## Score intersection
        if model_type == 'box_conditional':
            u_box = model.user_embeddings(u)
            a_box = model.user_embeddings(a)
            # A redundant default-argument intersection call whose result was
            # immediately overwritten has been removed here.
            u_int_a_box = u_box.intersection(
                        a_box,
                        intersection_temp=model.intersection_temp,
                        bayesian=True)
            u_int_a_scores = box_predict_conditional(u_int_a_box, model)
        else:  # 'mf_bias'
            u_vector = model.user_embeddings(u)
            a_vector = model.user_embeddings(a)
            u_int_a_vector = u_vector + a_vector
            u_int_a_scores = vector_predict(u_int_a_vector, model)

        if apply_mask:
            u_int_a_scores[~mask] = -torch.inf
        order_u_int_a = torch.argsort(u_int_a_scores, dim=-1, descending=True)

        # Get ranks (0-indexed positions of the query movie)
        rank_u_mult_a.append(torch.where(order_u_mult_a == m)[0].item())
        rank_u_int_a.append(torch.where(order_u_int_a == m)[0].item())

    print("Calculating metrics for user*attribute")
    hr_u_mult_a, ndcg_u_mult_a = get_hr_ndcg_at_k(rank_u_mult_a)
    print("Calculating metrics for user intersection attribute")
    hr_u_int_a, ndcg_u_int_a = get_hr_ndcg_at_k(rank_u_int_a)
    print("Calculating metrics for attribute and user threshold")
    hr_a_and_u, ndcg_a_and_u = get_hr_ndcg_at_k(rank_a_and_u)


    return {
            'hr_u_mult_a': hr_u_mult_a, 'ndcg_u_mult_a': ndcg_u_mult_a,
            'hr_u_int_a': hr_u_int_a, 'ndcg_u_int_a': ndcg_u_int_a,
            'hr_a_and_u': hr_a_and_u, 'ndcg_a_and_u': ndcg_a_and_u
            }

## $ U \cap A_1 \cap A_2$

In [None]:
def get_ranking_metrices_intersection(query_df,
                                      model,
                                      arg_dict,
                                      gt_matrix,
                                      predicted_a_m_matrix,
                                      apply_mask=True):
    """Rank the query movie for every (user, attribute 1 AND attribute 2) query.

    Three strategies are evaluated: score multiplication, intersection of the
    user/attribute representations, and post-hoc filtering of the user's
    ranking by the items predicted to carry both attributes.

    Relies on the notebook-global ``n_users`` as the offset of attribute ids
    in the model's user embedding table.

    Args:
        query_df: DataFrame whose rows unpack, in order, to
            ``(user_id, movie_id, attribute_1, attribute_2)``.
        model: Trained model exposing ``predict_item``, ``user_embeddings``
            and ``n_items``.
        arg_dict: Saved arguments; ``arg_dict['model']`` must be
            ``'box_conditional'`` or ``'mf_bias'``. For box models
            ``arg_dict['intersection_temp']`` is also read.
        gt_matrix: Boolean tensor indexed as
            ``[user_id, attribute_1, attribute_2]`` giving ground-truth items.
        predicted_a_m_matrix: Boolean tensor indexed by attribute id giving
            the items predicted to carry that attribute.
        apply_mask: If True, every ground-truth item other than the query
            movie is excluded from the ranking.

    Returns:
        Dict with HR lists and NDCG values under the keys
        ``hr_/ndcg_u_a1_a2_mult``, ``hr_/ndcg_u_a1_a2_int`` and
        ``hr_/ndcg_u_a1_a2_and``.

    Raises:
        ValueError: If ``arg_dict['model']`` is not supported.
    """
    model_type = arg_dict['model']
    if model_type not in ('box_conditional', 'mf_bias'):
        # Previously surfaced as a NameError on the first use of an unset score.
        raise ValueError(f"Unsupported model type for ranking: {model_type!r}")

    rank_u_a1_a2_mult = []
    rank_u_a1_a2_int = []
    rank_u_a1_a2_and = []
    for user_id, movie_id, attribute_1, attribute_2 in tqdm(query_df.values):
        u = torch.tensor([user_id])
        m = torch.tensor([movie_id])
        a1 = torch.tensor([attribute_1 + n_users])
        a2 = torch.tensor([attribute_2 + n_users])

        if apply_mask:
            # Size the mask from the model being evaluated rather than from
            # the global vector model.
            mask = torch.ones(model.n_items, dtype=torch.bool)
            gt_m = torch.where(gt_matrix[user_id, attribute_1, attribute_2] == 1)[0]
            mask[gt_m] = False
            mask[m] = True  # the query movie itself must stay rankable

        score_u = model.predict_item(u)
        score_a1 = model.predict_item(a1)
        score_a2 = model.predict_item(a2)

        ## Score multiplication
        if model_type == 'box_conditional':
            score_u_a1_a2_mult = score_u + score_a1 + score_a2
        else:  # 'mf_bias'
            score_u_a1_a2_mult = F.logsigmoid(score_u) + F.logsigmoid(score_a1) + F.logsigmoid(score_a2)

        if apply_mask:
            score_u_a1_a2_mult[~mask] = -torch.inf
        order_u_a1_a2_mult = torch.argsort(score_u_a1_a2_mult, dim=-1, descending=True)

        ## Score intersection
        if model_type == 'box_conditional':
            u_box = model.user_embeddings(u)
            a1_box = model.user_embeddings(a1)
            a2_box = model.user_embeddings(a2)
            u_int_a1_box = u_box.intersection(
                a1_box,
                intersection_temp=arg_dict['intersection_temp'],
                bayesian=True)
            u_a1_a2_int_box = u_int_a1_box.intersection(
                a2_box,
                intersection_temp=arg_dict['intersection_temp'],
                bayesian=True)
            u_a1_a2_scores = box_predict_conditional(u_a1_a2_int_box, model)
        else:  # 'mf_bias'
            u_vector = model.user_embeddings(u)
            a1_vector = model.user_embeddings(a1)
            a2_vector = model.user_embeddings(a2)
            u_a1_a2_vector = u_vector + a1_vector + a2_vector
            u_a1_a2_scores = vector_predict(u_a1_a2_vector, model)

        if apply_mask:
            u_a1_a2_scores[~mask] = -torch.inf
        order_u_a1_a2_int = torch.argsort(u_a1_a2_scores, dim=-1, descending=True)

        ## Matrix completion
        a1_m = torch.where(predicted_a_m_matrix[attribute_1])[0].tolist()
        a2_m = torch.where(predicted_a_m_matrix[attribute_2])[0].tolist()
        a1_a2_items = set(a1_m).intersection(set(a2_m))
        # Compare the plain int: a tensor would never match the ints in the set.
        if m.item() not in a1_a2_items:
            # Query movie was filtered out: record the miss sentinel n_items.
            rank_u_a1_a2_and.append(model.n_items)
        else:
            if apply_mask:
                score_u[~mask] = -torch.inf
            order_u = torch.argsort(score_u, dim=-1, descending=True)
            a1_a2_m = torch.tensor(list(a1_a2_items))
            # Positions in the user's ranking that hold items of both attributes, in rank order.
            a1_a2_in_u = (order_u[:, None] == a1_a2_m).nonzero()[:, 0]
            order_a1_a2_in_u = order_u[a1_a2_in_u]
            rank_u_a1_a2_and.append(torch.where(order_a1_a2_in_u == m)[0].item())


        ## get ranks (0-indexed positions of the query movie)
        rank_u_a1_a2_mult.append(torch.where(order_u_a1_a2_mult == m)[0].item())
        rank_u_a1_a2_int.append(torch.where(order_u_a1_a2_int == m)[0].item())

    print("Post hoc filtering")
    hr_u_a1_a2_and, ndcg_u_a1_a2_and = get_hr_ndcg_at_k(rank_u_a1_a2_and)
    print("Calculating metrics for user*attribute1*attribute2")
    hr_u_a1_a2_mult, ndcg_u_a1_a2_mult = get_hr_ndcg_at_k(rank_u_a1_a2_mult)
    print("Calculating metrics for user intersection attribute1 intersection attribute2")
    hr_u_a1_a2_int, ndcg_u_a1_a2_int = get_hr_ndcg_at_k(rank_u_a1_a2_int)

    return {
            'hr_u_a1_a2_mult': hr_u_a1_a2_mult, 'ndcg_u_a1_a2_mult': ndcg_u_a1_a2_mult,
            'hr_u_a1_a2_int': hr_u_a1_a2_int, 'ndcg_u_a1_a2_int': ndcg_u_a1_a2_int,
            'hr_u_a1_a2_and': hr_u_a1_a2_and, 'ndcg_u_a1_a2_and': ndcg_u_a1_a2_and
            }

## $ U \cap A_1 \cap \neg A_2$

In [None]:
def get_ranking_metrices_difference(query_df,
                                    model,
                                    arg_dict,
                                    gt_matrix,
                                    predicted_a_m_matrix,
                                    apply_mask=True):
    """Rank the query movie for every (user, attribute 1 AND NOT attribute 2) query.

    Three strategies are evaluated: score multiplication (with the complement
    of the attribute-2 score), set operations on the user/attribute
    representations, and post-hoc filtering of the user's ranking by the items
    predicted to carry attribute 1 but not attribute 2.

    Relies on the notebook-global ``n_users`` as the offset of attribute ids
    in the model's user embedding table.

    Args:
        query_df: DataFrame whose rows unpack, in order, to
            ``(user_id, movie_id, attribute_1, attribute_2)``.
        model: Trained model exposing ``predict_item``, ``user_embeddings``
            and ``n_items``.
        arg_dict: Saved arguments; ``arg_dict['model']`` must be
            ``'box_conditional'`` or ``'mf_bias'``.
        gt_matrix: Boolean tensor indexed as
            ``[user_id, attribute_1, attribute_2]`` giving ground-truth items.
        predicted_a_m_matrix: Boolean tensor indexed by attribute id giving
            the items predicted to carry that attribute.
        apply_mask: If True, every ground-truth item other than the query
            movie is excluded from the ranking.

    Returns:
        Dict with HR lists and NDCG values under the keys
        ``hr_/ndcg_u_a1_not_a2_mult``, ``hr_/ndcg_u_a1_not_a2_int`` and
        ``hr_/ndcg_u_a1_not_a2_and``.

    Raises:
        ValueError: If ``arg_dict['model']`` is not supported.
    """
    model_type = arg_dict['model']
    if model_type not in ('box_conditional', 'mf_bias'):
        # Previously surfaced as a NameError on the first use of an unset score.
        raise ValueError(f"Unsupported model type for ranking: {model_type!r}")

    rank_u_a1_not_a2_mult = []
    rank_u_a1_not_a2_int = []
    rank_u_a1_not_a2_and = []
    for user_id, movie_id, attribute_1, attribute_2 in tqdm(query_df.values):
        u = torch.tensor([user_id])
        m = torch.tensor([movie_id])
        a1 = torch.tensor([attribute_1 + n_users])
        a2 = torch.tensor([attribute_2 + n_users])

        if apply_mask:
            # Size the mask from the model being evaluated rather than from
            # the global vector model.
            mask = torch.ones(model.n_items, dtype=torch.bool)
            gt_m = torch.where(gt_matrix[user_id, attribute_1, attribute_2] == 1)[0]
            mask[gt_m] = False
            mask[m] = True  # the query movie itself must stay rankable

        score_u = model.predict_item(u)
        score_a1 = model.predict_item(a1)
        score_a2 = model.predict_item(a2)


        ## Score Multiplication
        if model_type == 'box_conditional':
            # log(1 - exp(score_a2)) is the log-probability of NOT attribute 2.
            score_u_a1_not_a2_mult = score_u + score_a1 + log1mexp(score_a2)
        else:  # 'mf_bias'
            score_u_a1_not_a2_mult = F.logsigmoid(score_u) + F.logsigmoid(score_a1) + F.logsigmoid(-score_a2)

        if apply_mask:
            score_u_a1_not_a2_mult[~mask] = -torch.inf
        order_u_a1_not_a2_mult = torch.argsort(score_u_a1_not_a2_mult, dim=-1, descending=True)

        ## Score Set Operations
        if model_type == 'box_conditional':
            u_box = model.user_embeddings(u)
            a1_box = model.user_embeddings(a1)
            a2_box = model.user_embeddings(a2)
            u_int_a1_box = u_box.intersection(
                a1_box,
                intersection_temp=model.intersection_temp,
                bayesian=True)
            u_a1_a2_int_box = u_int_a1_box.intersection(
                a2_box,
                intersection_temp=model.intersection_temp,
                bayesian=True)

            u_a1_a2_scores = box_predict_conditional(u_a1_a2_int_box, model)
            u_a1_scores = box_predict_conditional(u_int_a1_box, model)
            # log(exp(s_a1) - exp(s_a1a2)) computed in log space.
            scores_u_a1_not_a2_int = u_a1_scores + log1mexp(u_a1_a2_scores - u_a1_scores)
        else:  # 'mf_bias'
            u_vector = model.user_embeddings(u)
            a1_vector = model.user_embeddings(a1)
            a2_vector = model.user_embeddings(a2)
            u_a1_not_a2_vector = u_vector + a1_vector - a2_vector
            scores_u_a1_not_a2_int = vector_predict(u_a1_not_a2_vector, model)

        if apply_mask:
            scores_u_a1_not_a2_int[~mask] = -torch.inf
        order_u_a1_not_a2_int = torch.argsort(scores_u_a1_not_a2_int, dim=-1, descending=True)

        ## Matrix completion
        a1_m = torch.where(predicted_a_m_matrix[attribute_1])[0].tolist()
        a2_m = torch.where(predicted_a_m_matrix[attribute_2])[0].tolist()
        a1_not_a2_m = set(a1_m).difference(set(a2_m))

        # Compare the plain int. Testing the tensor `m` against a set of ints
        # never matches (tensors hash by identity), which made every query a
        # miss and pinned this rank to the n_items sentinel.
        if m.item() not in a1_not_a2_m:
            rank_u_a1_not_a2_and.append(model.n_items)
        else:
            if apply_mask:
                score_u[~mask] = -torch.inf
            order_u = torch.argsort(score_u, dim=-1, descending=True)
            # Positions in the user's ranking that hold the filtered items, in rank order.
            a1_not_a2_in_u = (order_u[:, None] == torch.tensor(list(a1_not_a2_m))).nonzero()[:, 0]
            order_a1_not_a2_in_u = order_u[a1_not_a2_in_u]
            rank_u_a1_not_a2_and.append(torch.where(order_a1_not_a2_in_u == m)[0].item())

        ## get ranks (0-indexed positions of the query movie)
        rank_u_a1_not_a2_mult.append(torch.where(order_u_a1_not_a2_mult == m)[0].item())
        rank_u_a1_not_a2_int.append(torch.where(order_u_a1_not_a2_int == m)[0].item())

    print("Post hoc filtering")
    hr_u_a1_not_a2_and, ndcg_u_a1_not_a2_and = get_hr_ndcg_at_k(rank_u_a1_not_a2_and)

    print("Calculating metrics for user*attribute1-not-attribute2")
    hr_u_a1_not_a2_mult, ndcg_u_a1_not_a2_mult = get_hr_ndcg_at_k(rank_u_a1_not_a2_mult)
    print("Calculating metrics for user intersection attribute1 - intersection attribute2")
    hr_u_a1_not_a2_int, ndcg_u_a1_not_a2_int = get_hr_ndcg_at_k(rank_u_a1_not_a2_int)

    return {
            'hr_u_a1_not_a2_mult': hr_u_a1_not_a2_mult, 'ndcg_u_a1_not_a2_mult': ndcg_u_a1_not_a2_mult,
            'hr_u_a1_not_a2_int': hr_u_a1_not_a2_int, 'ndcg_u_a1_not_a2_int': ndcg_u_a1_not_a2_int,
            'hr_u_a1_not_a2_and': hr_u_a1_not_a2_and, 'ndcg_u_a1_not_a2_and': ndcg_u_a1_not_a2_and
            }


In [None]:
def get_ranking_metrices_difference_no_condition(query_df,
                                    model,
                                    arg_dict,
                                    gt_matrix,
                                    predicted_a_m_matrix,
                                    apply_mask=True):
    """Set-operation ranking for (user, attribute 1 AND NOT attribute 2) queries
    using unconditioned box scores (``box_predict``).

    Relies on the notebook-global ``n_users`` as the offset of attribute ids
    in the model's user embedding table.

    Args:
        query_df: DataFrame whose rows unpack, in order, to
            ``(user_id, movie_id, attribute_1, attribute_2)``.
        model: Trained model exposing ``user_embeddings`` and ``n_items``.
        arg_dict: Saved arguments; ``arg_dict['model']`` must be
            ``'box_conditional'`` or ``'mf_bias'``.
        gt_matrix: Boolean tensor indexed as
            ``[user_id, attribute_1, attribute_2]`` giving ground-truth items.
        predicted_a_m_matrix: Unused; kept so the signature matches the other
            ranking functions.
        apply_mask: If True, every ground-truth item other than the query
            movie is excluded from the ranking.

    Returns:
        Dict with keys ``'hr_u_a1_not_a2_int'`` and ``'ndcg_u_a1_not_a2_int'``.

    Raises:
        ValueError: If ``arg_dict['model']`` is not supported.
    """
    model_type = arg_dict['model']
    if model_type not in ('box_conditional', 'mf_bias'):
        # Previously surfaced as a NameError on the first use of an unset score.
        raise ValueError(f"Unsupported model type for ranking: {model_type!r}")

    rank_u_a1_not_a2_int = []
    for user_id, movie_id, attribute_1, attribute_2 in tqdm(query_df.values):
        u = torch.tensor([user_id])
        m = torch.tensor([movie_id])
        a1 = torch.tensor([attribute_1 + n_users])
        a2 = torch.tensor([attribute_2 + n_users])

        if apply_mask:
            # Size the mask from the model being evaluated rather than from
            # the global vector model.
            mask = torch.ones(model.n_items, dtype=torch.bool)
            gt_m = torch.where(gt_matrix[user_id, attribute_1, attribute_2] == 1)[0]
            mask[gt_m] = False
            mask[m] = True  # the query movie itself must stay rankable

        ## Score Set Operations
        if model_type == 'box_conditional':
            u_box = model.user_embeddings(u)
            a1_box = model.user_embeddings(a1)
            a2_box = model.user_embeddings(a2)
            u_int_a1_box = u_box.intersection(
                a1_box,
                intersection_temp=model.intersection_temp,
                bayesian=True)
            u_a1_a2_int_box = u_int_a1_box.intersection(
                a2_box,
                intersection_temp=model.intersection_temp,
                bayesian=True)

            u_a1_a2_scores = box_predict(u_a1_a2_int_box, model)
            u_a1_scores = box_predict(u_int_a1_box, model)
            # log(exp(s_a1) - exp(s_a1a2)) computed in log space.
            scores_u_a1_not_a2_int = u_a1_scores + log1mexp(u_a1_a2_scores - u_a1_scores)
        else:  # 'mf_bias'
            u_vector = model.user_embeddings(u)
            a1_vector = model.user_embeddings(a1)
            a2_vector = model.user_embeddings(a2)
            u_a1_not_a2_vector = u_vector + a1_vector - a2_vector
            scores_u_a1_not_a2_int = vector_predict(u_a1_not_a2_vector, model)

        if apply_mask:
            scores_u_a1_not_a2_int[~mask] = -torch.inf
        order_u_a1_not_a2_int = torch.argsort(scores_u_a1_not_a2_int, dim=-1, descending=True)
        # 0-indexed position of the query movie in the ranking.
        rank_u_a1_not_a2_int.append(torch.where(order_u_a1_not_a2_int == m)[0].item())
    print("Calculating metrics for user intersection attribute1 - intersection attribute2")
    hr_u_a1_not_a2_int, ndcg_u_a1_not_a2_int = get_hr_ndcg_at_k(rank_u_a1_not_a2_int)

    return {
            'hr_u_a1_not_a2_int': hr_u_a1_not_a2_int, 'ndcg_u_a1_not_a2_int': ndcg_u_a1_not_a2_int,
            }

def get_ranking_metrices_difference_condition(query_df,
                                    model,
                                    arg_dict,
                                    gt_matrix,
                                    predicted_a_m_matrix,
                                    apply_mask=True):
    """Set-operation ranking for (user, attribute 1 AND NOT attribute 2) queries
    using conditional box scores (``box_predict_conditional``).

    Relies on the notebook-global ``n_users`` as the offset of attribute ids
    in the model's user embedding table.

    Args:
        query_df: DataFrame whose rows unpack, in order, to
            ``(user_id, movie_id, attribute_1, attribute_2)``.
        model: Trained model exposing ``user_embeddings`` and ``n_items``.
        arg_dict: Saved arguments; ``arg_dict['model']`` must be
            ``'box_conditional'`` or ``'mf_bias'``.
        gt_matrix: Boolean tensor indexed as
            ``[user_id, attribute_1, attribute_2]`` giving ground-truth items.
        predicted_a_m_matrix: Unused; kept so the signature matches the other
            ranking functions.
        apply_mask: If True, every ground-truth item other than the query
            movie is excluded from the ranking.

    Returns:
        Dict with keys ``'hr_u_a1_not_a2_int'`` and ``'ndcg_u_a1_not_a2_int'``.

    Raises:
        ValueError: If ``arg_dict['model']`` is not supported.
    """
    model_type = arg_dict['model']
    if model_type not in ('box_conditional', 'mf_bias'):
        # Previously surfaced as a NameError on the first use of an unset score.
        raise ValueError(f"Unsupported model type for ranking: {model_type!r}")

    rank_u_a1_not_a2_int = []
    for user_id, movie_id, attribute_1, attribute_2 in tqdm(query_df.values):
        u = torch.tensor([user_id])
        m = torch.tensor([movie_id])
        a1 = torch.tensor([attribute_1 + n_users])
        a2 = torch.tensor([attribute_2 + n_users])

        if apply_mask:
            # Size the mask from the model being evaluated rather than from
            # the global vector model.
            mask = torch.ones(model.n_items, dtype=torch.bool)
            gt_m = torch.where(gt_matrix[user_id, attribute_1, attribute_2] == 1)[0]
            mask[gt_m] = False
            mask[m] = True  # the query movie itself must stay rankable

        ## Score Set Operations
        if model_type == 'box_conditional':
            u_box = model.user_embeddings(u)
            a1_box = model.user_embeddings(a1)
            a2_box = model.user_embeddings(a2)
            u_int_a1_box = u_box.intersection(
                a1_box,
                intersection_temp=model.intersection_temp,
                bayesian=True)
            u_a1_a2_int_box = u_int_a1_box.intersection(
                a2_box,
                intersection_temp=model.intersection_temp,
                bayesian=True)

            u_a1_a2_scores = box_predict_conditional(u_a1_a2_int_box, model)
            u_a1_scores = box_predict_conditional(u_int_a1_box, model)
            # log(exp(s_a1) - exp(s_a1a2)) computed in log space.
            scores_u_a1_not_a2_int = u_a1_scores + log1mexp(u_a1_a2_scores - u_a1_scores)
        else:  # 'mf_bias'
            u_vector = model.user_embeddings(u)
            a1_vector = model.user_embeddings(a1)
            a2_vector = model.user_embeddings(a2)
            u_a1_not_a2_vector = u_vector + a1_vector - a2_vector
            scores_u_a1_not_a2_int = vector_predict(u_a1_not_a2_vector, model)

        if apply_mask:
            scores_u_a1_not_a2_int[~mask] = -torch.inf
        order_u_a1_not_a2_int = torch.argsort(scores_u_a1_not_a2_int, dim=-1, descending=True)
        # 0-indexed position of the query movie in the ranking.
        rank_u_a1_not_a2_int.append(torch.where(order_u_a1_not_a2_int == m)[0].item())
    print("Calculating metrics for user intersection attribute1 - intersection attribute2")
    hr_u_a1_not_a2_int, ndcg_u_a1_not_a2_int = get_hr_ndcg_at_k(rank_u_a1_not_a2_int)

    return {
            'hr_u_a1_not_a2_int': hr_u_a1_not_a2_int, 'ndcg_u_a1_not_a2_int': ndcg_u_a1_not_a2_int,
            }



# Get Results for Movie-Genre (MG) Joint Matrix

In [None]:
# Build the dataset from the box model's saved arguments. n_users, n_movies and
# n_attributes become notebook globals; the ranking helpers read n_users as the
# offset of attribute ids in the user embedding table.
dataset_mg = get_dataset(arg_dict_box_mg)
n_users = dataset_mg.n_users
n_movies = dataset_mg.n_movies
n_attributes = dataset_mg.n_attributes
# Dense boolean ground-truth matrices (rows: users / attributes, columns: movies).
gt_user_movie_matrix_mg = gt_df_to_matrix(dataset_mg, dataset_mg.gt_user_movie)
gt_attribute_movie_matrix_mg = gt_df_to_matrix(dataset_mg, dataset_mg.gt_attribute_movie)
dataset_mg.read_neg_data_files()
print('Data loaded')
print('Number of users:', n_users)
print('Number of movies:', n_movies)
print('Number of attributes:', n_attributes)
# Compositional query sets stored under <data_dir>/combinations.
user_attribute_query_mg = pd.read_csv('../' + arg_dict_box_mg['data_dir'] + '/combinations/user_attribute.csv')
user_attributeA_attributeB_mg = pd.read_csv('../' + arg_dict_box_mg['data_dir'] + '/combinations/user_attributeA_attributeB.csv')
user_attributeA_not_attributeB_mg = pd.read_csv('../' + arg_dict_box_mg['data_dir'] + '/combinations/user_attributeA_not_attributeB.csv')
print("Combination queries loaded")

In [None]:
# Sanity check that both checkpoints loaded correctly: report HR / NDCG on the
# validation split (check_test_results defaults to mode='valid').
print("Validation results for box model")
pprint(check_test_results(dataset_mg, model_box_mg))
print("Validation results for vector model")
pprint(check_test_results(dataset_mg, model_vector_mg))

### Trying out

In [None]:
import torch

In [None]:
def score_all_items_vector(user, model):
    """Score every item for the given user ids with the biased vector model.

    Args:
        user: Tensor of user (or offset attribute) ids.
        model: Object exposing ``n_items``, ``user_embeddings``,
            ``item_embeddings``, ``user_biases``, ``item_biases`` and
            ``global_bias``.

    Returns:
        Tensor of dot-product scores plus user bias, item bias and global bias.
    """
    item_ids = torch.arange(model.n_items)
    item_matrix = model.item_embeddings(item_ids)
    scores = torch.matmul(model.user_embeddings(user), item_matrix.T)
    # Bias terms are added in place, in the same order as before.
    bias_user = model.user_biases(user)
    bias_items = model.item_biases(item_ids)
    scores += bias_user + bias_items.T + model.global_bias
    return scores

In [None]:
# Scratch cell: batch all (user, attribute, movie) queries and score every item
# with the vector model in one shot.
# NOTE(review): this reads a 'genre_id' column, while get_ranking_metrices reads
# 'attribute_id' from the same DataFrame — confirm which column the CSV provides.
u = torch.tensor(user_attribute_query_mg['user_id'])
a = torch.tensor(user_attribute_query_mg['genre_id'] + n_users)
m = torch.tensor(user_attribute_query_mg['movie_id'])

# NOTE(review): the three embedding tensors below are not used later in the
# visible part of the notebook.
u_embedding = model_vector_mg.user_embeddings(u)
a_embedding = model_vector_mg.user_embeddings(a)
all_item_embedding = model_vector_mg.item_embeddings(torch.arange(model_vector_mg.n_items))

score_u = score_all_items_vector(u, model_vector_mg)
score_a = score_all_items_vector(a, model_vector_mg)

#mask 



### Get GT

In [None]:
# Ground truth for compositional queries, built by broadcasting the two boolean
# matrices: (U, 1, M) & (A, M) -> (U, A, M), i.e. movies liked by user u that
# also carry attribute a.
gt_u_and_a_matrix_mg = gt_user_movie_matrix_mg[:, None] & gt_attribute_movie_matrix_mg
# (U, A, 1, M) & (A, M) -> (U, A1, A2, M): additionally (not) carrying a second attribute.
gt_u_and_a1_a2_matrix_mg = gt_u_and_a_matrix_mg[:,:,None,:] & gt_attribute_movie_matrix_mg
gt_u_and_a1_not_a2_matrix_mg = gt_u_and_a_matrix_mg[:,:,None,:] & ~gt_attribute_movie_matrix_mg
# #shape
# gt_u_and_a_matrix_mg.shape, gt_u_and_a1_a2_matrix_mg.shape, gt_u_and_a1_not_a2_matrix_mg.shape

In [None]:
# Sanity check: every True entry of the combined (user, attribute, movie) tensor
# must be True in both source matrices.
u, a, m = torch.where(gt_u_and_a_matrix_mg)
for user_idx, attr_idx, movie_idx in zip(u, a, m):
    assert gt_user_movie_matrix_mg[user_idx, movie_idx] == 1
    assert gt_attribute_movie_matrix_mg[attr_idx, movie_idx] == 1


## Direct Filtering model

In [None]:
from torcheval.metrics.functional import binary_f1_score
def get_best_f1_theshold(input, target):
    """Find the score threshold that maximises binary F1 against ``target``.

    Every value in ``input`` is tried as a threshold; predictions are
    ``input >= threshold``.

    Args:
        input: 1-D tensor of scores; each element is a candidate threshold.
        target: Ground-truth labels passed to ``binary_f1_score``.

    Returns:
        Tuple ``(best_threshold, max_f1)``. Both stay at 0 when no candidate
        achieves an F1 above 0.
    """
    top_f1, top_cut = 0, 0
    for cut in input:
        candidate_f1 = binary_f1_score((input >= cut).float(), target)
        # Strict comparison keeps the first threshold among equal-F1 candidates.
        if candidate_f1 > top_f1:
            top_f1, top_cut = candidate_f1, cut
    return top_cut, top_f1

In [None]:
# Dense boolean matrices of the training interactions; the attribute-movie one
# is the target used to tune the per-attribute thresholds below.
train_user_movie_mg = dataset_mg.train_user_movie
train_user_movie_matrix_mg = gt_df_to_matrix(dataset_mg, train_user_movie_mg)
train_attribute_movie_mg = dataset_mg.train_attribute_movie
train_attribute_movie_matrix_mg = gt_df_to_matrix(dataset_mg, train_attribute_movie_mg)

In [None]:
def get_matrix_completion(model, train_matrix):
    """Binarise each attribute's item scores with its own best-F1 threshold.

    For every index ``i`` in ``range(n_attributes)`` the model's
    ``predict_item`` is called for id ``i + n_users``, a threshold is selected
    by ``get_best_f1_theshold`` against ``train_matrix[i]``, and the scores are
    cut at that threshold.

    Reads the notebook globals ``n_attributes`` and ``n_users``.

    Returns:
        The per-attribute boolean rows combined with ``torch.stack``.
    """
    rows = []
    for attr_idx in tqdm(range(n_attributes)):
        attr_scores = model.predict_item(torch.tensor([attr_idx + n_users]))
        # The F1 value itself is not needed here, only the cut-off.
        cutoff, _ = get_best_f1_theshold(attr_scores, train_matrix[attr_idx])
        rows.append(attr_scores >= cutoff)
    return torch.stack(rows)

In [None]:
# Thresholded attribute-movie predictions for both models.
predicted_a_m_box_mg = get_matrix_completion(model_box_mg, train_attribute_movie_matrix_mg)
predicted_a_m_vector_mg = get_matrix_completion(model_vector_mg, train_attribute_movie_matrix_mg)
# Ratio of predicted entries to training entries: box first, then vector.
for _completed in (predicted_a_m_box_mg, predicted_a_m_vector_mg):
    print(_completed.sum() / train_attribute_movie_matrix_mg.sum(), " times more entries")

## BOX U A - Genre

In [None]:
# User-and-genre ranking metrics for the box model.
metric_dict_box_mg = get_ranking_metrices(
    user_attribute_query_mg,
    model_box_mg,
    arg_dict_box_mg,
    gt_u_and_a_matrix_mg,
    predicted_a_m_box_mg,
    apply_mask=True,
)

## Vector U A - Genre

In [None]:
# User-and-genre ranking metrics for the vector model.
metric_dict_vector_mg = get_ranking_metrices(
    user_attribute_query_mg,
    model_vector_mg,
    arg_dict_vector_mg,
    gt_u_and_a_matrix_mg,
    predicted_a_m_vector_mg,
    apply_mask=True,
)

## BOX U A1 A2 -Genre

In [None]:
# User-and-two-genres ranking metrics for the box model.
metric_dict_box_mg_a1_a2 = get_ranking_metrices_intersection(
    user_attributeA_attributeB_mg,
    model_box_mg,
    arg_dict_box_mg,
    gt_u_and_a1_a2_matrix_mg,
    predicted_a_m_box_mg,
    apply_mask=True,
)

## Vector U A1 A2 - Genre

In [None]:
# User-and-two-genres ranking metrics for the vector model.
# The predicted attribute-movie matrix passed here is the one completed by the
# vector model itself (as in the single-attribute vector cell), not the box
# model's, so the vector results do not depend on the box model.
metric_dict_vector_mg_a1_a2 = get_ranking_metrices_intersection(
    user_attributeA_attributeB_mg,
    model_vector_mg,
    arg_dict_vector_mg,
    gt_u_and_a1_a2_matrix_mg,
    predicted_a_m_vector_mg,
    apply_mask=True,
)

## BOX $U \cap A1 \cap \neg A2$ - Genre

In [None]:
# User-and-genre-A-but-not-genre-B ranking metrics for the box model.
metric_dict_box_mg_a1_not_a2 = get_ranking_metrices_difference(
    user_attributeA_not_attributeB_mg,
    model_box_mg,
    arg_dict_box_mg,
    gt_u_and_a1_not_a2_matrix_mg,
    predicted_a_m_box_mg,
    apply_mask=True,
)

In [None]:
# Same difference queries through the "no_condition" variant of the helper;
# the result is not stored, only displayed as the cell output.
get_ranking_metrices_difference_no_condition(
    user_attributeA_not_attributeB_mg,
    model_box_mg,
    arg_dict_box_mg,
    gt_u_and_a1_not_a2_matrix_mg,
    predicted_a_m_box_mg,
    apply_mask=True,
)

In [None]:
# Same difference queries through the "condition" variant of the helper;
# the result is not stored, only displayed as the cell output.
get_ranking_metrices_difference_condition(
    user_attributeA_not_attributeB_mg,
    model_box_mg,
    arg_dict_box_mg,
    gt_u_and_a1_not_a2_matrix_mg,
    predicted_a_m_box_mg,
    apply_mask=True,
)

## Vector  $U \cap A_1 \cap \neg A_2$  -Genre

In [None]:
# User-and-genre-A-but-not-genre-B ranking metrics for the vector model.
# The predicted attribute-movie matrix passed here is the one completed by the
# vector model itself (as in the single-attribute vector cell), not the box
# model's, so the vector results do not depend on the box model.
metric_dict_vector_mg_a1_not_a2 = get_ranking_metrices_difference(
    user_attributeA_not_attributeB_mg,
    model_vector_mg,
    arg_dict_vector_mg,
    gt_u_and_a1_not_a2_matrix_mg,
    predicted_a_m_vector_mg,
    apply_mask=True,
)

### Analysis

In [None]:
# Peek at one raw query row; index 8990 looks like an ad-hoc pick — confirm it still matters.
user_attributeA_attributeB_mg.values[8990]

In [None]:
# Compare, per (user, movie, attribute 1, attribute 2) query, the rank of the
# query movie under two box-model scorings:
#   * joint_rank: sum of the three per-entity score vectors
#   * int_rank:   score of the box obtained by intersecting user, a1 and a2
# count tallies queries where the intersection ranks the movie strictly better,
# count_opp tallies all the others (ties included).
count = 0
count_opp = 0
for user_id, movie_id, attribute_1, attribute_2 in user_attributeA_attributeB_mg.values:
   
    u = torch.tensor([user_id])
    m = torch.tensor([movie_id])
    # Attribute ids are offset by n_users before the embedding lookup.
    a1 = torch.tensor([attribute_1 + n_users])
    a2 = torch.tensor([attribute_2 + n_users])

    # mask is True for items that stay in the ranking: every ground-truth item of
    # this query is dropped, then the query movie itself is re-enabled.
    # NOTE(review): the size comes from model_vector_mg.n_items although the scores
    # below come from model_box_mg — presumably both models share the item count.
    mask = torch.ones(model_vector_mg.n_items, dtype=torch.bool)
    gt_m = torch.where(gt_u_and_a1_a2_matrix_mg[user_id, attribute_1, attribute_2] == 1)[0]
    mask[gt_m] = False
    mask[m] = True

    # Human-readable names, only used by the printout further down.
    movie_name = dataset_mg.movies[dataset_mg.movies['movie_id'] == movie_id]['movie'].item()
    attr_1_name = dataset_mg.attributes[dataset_mg.attributes['genre_id'] == attribute_1]['genre'].item()
    attr_2_name = dataset_mg.attributes[dataset_mg.attributes['genre_id'] == attribute_2]['genre'].item()
    # print(f"User: {user_id}, Movie: {movie_name}, Attribute 1: {attr_1_name}, Attribute 2: {attr_2_name}")

    score_u = model_box_mg.predict_item(u)
    score_a1 = model_box_mg.predict_item(a1)
    score_a2 = model_box_mg.predict_item(a2)

    # Unmasked rank of the movie under each single scorer (only referenced by the
    # commented-out prints below).
    user_movie_rank = torch.where(torch.argsort(score_u, dim=-1, descending=True) == m)[0].item()
    attr1_movie_rank = torch.where(torch.argsort(score_a1, dim=-1, descending=True) == m)[0].item()
    attr2_movie_rank = torch.where(torch.argsort(score_a2, dim=-1, descending=True) == m)[0].item()
    # print(f"Rank of movie in user: {user_movie_rank}")
    # print(f"Rank of movie in attribute 1: {attr1_movie_rank}")
    # print(f"Rank of movie in attribute 2: {attr2_movie_rank}")

    # Additive combination (a product of probabilities if the scores are
    # log-probabilities — TODO confirm); masked items are pushed to the bottom.
    joint_score = score_u + score_a1 + score_a2
    joint_score[~mask] = -torch.inf

    joint_rank = torch.where(torch.argsort(joint_score, dim=-1, descending=True) == m)[0].item()
    # print(f"Rank of movie in user*attribute1*attribute2: {joint_rank}")

    # Intersect the user box with a1, then the result with a2.
    u_box = model_box_mg.user_embeddings(u)
    a1_box = model_box_mg.user_embeddings(a1)
    a2_box = model_box_mg.user_embeddings(a2)
    u_int_a1_box = u_box.intersection(
        a1_box,
        intersection_temp=model_box_mg.intersection_temp,
        bayesian=True)
    u_int_a1_a2_box = u_int_a1_box.intersection(
        a2_box,
        intersection_temp=model_box_mg.intersection_temp,
        bayesian=True)

    int_scores = box_predict_conditional(u_int_a1_a2_box, model_box_mg)
    int_scores[~mask] = -torch.inf
    int_rank = torch.where(torch.argsort(int_scores, dim=-1, descending=True) == m)[0].item()
    # print(f"Rank of movie in user intersection attribute1 intersection attribute2: {int_rank}")
    if int_rank < joint_rank:
        count+=1
        # Print only the striking wins: top-10 by intersection, beyond rank 50 by the sum.
        if int_rank < 10 and joint_rank > 50:
            print("-------------------------------")
            print(f"User: {user_id}, movie_id: {movie_id}, Attr_1_id: {attribute_1}, attr_2_id {attribute_2}")
            print(f"Movie: {movie_name}, Attribute 1: {attr_1_name}, Attribute 2: {attr_2_name}")
            print(f"int_rank {int_rank}, joint_rank {joint_rank}")
    else:
        count_opp+=1     


In [None]:
# Inspect one hand-picked query (user 15, movie 58, attributes 1 and 13):
# list the top-ranked movies under the intersection scoring and under the
# additive scoring. The steps repeat the body of the comparison loop above.
attr_df = dataset_mg.attributes
movie_df = dataset_mg.movies
user_id = 15
movie_id = 58
attribute_1 = 1
attribute_2 = 13
u = torch.tensor([user_id])
m = torch.tensor([movie_id])
# Attribute ids are offset by n_users before the embedding lookup.
a1 = torch.tensor([attribute_1 + n_users])
a2 = torch.tensor([attribute_2 + n_users])

# mask is True for items kept in the ranking: ground-truth items of this query
# are dropped, then the query movie is re-enabled.
mask = torch.ones(model_vector_mg.n_items, dtype=torch.bool)
gt_m = torch.where(gt_u_and_a1_a2_matrix_mg[user_id, attribute_1, attribute_2] == 1)[0]
mask[gt_m] = False
mask[m] = True

movie_name = movie_df[movie_df['movie_id'] == movie_id]['movie'].item()
attr_1_name = attr_df[attr_df['genre_id'] == attribute_1]['genre'].item()
attr_2_name = attr_df[attr_df['genre_id'] == attribute_2]['genre'].item()
print(f"User: {user_id}, Movie: {movie_name}, Attribute 1: {attr_1_name}, Attribute 2: {attr_2_name}")

score_u = model_box_mg.predict_item(u)
score_a1 = model_box_mg.predict_item(a1)
score_a2 = model_box_mg.predict_item(a2)

# Unmasked single-scorer ranks (only referenced by the commented-out prints).
user_movie_rank = torch.where(torch.argsort(score_u, dim=-1, descending=True) == m)[0].item()
attr1_movie_rank = torch.where(torch.argsort(score_a1, dim=-1, descending=True) == m)[0].item()
attr2_movie_rank = torch.where(torch.argsort(score_a2, dim=-1, descending=True) == m)[0].item()
# print(f"Rank of movie in user: {user_movie_rank}")
# print(f"Rank of movie in attribute 1: {attr1_movie_rank}")
# print(f"Rank of movie in attribute 2: {attr2_movie_rank}")

# Additive combination of the three score vectors, masked items sent to the bottom.
joint_score = score_u + score_a1 + score_a2
joint_score[~mask] = -torch.inf

joint_rank = torch.where(torch.argsort(joint_score, dim=-1, descending=True) == m)[0].item()
# print(f"Rank of movie in user*attribute1*attribute2: {joint_rank}")

# Intersect the user box with a1, then the result with a2.
u_box = model_box_mg.user_embeddings(u)
a1_box = model_box_mg.user_embeddings(a1)
a2_box = model_box_mg.user_embeddings(a2)
u_int_a1_box = u_box.intersection(
    a1_box,
    intersection_temp=model_box_mg.intersection_temp,
    bayesian=True)
u_int_a1_a2_box = u_int_a1_box.intersection(
    a2_box,
    intersection_temp=model_box_mg.intersection_temp,
    bayesian=True)

int_scores = box_predict_conditional(u_int_a1_a2_box, model_box_mg)
int_scores[~mask] = -torch.inf
int_rank = torch.where(torch.argsort(int_scores, dim=-1, descending=True) == m)[0].item()

# Despite the "top_10" names, both slices keep the first 30 ranked item ids.
top_10_int = torch.argsort(int_scores, dim=-1, descending=True)[:30]
top_10_joint = torch.argsort(joint_score, dim=-1, descending=True)[:30]

# Intersection ranking first, then the additive ranking after the separator.
# (The loop variable `id` shadows the Python builtin of the same name.)
for i, id in enumerate(top_10_int):
    print(i, ' : ', movie_df[movie_df['movie_id'] == id.item()]['movie'].item(), ':', id)
print("----------------------------------------")
for i, id in enumerate(top_10_joint):
    print(i, ' : ', movie_df[movie_df['movie_id'] == id.item()]['movie'].item(), ':', id)


In [None]:
# Same hand-picked query as the previous cell, this time ranked by filtering:
# keep only the movies that the thresholded box predictions assign to both
# attributes, in the order the user scorer ranks them.
attr_df = dataset_mg.attributes
movie_df = dataset_mg.movies
user_id = 15
movie_id = 58
attribute_1 = 1
attribute_2 = 13
u = torch.tensor([user_id])
m = torch.tensor([movie_id])
# Attribute ids are offset by n_users before the embedding lookup.
a1 = torch.tensor([attribute_1 + n_users])
a2 = torch.tensor([attribute_2 + n_users])

# mask is True for items kept in the ranking: ground-truth items of this query
# are dropped, then the query movie is re-enabled.
mask = torch.ones(model_vector_mg.n_items, dtype=torch.bool)
gt_m = torch.where(gt_u_and_a1_a2_matrix_mg[user_id, attribute_1, attribute_2] == 1)[0]
mask[gt_m] = False
mask[m] = True

movie_name = movie_df[movie_df['movie_id'] == movie_id]['movie'].item()
attr_1_name = attr_df[attr_df['genre_id'] == attribute_1]['genre'].item()
attr_2_name = attr_df[attr_df['genre_id'] == attribute_2]['genre'].item()
print(f"User: {user_id}, Movie: {movie_name}, Attribute 1: {attr_1_name}, Attribute 2: {attr_2_name}")

score_u = model_box_mg.predict_item(u)
score_a1 = model_box_mg.predict_item(a1)
score_a2 = model_box_mg.predict_item(a2)
score_u[~mask] = -torch.inf
score_a1[~mask] = -torch.inf
score_a2[~mask] = -torch.inf
# Attribute orderings are not used below in this cell; the next cell reads them.
order_a1 = torch.argsort(score_a1, dim=-1, descending=True)
order_a2 = torch.argsort(score_a2, dim=-1, descending=True)

# Item ids predicted positive for each attribute, and their intersection.
a1_m = torch.where(predicted_a_m_box_mg[attribute_1])[0].tolist()
a2_m = torch.where(predicted_a_m_box_mg[attribute_2])[0].tolist()
a1_a2_m = set(a1_m).intersection(set(a2_m))

# (score_u was already masked above; this second assignment repeats it.)
score_u[~mask] = -torch.inf
order_u = torch.argsort(score_u, dim=-1, descending=True)
a1_a2_m = torch.tensor(list(a1_a2_m))
# Positions in the user ordering whose item id belongs to the a1-and-a2 set ...
a1_a2_in_u = (order_u[:, None] == a1_a2_m).nonzero()[:, 0]
# ... giving the user ordering restricted to that set.
order_a1_a2_in_u = order_u[a1_a2_in_u]
rank = torch.where(order_a1_a2_in_u == m)
user_rank = torch.where(order_u == m)
# Prints the position in the full user ranking, then in the filtered ranking.
print(user_rank)
print(rank)

In [None]:
# Item ids in the user ordering that are also within the first 1000 of the
# attribute-1 ordering and the first 300 of the attribute-2 ordering.
set(order_u.tolist()) & set(order_a1.tolist()[:1000]) & set(order_a2.tolist()[:300])

In [None]:
# Work-in-progress cell: rank accumulators for fixed top-k attribute filters.
# None of these lists is appended to in the code below.
rank_fixed_100 =[]
rank_fixed_200 = []
rank_fixed_500 = []
rank_fixed_1000 = []
rank_u_and_a = []
rank_only_a = []
for user_id, attribute_id, movie_id in tqdm(user_attribute_query_mg.values):
    u = torch.tensor([user_id])
    # Attribute ids are offset by n_users before scoring.
    a = torch.tensor([attribute_id + n_users])
    m = torch.tensor([movie_id])

    # mask is True for items kept in the ranking: ground-truth items of this
    # query are dropped, then the query movie is re-enabled.
    mask = torch.ones(model_vector_mg.n_items, dtype=torch.bool)
    gt_m = torch.where(gt_u_and_a_matrix_mg[user_id, attribute_id] == 1)[0]
    mask[gt_m] = False
    mask[m] = True

    score_u = model_box_mg.predict_item(u)
    score_a = model_box_mg.predict_item(a)
    score_u[~mask] = -torch.inf
    score_a[~mask] = -torch.inf
    # Items ordered by attribute score, and its top-100/200/500/1000 prefixes.
    order_m = torch.argsort(score_a, dim=-1, descending=True)
    top_100_a_m = order_m[:100]
    top_200_a_m = order_m[:200]
    top_500_a_m = order_m[:500]
    top_1000_a_m = order_m[:1000]
    # Items the thresholded box predictions assign to this attribute.
    a_m = torch.where(predicted_a_m_box_mg[attribute_id])[0]
    # NOTE(review): unconditional break — only the first query row is processed,
    # so this cell just leaves that row's variables in the kernel.
    break
#     if m not in 
#     if m not in a_m:
#         rank_a_and_u.append(model.n_items)
#     else:
#         if apply_mask:
#             score_u[~mask] = -torch.inf
#         order_u = torch.argsort(score_u, dim=-1, descending=True)
#         a_in_u = (order_u[:, None] == a_m).nonzero()[:, 0]
#         order_a_in_u = order_u[a_in_u]
#         rank_a_and_u.append(torch.where(order_a_in_u == m)[0].item())

#     # Get ranks
#     rank_u_mult_a.append(torch.where(order_u_mult_a == m)[0].item())
#     rank_u_int_a.append(torch.where(order_u_int_a == m)[0].item())

# print("Calculating metrics for user*attribute")
# hr_u_mult_a, ndcg_u_mult_a = get_hr_ndcg_at_k(rank_u_mult_a)
# print("Calculating metrics for user intersection attribute")
# hr_u_int_a, ndcg_u_int_a = get_hr_ndcg_at_k(rank_u_int_a)
# print("Calculating metrics for attribute and user threshold")
# hr_a_and_u, ndcg_a_and_u = get_hr_ndcg_at_k(rank_a_and_u)


In [None]:
# Per-query ranks of the query movie for user + attribute 1 + attribute 2 under
# three strategies: "and" (filter by thresholded predictions), "mult" (combine
# the three score vectors) and "int" (score a combined representation).
rank_u_a1_a2_and = []
rank_u_a1_a2_mult = []
rank_u_a1_a2_int = []

for user_id, movie_id, attribute_1, attribute_2 in tqdm(user_attributeA_attributeB_mg.values):
    u = torch.tensor([user_id])
    m = torch.tensor([movie_id])
    # Attribute ids are offset by n_users before the embedding lookup.
    a1 = torch.tensor([attribute_1 + n_users])
    a2 = torch.tensor([attribute_2 + n_users])


    # mask is True for items kept in the ranking: ground-truth items of this
    # query are dropped, then the query movie is re-enabled.
    mask = torch.ones(model_vector_mg.n_items, dtype=torch.bool)
    gt_m = torch.where(gt_u_and_a1_a2_matrix_mg[user_id, attribute_1, attribute_2] == 1)[0]
    mask[gt_m] = False
    mask[m] = True

    score_u = model_box_mg.predict_item(u)
    score_a1 = model_box_mg.predict_item(a1)
    score_a2 = model_box_mg.predict_item(a2)

    ## Score multiplication
    # NOTE(review): there is no else-branch; for any other model type
    # score_u_a1_a2_mult is never assigned in this cell.
    if arg_dict_box_mg['model'] == 'box_conditional':
        score_u_a1_a2_mult = score_u + score_a1 + score_a2
    elif arg_dict_box_mg['model'] == 'mf_bias':
        score_u_a1_a2_mult = F.logsigmoid(score_u) + F.logsigmoid(score_a1) + F.logsigmoid(score_a2)
    

    score_u_a1_a2_mult[~mask] = -torch.inf
    order_u_a1_a2_mult = torch.argsort(score_u_a1_a2_mult, dim=-1, descending=True)

    ## Score intersection
    if arg_dict_box_mg['model'] == 'box_conditional':
        # Intersect the user box with a1, then the result with a2.
        u_box = model_box_mg.user_embeddings(u)
        a1_box = model_box_mg.user_embeddings(a1)
        a2_box = model_box_mg.user_embeddings(a2)
        u_int_a1_box = u_box.intersection(
            a1_box,
            intersection_temp=arg_dict_box_mg['intersection_temp'],
            bayesian=True)
        u_a1_a2_int_box = u_int_a1_box.intersection(
            a2_box,
            intersection_temp=arg_dict_box_mg['intersection_temp'],
            bayesian=True)
        u_a1_a2_scores = box_predict_conditional(u_a1_a2_int_box, model_box_mg)
    elif arg_dict_box_mg['model'] == 'mf_bias':
        # NOTE(review): this branch is selected by arg_dict_box_mg but reads
        # model_vector_mg, while the scores above come from model_box_mg —
        # confirm which model is intended.
        u_vector = model_vector_mg.user_embeddings(u)
        a1_vector = model_vector_mg.user_embeddings(a1)
        a2_vector = model_vector_mg.user_embeddings(a2)
        u_a1_a2_vector = u_vector + a1_vector + a2_vector
        u_a1_a2_scores = vector_predict(u_a1_a2_vector, model_vector_mg)
    

    u_a1_a2_scores[~mask] = -torch.inf
    order_u_a1_a2_int = torch.argsort(u_a1_a2_scores, dim=-1, descending=True)
    ## get ranks
    rank_u_a1_a2_mult.append(torch.where(order_u_a1_a2_mult == m)[0].item())
    rank_u_a1_a2_int.append(torch.where(order_u_a1_a2_int == m)[0].item())
    ## Matrix completion
    a1_m = torch.where(predicted_a_m_box_mg[attribute_1])[0].tolist()
    a2_m = torch.where(predicted_a_m_box_mg[attribute_2])[0].tolist()
    a1_a2_m = set(a1_m).intersection(set(a2_m))
    if m.item() not in a1_a2_m:
        # Movie filtered out by the predicted attributes: record the item count as its rank.
        rank_u_a1_a2_and.append(model_box_mg.n_items)
    else:
        score_u[~mask] = -torch.inf
        order_u = torch.argsort(score_u, dim=-1, descending=True)
        a1_a2_m = torch.tensor(list(a1_a2_m))
        # Positions in the user ordering whose item id is in the a1-and-a2 set,
        # i.e. the user ordering restricted to that set.
        a1_a2_in_u = (order_u[:, None] == a1_a2_m).nonzero()[:, 0]
        order_a1_a2_in_u = order_u[a1_a2_in_u]
        rank_u_a1_a2_and.append(torch.where(order_a1_a2_in_u == m)[0].item())
        # NOTE(review): this break ends the whole loop at the first query whose
        # movie survives the filter. It looks like a debugging stop, since the
        # following cells inspect the leftover variables — remove it to rank
        # every query.
        break

In [None]:
# Debug: using the variables left by the loop above, is the last processed query movie outside the a1-and-a2 set?
m.item() not in a1_a2_m

In [None]:
# Debug: position of the hard-coded item id 1663 in the leftover user ordering
# (presumably the movie of the interrupted iteration — confirm).
torch.where(order_u == 1663)

## Plots

### utils 

In [None]:
import seaborn as sns

def plot_hits(hr_at_10, hr_at_20, hr_at_50):
    """Draw one bar-chart panel per cut-off (10, 20, 50) comparing six methods.

    Each argument holds one hit-rate value per method, in the order
    ``MC(vec), Vector Alg, Vector Geo, MC(box), Box Alg, Box Geo``.
    The figure is produced by ``sns.catplot``; nothing is returned.
    """
    models = ['MC(vec)', 'Vector Alg', 'Vector Geo', 'MC(box)', 'Box Alg', 'Box Geo']

    # One long-format frame per cut-off; the 'hr@' column selects the panel.
    per_cutoff = [
        pd.DataFrame({'Models': models, 'HR': hits, 'hr@': [cutoff] * len(models)})
        for cutoff, hits in (('10', hr_at_10), ('20', hr_at_20), ('50', hr_at_50))
    ]
    df = pd.concat(per_cutoff)

    sns.catplot(
        x='Models',
        y='HR',
        hue='Models',
        legend=False,
        col='hr@',
        data=df,
        kind='bar',
        palette='viridis',
    )

In [None]:
def append_metrices(list10, list20, list50, metric_dict, key):
    """Append the first three entries of ``metric_dict[key]`` to the three lists.

    Entry 0 goes to ``list10``, entry 1 to ``list20`` and entry 2 to ``list50``.
    The lists are modified in place and also returned as a tuple.
    """
    values = metric_dict[key]
    for position, target in enumerate((list10, list20, list50)):
        target.append(values[position])
    return list10, list20, list50

### $ U \cap A$

In [None]:
# Collect hit rates in the model order plot_hits expects (three vector entries,
# then three box entries) and plot them.
# NOTE(review): hr_at_30 receives entry 2 of each metric, which plot_hits labels
# as 50 — the variable name looks stale.
hr_at_10, hr_at_20, hr_at_30 = [], [], []
for _metrics, _key in (
    (metric_dict_vector_mg, 'hr_a_and_u'),
    (metric_dict_vector_mg, 'hr_u_mult_a'),
    (metric_dict_vector_mg, 'hr_u_int_a'),
    (metric_dict_box_mg, 'hr_a_and_u'),
    (metric_dict_box_mg, 'hr_u_mult_a'),
    (metric_dict_box_mg, 'hr_u_int_a'),
):
    hr_at_10, hr_at_20, hr_at_30 = append_metrices(hr_at_10, hr_at_20, hr_at_30, _metrics, _key)
plot_hits(hr_at_10, hr_at_20, hr_at_30)

### $ U \cap A_1 \cap A_2 $

In [None]:
# Collect hit rates in the model order plot_hits expects (three vector entries,
# then three box entries) and plot them.
# NOTE(review): hr_at_30 receives entry 2 of each metric, which plot_hits labels
# as 50 — the variable name looks stale.
hr_at_10, hr_at_20, hr_at_30 = [], [], []
for _metrics, _key in (
    (metric_dict_vector_mg_a1_a2, 'hr_u_a1_a2_and'),
    (metric_dict_vector_mg_a1_a2, 'hr_u_a1_a2_mult'),
    (metric_dict_vector_mg_a1_a2, 'hr_u_a1_a2_int'),
    (metric_dict_box_mg_a1_a2, 'hr_u_a1_a2_and'),
    (metric_dict_box_mg_a1_a2, 'hr_u_a1_a2_mult'),
    (metric_dict_box_mg_a1_a2, 'hr_u_a1_a2_int'),
):
    hr_at_10, hr_at_20, hr_at_30 = append_metrices(hr_at_10, hr_at_20, hr_at_30, _metrics, _key)
plot_hits(hr_at_10, hr_at_20, hr_at_30)

### $ U \cap A_1 \cap \neg A_2$

In [None]:
# Collect hit rates in the model order plot_hits expects (three vector entries,
# then three box entries) and plot them.
# NOTE(review): hr_at_30 receives entry 2 of each metric, which plot_hits labels
# as 50 — the variable name looks stale.
hr_at_10, hr_at_20, hr_at_30 = [], [], []
for _metrics, _key in (
    (metric_dict_vector_mg_a1_not_a2, 'hr_u_a1_not_a2_and'),
    (metric_dict_vector_mg_a1_not_a2, 'hr_u_a1_not_a2_mult'),
    (metric_dict_vector_mg_a1_not_a2, 'hr_u_a1_not_a2_int'),
    (metric_dict_box_mg_a1_not_a2, 'hr_u_a1_not_a2_and'),
    (metric_dict_box_mg_a1_not_a2, 'hr_u_a1_not_a2_mult'),
    (metric_dict_box_mg_a1_not_a2, 'hr_u_a1_not_a2_int'),
):
    hr_at_10, hr_at_20, hr_at_30 = append_metrices(hr_at_10, hr_at_20, hr_at_30, _metrics, _key)
plot_hits(hr_at_10, hr_at_20, hr_at_30)

### Analysis

# Get Results for Movie-Attribute (MA) Joint Matrix

In [None]:
# Load the movie-attribute dataset, its ground-truth matrices and the
# precomputed combination queries.
dataset_ma = get_dataset(arg_dict_box_ma)
# NOTE(review): these rebind the un-suffixed globals n_users / n_movies /
# n_attributes that the genre ("_mg") cells also read; re-running genre cells
# after this one will see the values of dataset_ma.
n_users = dataset_ma.n_users
n_movies = dataset_ma.n_movies
n_attributes = dataset_ma.n_attributes
# NOTE(review): arguments are (dataframe, dataset) here, but the genre section
# calls gt_df_to_matrix(dataset, dataframe) — confirm which order is correct.
gt_user_movie_matrix = gt_df_to_matrix(dataset_ma.gt_user_movie, dataset_ma)
gt_attribute_movie_matrix = gt_df_to_matrix(dataset_ma.gt_attribute_movie, dataset_ma)
dataset_ma.read_neg_data_files()
print('Data loaded')
print('Number of users:', n_users)
print('Number of movies:', n_movies)
print('Number of attributes:', n_attributes)
# Query tables live under <data_dir>/combinations, resolved relative to the parent directory.
user_attribute_query_ma = pd.read_csv('../' + arg_dict_box_ma['data_dir'] + '/combinations/user_attribute.csv')
user_attributeA_attributeB_ma = pd.read_csv('../' + arg_dict_box_ma['data_dir'] + '/combinations/user_attributeA_attributeB.csv')
user_attributeA_not_attributeB_ma = pd.read_csv('../' + arg_dict_box_ma['data_dir'] + '/combinations/user_attributeA_not_attributeB.csv')
print("Combination queries loaded")

In [None]:
# Print the check_test_results summary for each model on the movie-attribute data.
for _heading, _model in (
    ("Validation results for box model", model_box_ma),
    ("Validation results for vector model", model_vector_ma),
):
    print(_heading)
    pprint(check_test_results(dataset_ma, _model))

## Box $U \cap A$ - Attribute

In [None]:
# User-and-attribute ranking metrics for the box model on the movie-attribute data.
# NOTE(review): the genre section also passes a ground-truth matrix, a predicted
# matrix and apply_mask to this helper; they are omitted here — confirm the call still matches its signature.
metric_dict_box_ma = get_ranking_metrices(user_attribute_query_ma, model_box_ma, arg_dict_box_ma)

## Vector $U \cap A$ - Attribute

In [None]:
# User-and-attribute ranking metrics for the vector model on the movie-attribute data.
# NOTE(review): the genre section also passes a ground-truth matrix, a predicted
# matrix and apply_mask to this helper; they are omitted here — confirm the call still matches its signature.
metric_dict_vector_ma = get_ranking_metrices(user_attribute_query_ma, model_vector_ma, arg_dict_vector_ma)

## Box $U \cap A_1 \cap A_2$ - Attribute

In [None]:
# User-and-two-attributes ranking metrics for the box model on the movie-attribute data.
# NOTE(review): the genre section also passes a ground-truth matrix, a predicted
# matrix and apply_mask to this helper; they are omitted here — confirm the call still matches its signature.
metric_dict_box_ma_a1_a2 = get_ranking_metrices_intersection(user_attributeA_attributeB_ma, model_box_ma, arg_dict_box_ma)

## Vector $U \cap A_1 \cap A_2$ - Attribute

In [None]:
# User-and-two-attributes ranking metrics for the vector model on the movie-attribute data.
# NOTE(review): the genre section also passes a ground-truth matrix, a predicted
# matrix and apply_mask to this helper; they are omitted here — confirm the call still matches its signature.
metric_dict_vector_ma_a1_a2 = get_ranking_metrices_intersection(user_attributeA_attributeB_ma, model_vector_ma, arg_dict_vector_ma)

## Box $U \cap A_1 \cap \neg A_2$ - Attribute

In [None]:
# User-and-attribute-A-but-not-B ranking metrics for the box model on the movie-attribute data.
# NOTE(review): the genre section also passes a ground-truth matrix, a predicted
# matrix and apply_mask to this helper; they are omitted here — confirm the call still matches its signature.
metric_dict_box_ma_a1_not_a2 = get_ranking_metrices_difference(user_attributeA_not_attributeB_ma, model_box_ma, arg_dict_box_ma)

## Vector $U \cap A_1 \cap \neg A_2$ - Attribute

In [None]:
# User-and-attribute-A-but-not-B ranking metrics for the vector model on the movie-attribute data.
# NOTE(review): the genre section also passes a ground-truth matrix, a predicted
# matrix and apply_mask to this helper; they are omitted here — confirm the call still matches its signature.
metric_dict_vector_ma_a1_not_a2 = get_ranking_metrices_difference(user_attributeA_not_attributeB_ma, model_vector_ma, arg_dict_vector_ma)

In [None]:

# Rank the query movie of every (user, attribute, movie) genre query with the
# vector-side model, under two strategies:
#   * "mult": combine the user and attribute score vectors
#   * "int":  score a single combined user+attribute representation
# then report HR / NDCG for both.
#
# NOTE(review): this cell reads the global n_users, which the movie-attribute
# section above rebinds from dataset_ma — make sure it still holds the genre
# dataset's user count when this cell runs.
keep_top = 200
rank_u = []
rank_a = []
rank_u_mult_a = []
rank_u_int_a = []
rank_a_in_u = []
rank_a_and_u = []

# Fail fast on an unsupported model type. Without this check neither branch
# below would assign the score tensors, and the loop would either crash with a
# NameError or silently reuse stale tensors left in the kernel by earlier cells.
_model_type = arg_dict_vector_mg['model']
if _model_type not in ('box_conditional', 'mf_bias'):
    raise ValueError(f"Unsupported model type for this evaluation: {_model_type!r}")

for user_id, attribute_id, movie_id in tqdm(user_attribute_query_mg.values):
    u = torch.tensor([user_id])
    # Attribute ids are offset by n_users before the embedding lookup.
    a = torch.tensor([attribute_id + n_users])
    m = torch.tensor([movie_id])

    # mask is True for items kept in the ranking: every ground-truth item of
    # this query is dropped, then the query movie itself is re-enabled.
    mask = torch.ones(model_vector_mg.n_items, dtype=torch.bool)
    gt_m = torch.where(gt_u_and_a_matrix_mg[user_id, attribute_id] == 1)[0]
    mask[gt_m] = False
    mask[m] = True

    score_u = model_vector_mg.predict_item(u)
    score_a = model_vector_mg.predict_item(a)

    ## Score multiplication
    if _model_type == 'box_conditional':
        score_u_mult_a = score_u + score_a
    else:  # 'mf_bias'
        score_u_mult_a = F.logsigmoid(score_u) + F.logsigmoid(score_a)

    score_u_mult_a[~mask] = -torch.inf
    order_u_mult_a = torch.argsort(score_u_mult_a, dim=-1, descending=True)

    ## Score intersection
    if _model_type == 'box_conditional':
        u_box = model_vector_mg.user_embeddings(u)
        a_box = model_vector_mg.user_embeddings(a)
        # Single intersection call with the model's temperature; the earlier
        # argument-less call was immediately overwritten and has been dropped.
        u_int_a_box = u_box.intersection(
            a_box,
            intersection_temp=model_vector_mg.intersection_temp,
            bayesian=True)
        u_int_a_scores = box_predict_conditional(u_int_a_box, model_vector_mg)
    else:  # 'mf_bias'
        u_vector = model_vector_mg.user_embeddings(u)
        a_vector = model_vector_mg.user_embeddings(a)
        u_int_a_vector = u_vector + a_vector
        u_int_a_scores = vector_predict(u_int_a_vector, model_vector_mg)

    u_int_a_scores[~mask] = -torch.inf
    order_u_int_a = torch.argsort(u_int_a_scores, dim=-1, descending=True)

    ## Get ranks
    rank_u_mult_a.append(torch.where(order_u_mult_a == m)[0].item())
    rank_u_int_a.append(torch.where(order_u_int_a == m)[0].item())

print("Calculating metrics for user*attribute")
hr_u_mult_a, ndcg_u_mult_a = get_hr_ndcg_at_k(rank_u_mult_a)
print("Calculating metrics for user intersection attribute")
hr_u_int_a, ndcg_u_int_a = get_hr_ndcg_at_k(rank_u_int_a)
