In [None]:
!pip install lenskit
from scipy.spatial import distance
import random
import pandas as pd

In [None]:
# Load the user-tag matrix; the CSV's unnamed first column is renamed to "userID".
user_df = (
    pd.read_csv('Preprocessed_Data/user_tag_matrix_improved.csv')
    .rename(columns={"Unnamed: 0": "userID"})
)

# Same matrix without the id column, i.e. only the per-tag scores of each user.
user_scores = user_df.drop(columns="userID")

# Only the last expression of a cell is rendered automatically, so the first
# preview has to be displayed explicitly or it is silently discarded.
display(user_df.head())
user_scores.head()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Average tag score of every user (one value per row of `user_scores`).
row_mean = user_scores.mean(axis=1)

fig, ax = plt.subplots(figsize=(10, 6))

# Pass the values through `x=` explicitly. A positional first argument is
# interpreted differently across seaborn versions (x vs. data), which changes
# the orientation of the violin and would put the x-label below on the wrong axis.
sns.violinplot(x=row_mean, inner='quartile', ax=ax)

ax.set_xlabel('Mean Scores')
ax.set_ylabel('Frequency')
ax.set_title('Violin Plot of the average user-tag vector value')

plt.show()

In [None]:
# Largest per-user mean tag score, i.e. the upper end of the distribution plotted above.
display(row_mean.max())

In [None]:
# Load the stored similarity matrix from disk.
# NOTE(review): presumably pairwise user-user similarities — confirm against the notebook that writes this file.
similarity_matrix = pd.read_csv("Preprocessed_Data/sim_matrix.csv")

In [None]:
# Remove the first column of the loaded matrix, then store each row's mean
# in a new "mean" column.
# NOTE: this cell is not idempotent - running it twice removes a second column
# and folds the previous "mean" values into the new mean. Reload the CSV before
# re-running it.
leading_column = similarity_matrix.columns[0]
similarity_matrix = similarity_matrix.drop(columns=leading_column)
similarity_matrix = similarity_matrix.assign(mean=similarity_matrix.mean(axis=1))
similarity_matrix.head()

In [None]:
import seaborn as sns
# Distribution of the row-wise mean similarity.
mean_ax = sns.violinplot(x=similarity_matrix["mean"])
mean_ax.set_xlabel("Mean")
mean_ax.set_title("Violin Plot of 'mean' Column")
plt.show()

There are some outliers that are very dissimilar from the rest of the group. In general, most users are on average very similar to every other user: their mean similarity tends to be around 90%. A few users, however, are very dissimilar from the rest of the group.

In [None]:
# Row-wise minimum over every column currently in the frame. A "mean" column
# that is already present takes part in the minimum, but a mean can never be
# smaller than the smallest value it was computed from, so the result is the
# smallest similarity in the row.
row_minimum = similarity_matrix.min(axis=1)
similarity_matrix = similarity_matrix.assign(min=row_minimum)
similarity_matrix.head()

In [None]:
import seaborn as sns
# Distribution of the row-wise minimum similarity.
min_ax = sns.violinplot(x=similarity_matrix["min"])
min_ax.set_xlabel("Min")
min_ax.set_title("Violin Plot of 'min' Column")
plt.show()

In [None]:
# Load the post-processed movie-tag table and preview its first rows.
item_df = pd.read_csv('Preprocessed_Data/post_processed_movie_tag.csv')
item_df.head()

In [None]:
def generate_candidate(user_df):
    """
    Draws one user uniformly at random from the user matrix.

    :param user_df: DataFrame with one row per user
    :return: the selected row as a pandas Series
    :raises ValueError: if user_df contains no rows
    """
    if len(user_df) == 0:
        raise ValueError("cannot draw a candidate from an empty user_df")
    # Positional lookup (iloc) instead of label lookup (loc): the draw is a row
    # position, so it must also work when the index is not the default 0..n-1
    # (e.g. after filtering or when the index holds user ids).
    position = random.randrange(len(user_df))
    return user_df.iloc[position]

In [None]:
import numpy as np
from numpy import dot
from numpy.linalg import norm

# Cosine similarity between two user rows (the "userID" entry is ignored).
def sim(vector1, vector2):
    """
    Calculates the cosine similarity between the tag vectors of two users.

    :param vector1: first user row (pandas Series containing a "userID" entry)
    :param vector2: second user row (pandas Series containing a "userID" entry)
    :return: dot(v1, v2) / (|v1| * |v2|) as a float; 0.0 when either vector
             has zero length
    :raises KeyError: if one of the rows has no "userID" entry
    """
    values1 = np.asarray(vector1.drop("userID"), dtype=float)
    values2 = np.asarray(vector2.drop("userID"), dtype=float)
    denominator = norm(values1) * norm(values2)
    if denominator == 0:
        # An all-zero vector would otherwise produce NaN, and NaN compares
        # False against every threshold, so it would silently pass both the
        # "too dissimilar" and the "too similar" checks.
        return 0.0
    return float(dot(values1, values2) / denominator)

In [None]:
import time
def check_candidate_uniform(u_new, U, threshold):
    """
    Tells whether a candidate can join a group without breaking its uniformity.

    :param u_new: the candidate user
    :param U: the members already in the group
    :param threshold: minimum similarity the candidate must have with every member
    :return: True when no member is less similar to the candidate than the
             threshold (always True for an empty group), False otherwise
    """
    # Stops at the first member that is too dissimilar.
    return not any(sim(u_new, member) < threshold for member in U)


def generate_uniform_group(size,threshold,user_df, timeout_seconds=2):
    """
    Greedily generates a uniform group: every pair of members is at least
    `threshold` similar.

    Candidates are drawn at random. When the current attempt has not been
    completed within `timeout_seconds`, it is discarded and a new attempt is
    started, so the function keeps running until a group is found.

    :param size: the amount of people in the group
    :param threshold: the threshold beyond which two users are too dissimilar for the group to be uniform
    :param user_df: DataFrame with one row per user, including a "userID" column
    :param timeout_seconds: seconds after which an unfinished attempt is restarted
    :return: list of user rows of length `size`, without repeated users
    :raises ValueError: if more distinct users are requested than user_df holds
    """
    if size > len(user_df):
        raise ValueError("size exceeds the number of available users")

    U = []
    member_ids = set()
    deadline = time.time() + timeout_seconds
    while len(U) < size:
        # Checked before drawing, so a group that has just been completed is
        # never thrown away by the timeout.
        if time.time() > deadline:
            U = []
            member_ids = set()
            deadline = time.time() + timeout_seconds
        u_new = generate_candidate(user_df)
        # Candidates are drawn with replacement; a user is trivially similar to
        # themselves and would otherwise be able to join the same group twice.
        if u_new["userID"] in member_ids:
            continue
        if check_candidate_uniform(u_new, U, threshold):
            U.append(u_new)
            member_ids.add(u_new["userID"])
    return U

In [None]:
def check_candidate_divergent(u_new, U, threshold):
    """
    Tells whether a candidate can join a group without breaking its divergence.

    :param u_new: the candidate user
    :param U: the members already in the group
    :param threshold: maximum similarity the candidate may have with any member
    :return: True when no member is more similar to the candidate than the
             threshold (always True for an empty group), False otherwise
    """
    # Stops at the first member that is too similar.
    return not any(sim(u_new, member) > threshold for member in U)



def generate_divergent_group(size,threshold,user_df, timeout_seconds=4):
    """
    Greedily generates a divergent group: no pair of members is more than
    `threshold` similar.

    Candidates are drawn at random. When the current attempt has not been
    completed within `timeout_seconds`, it is discarded and a new attempt is
    started, so the function keeps running until a group is found.

    :param size: the amount of people in the group
    :param threshold: the threshold above which two users are too similar for the group to be divergent
    :param user_df: DataFrame with one row per user, including a "userID" column
    :param timeout_seconds: seconds after which an unfinished attempt is restarted
    :return: list of user rows of length `size`
    """
    U = []
    deadline = time.time() + timeout_seconds
    while len(U) < size:
        # Checked before drawing, so a group that has just been completed is
        # never thrown away by the timeout.
        if time.time() > deadline:
            U = []
            deadline = time.time() + timeout_seconds
        u_new = generate_candidate(user_df)
        if check_candidate_divergent(u_new, U, threshold):
            # the candidate is sufficiently dissimilar from every current member
            U.append(u_new)
    return U

In [None]:

def generate_minority_group(size, threshold1, threshold2,userdf, timeout_seconds=2):
    """
    Generates a minority group: `size - 1` mutually similar users plus one
    user who diverges from all of them.

    The group is built greedily from random candidates. A greedy attempt can
    reach a dead end (for example when the majority leaves no sufficiently
    different outlier), so an attempt that is not completed within
    `timeout_seconds` is discarded and restarted, as the uniform and divergent
    generators do.

    :param size: the amount of people in the group
    :param threshold1: the threshold beyond which two users are too dissimilar for the group to be uniform
    :param threshold2: the threshold below which a two users are too similar for the group to be divergent
    :param userdf: DataFrame with one row per user, including a "userID" column
    :param timeout_seconds: seconds after which an unfinished attempt is restarted
    :return: list of user rows of length `size`; the outlier is the last element
    :raises ValueError: if more distinct users are requested than userdf holds
    """
    if size > len(userdf):
        raise ValueError("size exceeds the number of available users")

    U = []
    member_ids = set()
    deadline = time.time() + timeout_seconds
    while len(U) < size:
        if time.time() > deadline:
            U = []
            member_ids = set()
            deadline = time.time() + timeout_seconds
        u_new = generate_candidate(userdf)
        # Candidates are drawn with replacement; never add the same user twice.
        if u_new["userID"] in member_ids:
            continue
        if len(U) < size - 1:
            # still filling the uniform majority
            accepted = check_candidate_uniform(u_new, U, threshold1)
        else:
            # last seat: the member who diverges from the whole majority
            accepted = check_candidate_divergent(u_new, U, threshold2)
        if accepted:
            U.append(u_new)
            member_ids.add(u_new["userID"])

    return U

In [None]:
def generate_coalitional_group(size, threshold1,threshold2, userdf, timeout_seconds=4):
    """
    Generates a coalitional group: two sub-groups that are uniform internally
    while every member of the second sub-group diverges from every member of
    the first.

    The group is built greedily from random candidates. A greedy attempt can
    reach a dead end, so an attempt that is not completed within
    `timeout_seconds` is discarded and restarted, as the uniform and divergent
    generators do.

    :param size: the amount of people in the group
    :param threshold1: the threshold beyond which two users are too dissimilar for the group to be uniform
    :param threshold2: the threshold below which a two users are too similar for the group to be divergent
    :param userdf: DataFrame with one row per user, including a "userID" column
    :param timeout_seconds: seconds after which an unfinished attempt is restarted
    :return: list of user rows of length `size`: the first `size // 2` form one
             coalition, the remaining ones the other
    :raises ValueError: if more distinct users are requested than userdf holds
    """
    if size > len(userdf):
        raise ValueError("size exceeds the number of available users")

    G1 = []
    G2 = []
    member_ids = set()
    deadline = time.time() + timeout_seconds
    while len(G1) + len(G2) < size:
        if time.time() > deadline:
            G1 = []
            G2 = []
            member_ids = set()
            deadline = time.time() + timeout_seconds
        u_new = generate_candidate(userdf)
        # Candidates are drawn with replacement; never add the same user twice.
        if u_new["userID"] in member_ids:
            continue
        if len(G1) < size // 2:
            # first coalition is filled before the second one is started
            if check_candidate_uniform(u_new, G1, threshold1):
                G1.append(u_new)
                member_ids.add(u_new["userID"])
        elif check_candidate_uniform(u_new,G2,threshold1) and check_candidate_divergent(u_new,G1,threshold2):
            G2.append(u_new)
            member_ids.add(u_new["userID"])
    U = G1 + G2
    return U

In [None]:
def check_group_uniformity(group, threshold):
    """
    Decides whether every pair of users in a group is similar enough.

    :param group: List of users in the group.
    :param threshold: Minimum similarity required between any two members.
    :return: False as soon as one pair is less similar than the threshold,
             True otherwise (including groups with fewer than two members).
    """
    # Each unordered pair is visited once: a member is only compared with the
    # members that come after it in the list.
    return not any(
        sim(first, second) < threshold
        for position, first in enumerate(group)
        for second in group[position + 1:]
    )

def check_group_divergence(group, threshold):
    """
    Decides whether every pair of users in a group is dissimilar enough.

    :param group: List of users in the group.
    :param threshold: Maximum similarity allowed between any two members.
    :return: False as soon as one pair is more similar than the threshold
             (the group is then not divergent), True otherwise (including
             groups with fewer than two members).
    """
    # Each unordered pair is visited once: a member is only compared with the
    # members that come after it in the list.
    return not any(
        sim(first, second) > threshold
        for position, first in enumerate(group)
        for second in group[position + 1:]
    )

def check_group_coalitional(group, threshold):
    """
    Checks if a group is coalitional according to the specified threshold.

    The members are split greedily, in list order, into two sub-groups that
    are uniform within themselves. The group is coalitional when that split
    succeeds, both sub-groups are non-empty, their sizes differ by at most
    one, and no pair across the two sub-groups is more similar than the
    threshold.

    :param group: List of users in the group.
    :param threshold: Similarity threshold, used both as the minimum similarity
                      inside a sub-group and as the maximum similarity across
                      the two sub-groups.
    :return: True if the group is coalitional, False otherwise.
    """
    G1 = []
    G2 = []
    # create two groups that are uniform within themselves
    for user in group:
        if check_candidate_uniform(user, G1, threshold):
            G1.append(user)
        elif check_candidate_uniform(user, G2, threshold):
            G2.append(user)
        else:
            # the user fits in neither sub-group
            return False
    # A coalitional group needs two actual coalitions. Without this check an
    # empty or single-user group passes every test below vacuously.
    if not G1 or not G2:
        return False
    # the two groups may differ in size by at most one
    if abs(len(G1) - len(G2)) > 1:
        return False
    # no pair across the two groups may be uniform with one another
    return not any(sim(user1, user2) > threshold for user1 in G1 for user2 in G2)

Findings from the paper "Evaluating explainable social choice-based aggregation strategies for group recommendation" (2023):
- Most Pleasure is preferable for uniform groups but performs badly for minority groups.
- Fairness is strong for uniform or coalitional groups.
- Additive Utilitarian is good when there is no clear group identity; it performs among the best on divergent and minority groups.

Based on these results we create an adaptive group recommendation aggregator. It analyses the group configuration and uses Most Pleasure for uniform groups, Fairness for coalitional groups, and Additive Utilitarian for all other groups.


In [None]:
%run 'ind_rec_pipeline.ipynb'

In [None]:
def most_pleasure(G,size = 10):
  """
  Most Pleasure aggregation: every item is ranked by the highest "new_score"
  any group member gave it.

  :param G: list of user rows, each with a "userID" entry
  :param size: maximum number of items to return
  :return: DataFrame with at most `size` distinct items, best score first;
           an empty DataFrame for an empty group
  """
  # gather all user recommendations first and concatenate once
  # (concatenating inside the loop copies the growing frame on every pass)
  user_recs = [individual_recommender_pipeline(user["userID"]) for user in G]
  if not user_recs:
    return pd.DataFrame()
  allrecs = pd.concat(user_recs)

  allrecs = allrecs.sort_values(by="new_score", ascending=False) #sort it by new score
  # after sorting, the first occurrence of an item carries its maximum score
  allrecs = allrecs.drop_duplicates(subset="item", keep="first")
  return allrecs.iloc[:size]

In [None]:
import random
def fairness(G, size = 10):
  """
  Fairness aggregation: group members take turns, in a random order, picking
  their best item that has not been selected yet.

  :param G: list of user rows, each with a "userID" entry
  :param size: number of items to recommend
  :return: DataFrame with the picked items in picking order. It holds fewer
           than `size` rows when the members run out of unselected items, and
           is empty for an empty group.
  """
  # one recommendation list per user, best item first
  group_preferences = [
    individual_recommender_pipeline(user["userID"]).sort_values(by="new_score", ascending=False)
    for user in G
  ]
  random.shuffle(group_preferences) #create a random order for the recommendations
  final_recommendation = []
  selected_items = set() #keep track of the items that have already been selected
  turn = 0 #keep track of whose turn it is by how many iterations have happened
  idle_turns = 0 #consecutive turns in which the user had nothing left to pick
  # Stop once a full round passes without a pick: every list is exhausted and
  # looping on would never end. This also covers the empty group (no modulo by zero).
  while len(final_recommendation) < size and idle_turns < len(group_preferences):
    user_prefs = group_preferences[turn % len(group_preferences)]
    remaining = user_prefs[~user_prefs['item'].isin(selected_items)]
    if remaining.empty:
      idle_turns += 1
    else:
      selected_item = remaining.iloc[0]
      final_recommendation.append(selected_item)
      selected_items.add(selected_item['item'])
      idle_turns = 0
    turn += 1

  return pd.DataFrame(final_recommendation)

In [None]:
def additive(G,size = 10):
  """
  Additive Utilitarian aggregation: items are ranked by the sum of the scores
  of all group members.

  Every member's recommendations are collected; for items a member did not
  get recommended, the missing score is filled in with `mathias_algo`.

  :param G: list of user rows, each with a "userID" entry
  :param size: maximum number of items to return
  :return: DataFrame with columns item, movie_title and sum_scores, highest
           sum first; empty (same columns) for an empty group
  """
  # NOTE(review): assumes every recommendation frame has the columns
  # item, movie_title, score and new_score - confirm against ind_rec_pipeline.
  prefs = []
  user_cols = []
  for user in G:
    userrec = individual_recommender_pipeline(user["userID"])
    userrec = userrec.drop("score",axis = 1)
    userrec = userrec.rename(columns = {"new_score":user["userID"]})
    prefs.append(userrec)
    if user["userID"] not in user_cols:
      user_cols.append(user["userID"])
  if not prefs:
    return pd.DataFrame(columns=['item', 'movie_title', 'sum_scores'])
  # ignore_index gives every row a unique label. With the per-user labels kept,
  # rows of different users can share a label and `.at[label, col]` below would
  # write one score into several rows.
  prefs = pd.concat(prefs, ignore_index=True)

  # Fill the scores of users for whom an item was not recommended. Only the
  # per-user score columns are visited, so int(col) is never applied to the
  # item/title columns.
  for col in user_cols:
    for index in prefs.index[prefs[col].isna()]:
      item_id = prefs.at[index, 'item']
      score = mathias_algo(int(col), item_id)
      prefs.at[index,col] = score[item_id]

  prefs["sum_scores"] = prefs[user_cols].sum(axis=1)
  prefs = prefs.sort_values(by="sum_scores", ascending=False)
  # An item recommended to several members appears once per member; keep one
  # row per item (the best-scoring one) so the same title cannot be returned twice.
  prefs = prefs.drop_duplicates(subset="item", keep="first")
  prefs = prefs[['item', 'movie_title', 'sum_scores']]
  prefs = prefs.iloc[:size]
  return prefs

In [None]:
def group_recommendation_pipeline(G, recommendation_size = 10, threshold1 = 0.8):
    """
    Picks the aggregation strategy that matches the group's configuration and
    runs it.

    G: a group. A list of user tag_vectors.
    recommendation_size: how many items to recommend
    threshold1: similarity threshold handed to the group checks

    Returns a tuple (recommendations, method), where method is one of
    "fairness", "most pleasure" or "additive utilitarian".
    """
    # The coalitional check comes first; uniformity is only evaluated when the
    # group is not coalitional, and everything else falls back to additive.
    if check_group_coalitional(G, threshold1):
        method, aggregate = "fairness", fairness
    elif check_group_uniformity(G, threshold1):
        method, aggregate = "most pleasure", most_pleasure
    else:
        method, aggregate = "additive utilitarian", additive
    return aggregate(G, recommendation_size), method

## Group explanations

In [None]:
def explain_fairness(rec_item):
  """
  Builds the user-facing explanation for an item chosen by the Fairness strategy.

  :param rec_item: title of the recommended item (any value that can be formatted as text)
  :return: the explanation sentence
  """
  # Individuals take turns to receive their preferred items. The idea behind this strategy is that it is not so bad
  # to watch something you hate, as long as you get to watch the things you really love as well.
  return f"Recommended Item: [{rec_item}]. This recommendation prioritizes a member of our group who had been less favored in past decisions. Everyone will have their turn for personalized recommendations."

def explain_most_pleasure(rec_item):
  """
  Builds the user-facing explanation for an item chosen by the Most Pleasure strategy.

  :param rec_item: title of the recommended item (any value that can be formatted as text)
  :return: the explanation sentence
  """
  # Make a new list of ratings with the maximum of the individual user ratings per item. Items get selected based
  # on their rating on that list, the higher the rating the earlier the item appears in the sequence.
  return f"Recommended Item: [{rec_item}]. This was chosen because it's an option that all group members are likely to enjoy. While it might be the most preferred choice of User B, it also ensures that User A and others don't strongly dislike it."

def explain_additive_utilitarian(rec_item):
  """
  Builds the user-facing explanation for an item chosen by the Additive Utilitarian strategy.

  :param rec_item: title of the recommended item (any value that can be formatted as text)
  :return: the explanation sentence
  """
  # Maximize the overall satisfaction or utility of the group.
  # Recommend items that maximize the collective welfare of the entire group. Which means that items that bring
  # the most satisfaction to the group as a whole are prioritized.
  return f"Recommended Item: [{rec_item}]. This recommendation was made because it is broadly appreciated by our group. It's chosen to maximize the combined satisfaction and enjoyment of all members."


def group_explanation(method, rec_item):
  """
  Returns the explanation text that belongs to an aggregation method.

  :param method: "fairness", "most pleasure" or "additive utilitarian"
  :param rec_item: title of the recommended item
  :return: the explanation sentence, or None for any other method name
  """
  if method == "fairness":
    return explain_fairness(rec_item)
  if method == "most pleasure":
    return explain_most_pleasure(rec_item)
  if method == "additive utilitarian":
    return explain_additive_utilitarian(rec_item)
  # unknown method names yield no explanation
  return None

In [None]:
def group_predict_and_explain(group):
  """
  Recommends one item to a group and explains why it was chosen.

  :param group: list of user rows forming the group
  :return: tuple (title of the top recommended item, explanation text)
  """
  # run the adaptive pipeline; it also reports which strategy it used
  recommendations, method = group_recommendation_pipeline(group)

  # title of the first (top-ranked) recommendation
  title_position = recommendations.columns.get_loc('movie_title')
  top_title = recommendations.iloc[0, title_position]

  # explanation matching the strategy that produced the recommendation
  return top_title, group_explanation(method, top_title)