<a href="https://colab.research.google.com/github/menicacci/fairness-group-recommendations/blob/main/Group_Recommendations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import random
import statistics

import sys
sys.path.append('scripts')
import recommender

Dataset: MovieLens (`ml-latest-small`), available from https://grouplens.org/datasets/movielens/

In [2]:
# Dataset loading
data_dir = 'ml-latest-small'

df_movies = pd.read_csv(f"{data_dir}/movies.csv")
df_ratings = pd.read_csv(f"{data_dir}/ratings.csv")

# Build the user-item rating matrix: one row per userId and one column per
# movieId. Each cell holds the rating given by that user to that movie
# (NaN means the rating is unknown).
# `pivot` performs this reshape in a single step, replacing the slower
# per-user groupby/apply/unstack round trip.
df_user_based_cf = df_ratings.pivot(index='userId', columns='movieId', values='rating')

In [3]:
def find_nan_columns_in_group(df, group):
  """Return the columns that are NaN for every row selected by `group`.

  Args:
    df: DataFrame whose columns are inspected.
    group: row selectors passed to `df.iloc`, i.e. interpreted as integer
      positions.

  Returns:
    List of column labels whose values are NaN in all selected rows.

  NOTE(review): rows are selected by position (`iloc`), while
  `random_distinct_indexes` returns index labels - confirm that callers and
  the `recommender` helpers agree on which of the two a "user" is.
  """
  all_missing = df.iloc[group].isna().all(axis=0)

  return all_missing[all_missing].index.tolist()


def random_distinct_indexes(df, num):
  """Pick up to `num` index labels of `df` in random order.

  The full list of index labels is shuffled with the global `random`
  generator and the first `num` entries are returned, so the result contains
  no more entries than `df` has rows.
  """
  shuffled_labels = list(df.index.tolist())
  random.shuffle(shuffled_labels)

  selection = shuffled_labels[:num]
  return selection

In [4]:
def average_aggregation(prediction_scores):
  """Aggregate the users' predicted scores into their arithmetic mean."""
  group_score = statistics.mean(prediction_scores)
  return group_score


def least_misery(prediction_scores):
  """Aggregate the users' predicted scores into the lowest one (least misery)."""
  lowest_score = min(prediction_scores)
  return lowest_score


def get_combined_predictions(group_predictions):
  """Merge per-user prediction lists into one entry per item.

  Args:
    group_predictions: one list of predictions per user. Each prediction is
      indexable, with the predicted score at position 0 and the item
      identifier at position 1. Predictions are matched by position, so the
      i-th prediction of every user is assumed to refer to the same item as
      the i-th prediction of the first user.

  Returns:
    A list of `[item, scores]` lists, where `scores` holds the predicted
    score of each user (in group order) for that item. An empty input yields
    an empty list.
  """
  # An empty group has nothing to combine; without this guard the lookup of
  # the first user's predictions would raise IndexError.
  if len(group_predictions) == 0:
    return []

  # The first user's predictions define the items and their order.
  combined_predictions = [[prediction[1], []] for prediction in group_predictions[0]]

  for user_predictions in group_predictions:
    for position, prediction in enumerate(user_predictions):
      combined_predictions[position][1].append(prediction[0])

  return combined_predictions


def get_group_recommendation(df, group, neighbourhood_size=50, predictions_size=10, group_score_function=average_aggregation, score_function=recommender.pearson_correlation):
  """Recommend items to a group by aggregating each member's predictions.

  Args:
    df: user-item rating DataFrame (NaN marks an unknown rating).
    group: the users forming the group.
    neighbourhood_size: size passed to `recommender.get_neighborhood`.
    predictions_size: number of entries requested from
      `recommender.get_top_k_predictions`.
    group_score_function: maps the list of per-user predicted scores of an
      item to a single group score.
    score_function: similarity function handed to
      `recommender.get_neighborhood`.

  Returns:
    The result of `recommender.get_top_k_predictions` applied to entries of
    the form `[group_score, item, user_scores]`.
  """
  # One neighbourhood per group member, in group order.
  neighbourhoods = []
  for member in group:
    neighbourhoods.append(recommender.get_neighborhood(df, member, neighbourhood_size, score_function))

  # Only items that no group member has rated are candidates.
  candidate_items = find_nan_columns_in_group(df, group)

  per_user_predictions = []
  for member, neighbours in zip(group, neighbourhoods):
    per_user_predictions.append(
      recommender.get_items_predictions_based_on_similarity(df, member, neighbours, candidate_items)
    )

  # Prepend the aggregated group score to every [item, user_scores] entry.
  scored_predictions = [
    [group_score_function(user_scores), item, user_scores]
    for item, user_scores in get_combined_predictions(per_user_predictions)
  ]

  return recommender.get_top_k_predictions(scored_predictions, predictions_size)


def get_group_recommendation_with_disagreements(df, group, neighbourhood_size=50, predictions_size=10, group_predictions_size=300, group_score_function=average_aggregation, score_function=None, min_st_dev=1e-6):
  """Recommend items to a group, favouring items the members agree on.

  A pool of `group_predictions_size` group recommendations is computed first.
  Each group score is then multiplied by `(1 / st_dev) ** (1 / group_size)`,
  where `st_dev` is the sample standard deviation of the members' predicted
  scores, and the pool is re-ranked by the adjusted score.

  Args:
    df: user-item rating DataFrame (NaN marks an unknown rating).
    group: the users forming the group.
    neighbourhood_size: forwarded to `get_group_recommendation`.
    predictions_size: number of recommendations returned.
    group_predictions_size: size of the candidate pool that is re-ranked.
    group_score_function: aggregation of the per-user scores of an item.
    score_function: optional similarity function forwarded to
      `get_group_recommendation`; when None that function's default is used.
    min_st_dev: lower bound applied to the standard deviation so that
      (near-)perfect agreement gets a large finite weight instead of a
      division by zero.

  Returns:
    Up to `predictions_size` entries `[adjusted_score, item, user_scores]`,
    sorted by adjusted score in descending order.
  """
  forwarded = {} if score_function is None else {'score_function': score_function}
  predictions = get_group_recommendation(df, group, neighbourhood_size, group_predictions_size, group_score_function=group_score_function, **forwarded)

  group_size = len(group)
  for prediction in predictions:
    predicted_score = prediction[0]
    users_predictions = prediction[2]

    # statistics.stdev needs at least two values; a single score carries no
    # disagreement at all.
    st_dev = statistics.stdev(users_predictions) if len(users_predictions) > 1 else 0.0

    if st_dev != st_dev:
      # NaN deviation (some user score is NaN): keep ranking the item last.
      prediction[0] = 0
      continue

    # Clamp instead of zeroing the score: full agreement must rank highest,
    # not lowest.
    effective_st_dev = st_dev if st_dev > min_st_dev else min_st_dev
    prediction[0] = ((1 / effective_st_dev) ** (1 / group_size)) * predicted_score

  sorted_predictions = sorted(predictions, key=lambda x: x[0], reverse=True)
  return sorted_predictions[:predictions_size]

In [5]:
# Seed the global RNG so the sampled group, and every result derived from it,
# is the same on each Restart & Run All. Outputs recorded before this seed was
# introduced came from an unseeded run and will differ until the notebook is
# re-executed.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

users = random_distinct_indexes(df_user_based_cf, 3)

# Same group, ranked with and without the disagreement adjustment.
standard_predictions = get_group_recommendation(df_user_based_cf, users, group_score_function=average_aggregation)
predictions_with_disagrements = get_group_recommendation_with_disagreements(df_user_based_cf, users)

In [6]:
def float_approx(num):
  """Format `num` as a string with exactly two decimal places."""
  return f"{num:.2f}"

def float_list_approx(nums):
  """Format every number with `float_approx` and join them with ', '."""
  return ', '.join(map(float_approx, nums))

def print_group_predictions(group_predictions):
  """Print one line per prediction: item id, group score and per-user scores.

  Each prediction is indexed as `[group_score, item_id, user_scores]`.
  """
  for entry in group_predictions:
    group_score = float_approx(entry[0])
    movie_id = entry[1]
    user_scores = float_list_approx(entry[2])
    print(f"Movie ID: {movie_id}\tScore: [{group_score}]\t Users' Predictions: [{user_scores}]")

# Show both rankings one after the other, each under its own heading.
for section_title, section_predictions in (
  ("Prediction without considering disagreements", standard_predictions),
  ("\nPrediction considering disagreements", predictions_with_disagrements),
):
  print(section_title)
  print_group_predictions(section_predictions)

Prediction without considering disagreements
Movie ID: 1233	Score: [4.64]	 Users' Predictions: [4.63, 5.13, 4.16]
Movie ID: 1208	Score: [4.57]	 Users' Predictions: [4.89, 4.78, 4.02]
Movie ID: 58	Score: [4.50]	 Users' Predictions: [4.81, 4.84, 3.86]
Movie ID: 3972	Score: [4.50]	 Users' Predictions: [3.53, 5.18, 4.79]
Movie ID: 1374	Score: [4.50]	 Users' Predictions: [4.05, 5.19, 4.25]
Movie ID: 1912	Score: [4.49]	 Users' Predictions: [4.51, 5.23, 3.74]
Movie ID: 2716	Score: [4.48]	 Users' Predictions: [4.27, 6.06, 3.11]
Movie ID: 1250	Score: [4.45]	 Users' Predictions: [3.55, 5.29, 4.51]
Movie ID: 3469	Score: [4.43]	 Users' Predictions: [3.82, 5.75, 3.73]
Movie ID: 1673	Score: [4.43]	 Users' Predictions: [3.79, 5.19, 4.32]

Prediction considering disagreements
Movie ID: 261	Score: [12.19]	 Users' Predictions: [4.23, 4.27, 4.19]
Movie ID: 1957	Score: [8.77]	 Users' Predictions: [4.37, 4.20, 4.16]
Movie ID: 1608	Score: [8.69]	 Users' Predictions: [3.73, 3.81, 3.65]
Movie ID: 3052	Score: 