# Group Movie Recommendation System

Here we are using the same MovieLens 100K data. By using the collaborative filtering approach, we get predicted movie ratings for all the movies for each group members. A prompt will ask for the inputs for the group members. Simply input the userIds (with comma seperation). First, we are giving movies recommendation (top 20) for each members of the group. Then we used two well established aggregation
methods for the group recommendations.

The first approach is the average method and the second one is the least misery method. We give a list of 20 movies as recommnendation in both the cases.

Then we have defined a way for counting the disagreements between the group members. We have proposed an equation to calculate the movie score which takes disagreements into account.

We are calling this as the the disagreement-aware recommendation method.

In [1]:
import numpy as np
import pandas as pd
from scipy import stats

root = 'ml-latest-small'
movies_df = pd.read_csv(root + '/movies.csv', sep=',')
ratings_df = pd.read_csv(root + '/ratings.csv', sep=',')

def get_similarUsers(u_index, ui_matrix, topk_users=30):
    sim = []
    total_users = ui_matrix.shape[0]
    x = np.array(ui_matrix.iloc[u_index, :])
    for i in range(total_users):
        if i != u_index:
            y = np.array(ui_matrix.iloc[i, :])
            xy_stacked = np.vstack([x, y])
            xy = xy_stacked[:, ~np.any(np.isnan(xy_stacked), axis=0)]

            if len(xy[0]) > 3 and len(xy[1]) > 3:
                r, p = stats.mstats.pearsonr(xy[0], xy[1])
                if np.isnan(r):
                    r = 0
                sim.append((i, r))
    sim.sort(key=lambda o: o[1])
    return list(reversed(sim[-topk_users:]))

def pred(u_index, ui_matrix, sim_matrix, topk_recommendations=20):
    item_pred = []
    x = np.array(ui_matrix.iloc[u_index, :])
    x_mean = np.nanmean(x)
    user_rating = []
    for item in range(len(x)):
        if np.isnan(ui_matrix.iloc[u_index, item]):
            simsum_centering = []
            sim_sum = 0
            for index, similarity in sim_matrix:
                if not np.isnan(ui_matrix.iloc[index, item]):
                    y = np.array(ui_matrix.iloc[index, :])
                    y_mean = np.nanmean(y)
                    rating = ui_matrix.iloc[index, item]
                    rating_centered = rating - y_mean
                    simsum_centering.append(similarity * rating_centered)
                    sim_sum += similarity
            if sim_sum != 0 and np.sum(simsum_centering) != 0:
                pred = x_mean + (np.sum(simsum_centering) / abs(sim_sum))
                item_pred.append((item, pred))
            else:
                pred = x_mean
                item_pred.append((item, pred))
            user_rating.append(pred)
        else:
            user_rating.append(ui_matrix.iloc[u_index, item])
    item_pred.sort(key=lambda u: u[1])
    return list(reversed(item_pred[-topk_recommendations:])), user_rating

def prune_movies_df(movies, ratings):
    movie_ids = np.array(movies['movieId'])
    rated_movie_ids = set(list(np.array(ratings['movieId'])))
    uncommon_movies = []
    for i in movie_ids:
        if i not in rated_movie_ids:
            uncommon_movies.append(i)
    for movie_id in uncommon_movies:
        movies.drop(movies.loc[movies['movieId'] == movie_id].index, inplace=True)
    return movies

def display_topk(movies, agg_list):
    for index, value in agg_list:
        movie_title = movies.get('title')[index]
        print(f'Movie: {movie_title}, Rating: {value}')

def compute_disagreement_score(group_ratings):
    disagreement_scores = []
    for item_ratings in zip(*group_ratings):
        if not all(np.isnan(rating) for rating in item_ratings):
            max_rating = np.nanmax(item_ratings)
            min_rating = np.nanmin(item_ratings)
            range_rating = max_rating - min_rating
            disagreement_scores.append(range_rating)
        else:
            disagreement_scores.append(np.nan)
    max_disagreement = np.nanmax(disagreement_scores)
    disagreement_scores = [d / max_disagreement if not np.isnan(d) else 0 for d in disagreement_scores]
    return disagreement_scores


def compute_combined_score(group_ratings, disagreement_scores, alpha=0.4):
    # Average-based disagreement-aware score
    combined_scores = []
    for idx, item_ratings in enumerate(zip(*group_ratings)):
        avg_rating = np.nanmean(item_ratings)
        dscore = disagreement_scores[idx]
        adjusted_score = avg_rating * (1 - dscore * alpha)
        combined_scores.append((idx, adjusted_score))
    return sorted(combined_scores, key=lambda x: x[1], reverse=True)

# def compute_least_misery_score(group_ratings, disagreement_scores, alpha=0.4):
#     # Least misery-based disagreement-aware score
#     least_misery_scores = []
#     for idx, item_ratings in enumerate(zip(*group_ratings)):
#         least_rating = np.nanmin(item_ratings)
#         dscore = disagreement_scores[idx]
#         adjusted_score = least_rating * (1 - dscore * alpha)
#         least_misery_scores.append((idx, adjusted_score))
#     return sorted(least_misery_scores, key=lambda x: x[1], reverse=True)

# Main Program
movies_df = prune_movies_df(movies_df, ratings_df)
ratings_df_reshaped = ratings_df.pivot(index='userId', columns='movieId', values='rating')
user_item_matrix = ratings_df_reshaped

# Dynamic user input for group members
while True:
    try:
        group_input = input("Enter group member IDs separated by commas: ").strip()
        group = [int(user.strip()) for user in group_input.split(',')]
        if not all(user in user_item_matrix.index for user in group):
            raise ValueError("Some user IDs are invalid.")
    except ValueError as e:
        print(e)
        retry = input("Do you want to try again? (y/n): ").strip().lower()
        if retry != 'y':
            print("Exiting program.")
            exit()
        continue
    break

# Generate predictions and ratings for each group member
group_predictions = []
group_ratings = []
for member in group:
    member_index = member - 1  # Assuming user ID starts from 1 in the dataset
    sim = get_similarUsers(u_index=member_index, ui_matrix=user_item_matrix, topk_users=30)
    predictions, u_rating = pred(u_index=member_index, ui_matrix=user_item_matrix, sim_matrix=sim, topk_recommendations=20)
    group_predictions.append(predictions)
    group_ratings.append(u_rating)

# Display predictions for each group member
# for i, member in enumerate(group):
#     #print(f"\nTop 20 Predictions for User {member}\n")
#     display_topk(movies_df, group_predictions[i])

# Aggregation Methods
r_mean_gi, r_min_gi = [], []
index_visted = []
for i in range(len(group_predictions)):
    for index, value in group_predictions[i]:
        if index not in index_visted:
            index_visted.append(index)
            item_ratings = [group_ratings[j][index] for j in range(len(group))]
            r_mean = np.nanmean(item_ratings)
            r_min = np.nanmin(item_ratings)
            r_mean_gi.append((index, r_mean))
            r_min_gi.append((index, r_min))
r_mean_gi.sort(key=lambda w: w[1])
r_min_gi.sort(key=lambda w: w[1])
top20_average_agg_list = list(reversed(r_mean_gi[-20:]))
top20_leastmisery_agg_list = list(reversed(r_min_gi[-20:]))

print("\nTop 20 Movies for the group using Average Aggregation Method:\n")
display_topk(movies_df, top20_average_agg_list)

print("\nTop 20 Movies for the group using Least Misery Aggregation Method:\n")
display_topk(movies_df, top20_leastmisery_agg_list)

# Disagreement-Aware Method
disagreement_scores = compute_disagreement_score(group_ratings)
top20_combined_agg_list = compute_combined_score(group_ratings, disagreement_scores, alpha=0.4)[:20]

print("\nTop 20 Movies for the group using Disagreement-Aware Aggregation Method:\n")
display_topk(movies_df, top20_combined_agg_list)

# Disagreement-Aware Least Misery Method
# top20_least_misery_agg_list = compute_least_misery_score(group_ratings, disagreement_scores, alpha=0.4)[:20]

# print("\nTop 20 Movies for the group using Disagreement-Aware Aggregation (Least Misery) Method:\n")
# display_topk(movies_df, top20_least_misery_agg_list)

  return scipy.stats._stats_py.pearsonr(



Top 20 Movies for the group using Average Aggregation Method:

Movie: Soul Surfer (2011), Rating: 4.820260293585492
Movie: Escape From Tomorrow (2013), Rating: 4.732108153817095
Movie: Bound (1996), Rating: 4.722161155160958
Movie: 2001 Maniacs (2005), Rating: 4.709628094383838
Movie: Taming of the Shrew, The (1967), Rating: 4.669750543875505
Movie: Iron Giant, The (1999), Rating: 4.596994824442841
Movie: Bulworth (1998), Rating: 4.532436579017352
Movie: Awake (2007), Rating: 4.491691645170454
Movie: Shark Night 3D (2011), Rating: 4.468108746328439
Movie: The Good Boy (2016), Rating: 4.463541824328012
Movie: Volga - Volga (1938), Rating: 4.394172463100367
Movie: Bell, Book and Candle (1958), Rating: 4.377460303398632
Movie: His Girl Friday (1940), Rating: 4.364934419541238
Movie: Enchanted April (1992), Rating: 4.353769467851996
Movie: Snakes on a Plane (2006), Rating: 4.352296338652413
Movie: Middle Men (2009), Rating: 4.349028185181489
Movie: Kicking Off (2016), Rating: 4.3442251711