In [395]:
import pandas as pd
import numpy as np
import math as m
import random as r
from tabulate import tabulate
from scipy.stats import pearsonr
from scipy.stats import spearmanr
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import itertools

links = pd.read_csv('ml-latest-small/links.csv')
links.head(5)
movies = pd.read_csv('ml-latest-small/movies.csv')
movies.head(5)
tags = pd.read_csv('ml-latest-small/tags.csv')
tags.head(5)
ratings = pd.read_csv("ml-latest-small/ratings.csv")
ratings.head(5)
#dropping the timestamp column
ratings = ratings.drop(['timestamp'], axis=1)
#movie and ratings dataset
movie_ratings = pd.merge(ratings, movies, on='movieId')
movie_ratings.head()
#reshaping the data to table based on column values
user_ptable= ratings.pivot(index='userId', columns='movieId', values='rating')
user_ptable.head()


#pearson correlation coefficient
def pearson_correlation(user_a_ratings,user_b_ratings):
    corr,_ = pearsonr(user_a_ratings,user_b_ratings)
    return corr

def user_collaborative_filtering(target_user,p_table,correlationfunction):
    '''
    Gets the most similar users and their correlations to the target user
    Parameters: int target_user -user id in the dataset
                p_table - data as a pivot table
                correlationfunction - the correlation function to be used
    Return: dict similar_users -dictionary of users who have rated similar movies as the target user
    with their ratings.
    '''
    similar_users = {}
    #other users who are not the target user
    for user_b in p_table.index:
        if user_b != target_user:
            # ratings for the target user and user_b
            target_user_ratings = p_table.loc[target_user].dropna()
            user_b_ratings = p_table.loc[user_b].dropna()

            # common rated movies
            common_rated_movies = target_user_ratings.index.intersection(user_b_ratings.index)
            #filter for at least 2  common rated movies
            if len(common_rated_movies) >= 2:
                #filter  ratings to include only common rated movies
                target_user_ratings = target_user_ratings[common_rated_movies]
                user_b_ratings = user_b_ratings[common_rated_movies]
                #check if either contains all the same elements as correlation will be 1 regardless of actual rating
                if len(set(target_user_ratings)) == 1 or len(set(user_b_ratings)) == 1:
                    continue
                similar_users[user_b] = correlationfunction(target_user_ratings,user_b_ratings)
                    
    return similar_users
     
def user_prediction(user_a,item_p,p_table,similarities):
    '''
    Calculates the predicted rating of user `user_a` for item `item_p`.
    Parameters: int user_a - the index of the target user
                int item_p - the index of the unseen movie by target user
                p_table - pivot table of data
                similarities - the dictionary of correlations between target user
                  and other users.
    Return: int prediction - rating of user a for item p
    '''
    user_a_ratings = p_table.loc[user_a]
    mean_usera_ratings = user_a_ratings.mean()
    unseen_item_ratings = p_table.loc[:, item_p].dropna()

    # Get the similarity scores between the target user and other users who have rated the unseen item.
    #relevant_similarities = {}
    predicted_rating = 0
    weighted_difference = 0
    similarity_sum = 0
    for user_b, similarity in similarities.items():
        if user_b != user_a and user_b in unseen_item_ratings.index:
            user_b_ratings = p_table.loc[user_b]
            mean_userb_ratings = user_b_ratings.mean()
            rating_difference = unseen_item_ratings.loc[user_b] - mean_userb_ratings
            weighted_difference += (similarity*rating_difference)
            similarity_sum += abs(similarity)

    if similarity_sum != 0:
        # the prediction as the active user's mean plus the weighted rating differences
        predicted_rating = mean_usera_ratings + (weighted_difference / similarity_sum)
    else:
        predicted_rating = mean_usera_ratings

    return np.clip(predicted_rating,0.5,5)

def get_user_recommendations(user, p_table, correlation_function, prediction_function,top_n = 10):
    '''Function gets the user _recommendations for a particular user using the prediction function
    Returns a dictionary of the movie (key) and the predicted rating(value).
    p table in this case is the data in the said iteration
    '''
    similar_users = user_collaborative_filtering(user, p_table, correlation_function)
    sorted_similar_users =  sorted(similar_users.items(), key=lambda item: item[1],reverse=True)
    #sorted_similar_users = sorted(similar_users.items(), key=operator.itemgetter(1), reverse=True)
    top_similar_users = sorted_similar_users[:top_n]
    top_10_similar_users_dict={}
    for user,similarity in top_similar_users:
        top_10_similar_users_dict[user]=similarity
    user_recommendations = {}
    for movie in p_table.columns:
        if pd.isna(p_table.loc[user, movie]):
            user_recommendations[movie] = prediction_function(user, movie, p_table, top_10_similar_users_dict)
    sorted_user_recommendations =  sorted(user_recommendations.items(), key=lambda item: item[1],reverse=True)
    top_10_user_recommendations = sorted_user_recommendations[:top_n]
    return user_recommendations
                                                                                                                                                                             

In [396]:
def get_group_recommendations(user_recommendations_dict, aggregation_method, top_n = 10):
    '''Function calculates the group_recommendation based on the given aggregation method.
    The aggregation methods are average and least misery method.
    Returns :list(tuple) of the movies and predicted rating based on selected method'''
  
    movie_ratings = {}
    #user_recommendations_list = [user_recommendations]
    for user,recommendations in user_recommendations_dict.items():
        for movie, rating in recommendations.items():
            if movie not in movie_ratings:
                movie_ratings[movie] = []
            movie_ratings[movie].append(rating)
    aggregated_ratings = {}   
    if aggregation_method == 'average':
        aggregated_ratings = {movie: np.mean(ratings) for movie, ratings in movie_ratings.items()}

    elif aggregation_method == 'least misery':
        aggregated_ratings = {movie: np.min(ratings) for movie, ratings in movie_ratings.items()}
        
        
    #group recommendations
    sorted_group_recommendations = sorted(aggregated_ratings.items(), key=lambda item: item[1], reverse=True)
    top_group_recommendations = sorted_group_recommendations[:top_n]

    return aggregated_ratings

The weighted aggregated group recommendations was taken advantage of for the subsequent sequential group recommendations.
The disagreements are calculated with the cosine similarity measure which when subtracted from 1 creates a disimilarity measure.
The disagreements are normalised because cosine similarity falls between [-1,1] but we now want the values to fall between the range [0,1] cosine similarity is a measure between vectors and the magnitude of the vectors can influence the similarity.This will ensure the dissimilarity values are independed of the vector magnitudes.

In [397]:
def weighted_group_recommendation(common_recommendations):
    '''
    parameters : dataframe of  common user recommendations
    returns the group recommendations ->pd series
    
    '''
    #Calculating disaggreements
    disagreements = 1 - cosine_similarity(common_recommendations.T)
    # Create a DataFrame with disagreements
    disagreements_df = pd.DataFrame(disagreements, columns=common_recommendations.columns, index=common_recommendations.columns)
    weights = 1 - (disagreements_df - disagreements_df.min()) / (disagreements_df.max() - disagreements_df.min())
    # Weighted Aggregation for group recommendations
    weighted_aggregated_recommendations = common_recommendations.copy()

    # multiplying each user's recommendation by their corresponding weight based on disagreements
    for user in common_recommendations.columns:
        weighted_aggregated_recommendations[user] *= weights.loc[user, user]

    # Sum up the weighted recommendations across users to get aggregated recommendations
    group_recommendations = weighted_aggregated_recommendations.sum(axis=1)
    # normalized aggregated recommendations to range (0 and 5) 
    #this works similarly as doing the average.
    min_rating = 0 
    max_rating = 5  

    normalized_group_recommendations = (
        (group_recommendations - group_recommendations.min()) / (group_recommendations.max() - group_recommendations.min())
    ) * (max_rating - min_rating) + min_rating
    top_10_group_recommendations = normalized_group_recommendations.sort_values(ascending=False).head(10)

    return normalized_group_recommendations


In [398]:
def get_group_predictions(user_ptable,group_users):
    user_recommendations_dict = {}
    for user in group_users:
        user_recommendations_dict [user] = get_user_recommendations(user, user_ptable, pearson_correlation, user_prediction)
    #user recommendations dict as a DataFrame
    recommendations_df = pd.DataFrame(user_recommendations_dict)
    # commonly recommended items to the 3 users
    common_recommendations = recommendations_df.dropna(axis=0, how='any')    
    #average_ratings = group_recommendations(user_recommendations_dict,'average')
    least_misery_ratings = get_group_recommendations(user_recommendations_dict,'least misery')
    weighted_agg = weighted_group_recommendation(common_recommendations)
    return least_misery_ratings,weighted_agg


# Satisfaction-Optimized Sequential Aggregation Model

## Design and Implementation:
Sequential Aggregation Model (satisfaction_optimized_sequential_agg_model function):

Purpose: Provides sequential recommendations for a group of users, optimizing satisfaction and adapting to evolving preferences.

### Parameters:

- i: Iteration number (sequential step).

- group_users: List of user IDs in the group.

- user_ptable: Pivot table of user ratings.

- prev_user_satisfactions: Dictionary storing satisfaction scores from previous iterations.

- top_n: Number of top recommendations to consider (default is set to 10).

### Key Components:

- Initialization:For the first iteration (i == 1), it calculates the initial group recommendation using the weighted_group_recommendation function based on the collaborative filtering approach.

- Sequential Scores:For subsequent iterations (i > 1), it calculates sequential scores using the compute_scores function, incorporating recommendations from previous iterations.

- User Satisfaction:Calculates user satisfaction scores based on either group recommendations (for the first iteration) or sequential scores (for subsequent iterations).

- Iteration Update:Updates the satisfaction scores for each user in the prev_user_satisfactions dictionary.

- Print Output:Prints the satisfaction scores and recommendations for the current iteration.

### Advantages:

- Sequential Consideration:Takes into account the sequential nature of group recommendations, considering recommendations and satisfaction scores from previous iterations for a refined experience.

- Adaptability with Iterations:Introduces an adaptive factor (alpha) to adjust the importance of previous recommendations, allowing the system to adapt to evolving group preferences.

- User-Centric Satisfaction:Incorporates user satisfaction as a key metric, ensuring that recommendations align with individual user preferences for higher overall satisfaction.

- Combination of Group and Sequential Approaches:Intelligently combines both group recommendations and sequential scores, starting with collaborative filtering and refining recommendations over iterations.

- Flexibility in Recommendation Strategies:Provides flexibility by allowing different aggregation methods for group recommendations, accommodating diverse group preferences.

### Theoretical Foundation:

- Adaptive Recommendation Adjustment:The method adjusts recommendations based on both predicted preferences and the lowest satisfaction in the group, optimizing for a balance between personalization and group satisfaction.

- Sequential Satisfaction Computation:Sequentially computes satisfaction, adapting to evolving preferences and providing a dynamic group recommendation experience.

- Combination of Collaborative Filtering and Sequential Scoring:Integrates collaborative filtering for initial group recommendations and sequential scoring for iterative refinement, ensuring a comprehensive and adaptive approach.

### Explanation of Method Effectiveness for Sequential Group Recommendations:

1. **Sequential Nature Consideration:**
   - *Reason:* The method acknowledges that group preferences evolve over time. By considering recommendations and satisfaction scores from previous iterations, it adapts to changing dynamics and provides recommendations aligned with the current state of user preferences.

2. **Adaptive Recommendation Adjustment:**
   - *Reason:* The introduction of an adaptive factor (`alpha`) allows the system to adjust the influence of previous recommendations. This adaptability optimizes the balance between historical preferences and the most recent ones, ensuring that the system is responsive to shifts in group tastes.

3. **User-Centric Satisfaction:**
   - *Reason:* The method prioritizes user satisfaction as a key metric for evaluating the effectiveness of recommendations. By focusing on individual satisfaction, it ensures that the group recommendations are tailored to the preferences of each user, contributing to an overall positive group experience.

4. **Combination of Collaborative Filtering and Sequential Scoring:**
   - *Reason:* The integration of collaborative filtering for initial group recommendations and sequential scoring for iterative refinement creates a comprehensive approach. Collaborative filtering captures the collective preferences of the group, while sequential scoring refines recommendations based on evolving tastes, resulting in a well-rounded solution.

5. **Flexibility in Recommendation Strategies:**
   - *Reason:* The method offers flexibility by allowing different aggregation methods for group recommendations, such as 'average' or 'least misery.' This adaptability accommodates diverse group preferences and ensures that the system can adjust its strategy based on the nature of the group.

6. **Balancing Predicted Preferences and Lowest Satisfaction:**
   - *Reason:* The theoretical foundation of balancing recommendations based on both predicted preferences and the lowest satisfaction in the group contributes to a more nuanced and considerate recommendation strategy. It avoids consistently overlooking the preferences of users who might be less satisfied in previous iterations.

7. **Holistic Group Satisfaction Approach:**
   - *Reason:* By combining sequential considerations, adaptive adjustments, and user-centric satisfaction, the method takes a holistic approach to group satisfaction. It strives to enhance the overall satisfaction of the entire group by catering to individual preferences and adapting to evolving tastes.

8. **Dynamic Evolution of Recommendations:**
   - *Reason:* The method's design allows for the dynamic evolution of recommendations over sequential iterations. This ensures that the system does not become static and remains responsive to the changing dynamics of the group, providing relevant and timely suggestions.

In essence, the proposed method excels in sequential group recommendations by leveraging adaptability, user-centricity, and a combination of collaborative and sequential strategies. It offers a comprehensive and flexible approach that aligns with the dynamic nature of group preferences, ultimately contributing to enhanced group satisfaction over sequential iterations.

In [399]:
def calculate_user_satisfaction(user_recommendations_dict,group_recommendation,sorted_group_recommendations, k=10):
    #the top 10 movies in the group_recommendation
    top_10_movies = [movie_id for movie_id, _ in sorted_group_recommendations]

    user_satisfaction = {}
    for user, user_recs in user_recommendations_dict.items():
        # Ensure user recommendations align with top-k group recommendations
        user_top_k = [movie_id for movie_id, _ in user_recs.items() if movie_id in top_10_movies]

        # Group Recommendation Satisfaction (Equation 2)
        group_list_sat = sum(group_recommendation.get(movie_id, 0) for movie_id in user_top_k)

        # Calculate Individual User Satisfaction (Equation 3)
        individual_list_sat = sum(user_recs.get(movie_id, 0) for movie_id in user_top_k)

        # Calculate User Satisfaction (Equation 1)
        user_satisfaction[user] = group_list_sat / individual_list_sat if individual_list_sat != 0 else 0

    # Check user satisfaction scores
    return user_satisfaction



In [400]:
def calculate_group_satisfaction(group_users, satisfaction_scores):
    total_satisfaction = sum(satisfaction_scores[user] for user in group_users)
    group_satisfaction = total_satisfaction / len(group_users)
    return group_satisfaction

In [401]:
def compute_scores(G,user_ptable,alpha,group_users):
    '''G set of movies
    '''
    least_misery_ratings,weighted_agg = get_group_predictions(user_ptable,group_users)
    scores = {}
    for movie_id in G:
        # Find the least misery rating for the current movie
        least_score = next((rating for mid, rating in least_misery_ratings.items() if mid == movie_id), 0)
        # Find the weighted aggregation score for the current movie
        weighted_score = weighted_agg.get(movie_id, 0)

        score = (1 - alpha) * weighted_score + (alpha * least_score)
        scores[movie_id] = score
    return scores


In [402]:
def sequential_agg_model(i,group_users,user_ptable,top_n = 10,prev_user_satisfactions = None):
    if prev_user_satisfactions is None:
        prev_user_satisfactions  = {}
    
    user_recommendations_dict = {}#a
    G = set()
    for user in group_users:
        recommendations = get_user_recommendations(user, user_ptable, pearson_correlation, user_prediction)
        user_recommendations_dict [user] = recommendations
        G.update(recommendations)

    #user recommendations dict as a DataFrame
    recommendations_df = pd.DataFrame(user_recommendations_dict)
    # commonly recommended items to the 3 users
    common_recommendations = recommendations_df.dropna(axis=0, how='any')

    sequential_scores = {}
    if i == 1:
        alpha = 0
        #group_recommendations = weighted_group_recommendation(common_recommendations)#top 10 group recommendations
        #satisfaction_scores = calculate_user_satisfaction(common_recommendations,group_recommendations)
        #prev_user_satisfactions[i] = satisfaction_scores
    else:
        satisfaction_values = prev_user_satisfactions.get(i-1, {})
        alpha = max(satisfaction_values.values()) - min(satisfaction_values.values())
        
    sequential_scores = compute_scores(G,user_ptable,alpha,group_users)
    sorted_group_recommendations = sorted(sequential_scores.items(), key=lambda x: x[1], reverse=True)[:top_n]
    
    satisfaction_scores = calculate_user_satisfaction(user_recommendations_dict,sequential_scores,sorted_group_recommendations)
    prev_user_satisfactions[i] = satisfaction_scores
    group_satisfaction = calculate_group_satisfaction(group_users,satisfaction_scores)
        
    recommended_movies = [movie[0] for movie in sorted_group_recommendations]
    # Remove recommended movies from user_ptable columns
    user_ptable = user_ptable.drop(recommended_movies, axis=1, errors='ignore')

    print(f'Iteration {i} Satisfaction Scores at alpha = {alpha}:')
    for user, satisfaction in satisfaction_scores.items():
        print(f'User {user}: {satisfaction}')

    print(f'\nGroup Satisfaction Score: {group_satisfaction}')
    print(f'\nSequential Recommendations at Iteration {i}:')
    for movie_id, score in sorted_group_recommendations:
        print(f'Movie ID: {movie_id}, Score: {score}')
    return user_ptable, sequential_scores, prev_user_satisfactions


In [403]:
group_users = [1,2,3]
# First iteration
user_ptable2, sequential_scores, prev_user_satisfactions1 = sequential_agg_model(1, group_users, user_ptable)


Iteration 1 Satisfaction Scores at alpha = 0:
User 1: 1.1375626092804556
User 2: 0.9548186804347567
User 3: 1.0216702803063453

Group Satisfaction Score: 1.038017190007186

Sequential Recommendations at Iteration 1:
Movie ID: 47, Score: 5.0
Movie ID: 1198, Score: 4.95983651729687
Movie ID: 3863, Score: 4.861231469497395
Movie ID: 750, Score: 4.708014901869666
Movie ID: 1129, Score: 4.708014901869666
Movie ID: 3740, Score: 4.708014901869666
Movie ID: 101, Score: 4.698955332333643
Movie ID: 968, Score: 4.698955332333643
Movie ID: 1235, Score: 4.698955332333643
Movie ID: 1285, Score: 4.698955332333643


In [404]:
user_ptable3, sequential_scores, prev_user_satisfactions = sequential_agg_model(2,group_users,user_ptable2,prev_user_satisfactions = prev_user_satisfactions1)

Iteration 2 Satisfaction Scores at alpha = 0.1827439288456989:
User 1: 1.2541934095883078
User 2: 0.9320423168924368
User 3: 0.998701090332866

Group Satisfaction Score: 1.061645605604537

Sequential Recommendations at Iteration 2:
Movie ID: 1089, Score: 4.823027218547004
Movie ID: 2959, Score: 4.690597072478593
Movie ID: 1449, Score: 4.623985712298976
Movie ID: 1587, Score: 4.623985712298976
Movie ID: 1997, Score: 4.623985712298976
Movie ID: 2020, Score: 4.623985712298976
Movie ID: 2116, Score: 4.623985712298976
Movie ID: 2530, Score: 4.623985712298976
Movie ID: 3262, Score: 4.623985712298976
Movie ID: 3771, Score: 4.623985712298976


In [405]:
# Third iteration
user_ptable4, sequential_scores, prev_user_satisfactions = sequential_agg_model(3,group_users,user_ptable3,prev_user_satisfactions = prev_user_satisfactions)

Iteration 3 Satisfaction Scores at alpha = 0.32215109269587106:
User 1: 1.1405787584986096
User 2: 0.9379333571091371
User 3: 1.113039797539193

Group Satisfaction Score: 1.0638506377156465

Sequential Recommendations at Iteration 3:
Movie ID: 4006, Score: 4.706842505646757
Movie ID: 4180, Score: 4.706842505646757
Movie ID: 4915, Score: 4.706842505646757
Movie ID: 4941, Score: 4.706842505646757
Movie ID: 5026, Score: 4.706842505646757
Movie ID: 5540, Score: 4.706842505646757
Movie ID: 48774, Score: 4.642399083087692
Movie ID: 1059, Score: 4.617082324687607
Movie ID: 1274, Score: 4.58284369657521
Movie ID: 3727, Score: 4.566291084362085
