In [206]:
import pandas as pd
import numpy as np
import math as m
import random as r
from tabulate import tabulate
from scipy.stats import pearsonr
from scipy.stats import spearmanr
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import itertools


links = pd.read_csv('ml-latest-small/links.csv')
links.head(5)
movies = pd.read_csv('ml-latest-small/movies.csv')
movies.head(5)
tags = pd.read_csv('ml-latest-small/tags.csv')
tags.head(5)
ratingss = pd.read_csv("ml-latest-small/ratings.csv")
ratingss.head(5)
#dropping the timestamp column
ratings = ratingss.drop(['timestamp'], axis=1)
ratings_subsets = [
    ratings[:10000], #subset 1
    ratings[:20000], #subsets 1-2
    ratings[:30000], #subsets 1-3
    ratings[:40000], #subsets 1-4
    ratings[:50000], #subsets 1-5
    ratings[:60000], #subsets 1-6
    ratings[:70000], #subsets 1-7
    ratings[:80000], #subsets 1-8
    ratings[:90000], #subsets 1-9
    ratings, #subsets 1-10
]
#movie and ratings dataset
movie_ratings = pd.merge(ratingss, movies, on='movieId')
movie_ratings.head()
#reshaping the data to table based on column values
user_ptable= ratings.pivot(index='userId', columns='movieId', values='rating')
user_ptable.head()


#pearson correlation coefficient
def pearson_correlation(user_a_ratings,user_b_ratings):
    corr,_ = pearsonr(user_a_ratings,user_b_ratings)
    return corr

def user_collaborative_filtering(target_user,p_table,correlationfunction):
    '''
    Gets the most similar users and their correlations to the target user
    Parameters: int target_user -user id in the dataset
                p_table - data as a pivot table
                correlationfunction - the correlation function to be used
    Return: dict similar_users -dictionary of users who have rated similar movies as the target user
    with their ratings.
    '''
    similar_users = {}
    #other users who are not the target user
    for user_b in p_table.index:
        if user_b != target_user:
            # ratings for the target user and user_b
            target_user_ratings = p_table.loc[target_user].dropna()
            user_b_ratings = p_table.loc[user_b].dropna()

            # common rated movies
            common_rated_movies = target_user_ratings.index.intersection(user_b_ratings.index)
            #filter for at least 2  common rated movies
            if len(common_rated_movies) >= 2:
                #filter  ratings to include only common rated movies
                target_user_ratings = target_user_ratings[common_rated_movies]
                user_b_ratings = user_b_ratings[common_rated_movies]
                #check if either contains all the same elements as correlation will be 1 regardless of actual rating
                if len(set(target_user_ratings)) == 1 or len(set(user_b_ratings)) == 1:
                    continue
                similar_users[user_b] = correlationfunction(target_user_ratings,user_b_ratings)
                    
    return similar_users
     
def user_prediction(user_a,item_p,p_table,similarities):
    '''
    Calculates the predicted rating of user `user_a` for item `item_p`.
    Parameters: int user_a - the index of the target user
                int item_p - the index of the unseen movie by target user
                p_table - pivot table of data
                similarities - the dictionary of correlations between target user
                  and other users.
    Return: int prediction - rating of user a for item p
    '''
    user_a_ratings = p_table.loc[user_a]
    mean_usera_ratings = user_a_ratings.mean()
    unseen_item_ratings = p_table.loc[:, item_p].dropna()

    # Get the similarity scores between the target user and other users who have rated the unseen item.
    #relevant_similarities = {}
    predicted_rating = 0
    weighted_difference = 0
    similarity_sum = 0
    for user_b, similarity in similarities.items():
        if user_b != user_a and user_b in unseen_item_ratings.index:
            user_b_ratings = p_table.loc[user_b]
            mean_userb_ratings = user_b_ratings.mean()
            rating_difference = unseen_item_ratings.loc[user_b] - mean_userb_ratings
            weighted_difference += (similarity*rating_difference)
            similarity_sum += abs(similarity)

    if similarity_sum != 0:
        # the prediction as the active user's mean plus the weighted rating differences
        predicted_rating = mean_usera_ratings + (weighted_difference / similarity_sum)
    else:
        predicted_rating = mean_usera_ratings

    return np.clip(predicted_rating,0.5,5)

def get_user_recommendations(user, p_table, correlation_function, prediction_function,top_n = 10):
    '''Function gets the user _recommendations for a particular user using the prediction function
    Returns a dictionary of the movie (key) and the predicted rating(value).
    p table in this case is the data in the said iteration
    '''
    similar_users = user_collaborative_filtering(user, p_table, correlation_function)
    sorted_similar_users =  sorted(similar_users.items(), key=lambda item: item[1],reverse=True)
    #sorted_similar_users = sorted(similar_users.items(), key=operator.itemgetter(1), reverse=True)
    top_similar_users = sorted_similar_users[:top_n]
    top_10_similar_users_dict={}
    for user,similarity in top_similar_users:
        top_10_similar_users_dict[user]=similarity
    user_recommendations = {}
    for movie in p_table.columns:
        if pd.isna(p_table.loc[user, movie]):
            user_recommendations[movie] = prediction_function(user, movie, p_table, top_10_similar_users_dict)
    sorted_user_recommendations =  sorted(user_recommendations.items(), key=lambda item: item[1],reverse=True)
    top_10_user_recommendations = sorted_user_recommendations[:top_n]
    return user_recommendations
   
                                                                                                                                                                          

In [207]:
def borda_count(user_recommendations_dict):
    borda_ratings = {}
    for _, recommendations in user_recommendations_dict.items():
        rank = len(recommendations)  # Initial rank value
        for movie, rating in recommendations.items():
            if movie not in borda_ratings:
                borda_ratings[movie] = 0
            borda_ratings[movie] += rank  # Assigning Borda score based on rank
            rank -= 1 
    # Normalize Borda count scores to a specific range (0.5 to 5 in this case)
    min_score = min(borda_ratings.values())
    max_score = max(borda_ratings.values())

    for movie, score in borda_ratings.items():
        normalized_score = ((score - min_score) / (max_score - min_score)) * (5 - 0.5) + 0.5
        borda_ratings[movie] = normalized_score
    return borda_ratings

In [208]:
def group_recommendations(user_recommendations_dict, aggregation_method, top_n = 10):
    '''Function calculates the group_recommendation based on the given aggregation method.
    The aggregation methods are average and least misery method.
    Returns :list(tuple) of the movies and predicted rating based on selected method'''
  
    movie_ratings = {}
    #user_recommendations_list = [user_recommendations]
    for user,recommendations in user_recommendations_dict.items():
        for movie, rating in recommendations.items():
            if movie not in movie_ratings:
                movie_ratings[movie] = []
            movie_ratings[movie].append(rating)
    aggregated_ratings = {}   
    if aggregation_method == 'average':
        aggregated_ratings = {movie: np.mean(ratings) for movie, ratings in movie_ratings.items()}

    elif aggregation_method == 'least misery':
        aggregated_ratings = {movie: np.min(ratings) for movie, ratings in movie_ratings.items()}
        
        
    #group recommendations
    sorted_group_recommendations = sorted(aggregated_ratings.items(), key=lambda item: item[1], reverse=True)
    top_group_recommendations = sorted_group_recommendations[:top_n]

    return aggregated_ratings

In [209]:
def weighted_group_recommendation(common_recommendations):
    '''
    parameters : dataframe of  common user recommendations
    returns the group recommendations ->pd series
    
    '''
    #Calculating disaggreements
    disagreements = 1 - cosine_similarity(common_recommendations.T)
    # Create a DataFrame with disagreements
    disagreements_df = pd.DataFrame(disagreements, columns=common_recommendations.columns, index=common_recommendations.columns)
    weights = 1 - (disagreements_df - disagreements_df.min()) / (disagreements_df.max() - disagreements_df.min())
    # Weighted Aggregation for group recommendations
    weighted_aggregated_recommendations = common_recommendations.copy()

    # multiplying each user's recommendation by their corresponding weight based on disagreements
    for user in common_recommendations.columns:
        weighted_aggregated_recommendations[user] *= weights.loc[user, user]

    # Sum up the weighted recommendations across users to get aggregated recommendations
    group_recommendations = weighted_aggregated_recommendations.sum(axis=1)
    # normalized aggregated recommendations to range (0 and 5)
    min_rating = 0 
    max_rating = 5  

    normalized_group_recommendations = (
        (group_recommendations - group_recommendations.min()) / (group_recommendations.max() - group_recommendations.min())
    ) * (max_rating - min_rating) + min_rating
    top_10_group_recommendations = normalized_group_recommendations.sort_values(ascending=False).head(10)

    return normalized_group_recommendations


In [210]:
def get_group_predictions(user_ptable,group_users):
    user_recommendations_dict = {}
    for user in group_users:
        user_recommendations_dict [user] = get_user_recommendations(user, user_ptable, pearson_correlation, user_prediction)
    #user recommendations dict as a DataFrame
    recommendations_df = pd.DataFrame(user_recommendations_dict)
    # commonly recommended items to the 3 users
    common_recommendations = recommendations_df.dropna(axis=0, how='any')    
    average_ratings = group_recommendations(user_recommendations_dict,'average')
    least_misery_ratings = group_recommendations(user_recommendations_dict,'least misery')
    weighted_agg = weighted_group_recommendation(common_recommendations)
    return least_misery_ratings,weighted_agg


# Satisfaction-Optimized Sequential Aggregation Model

## Design and Implementation:
Sequential Aggregation Model (satisfaction_optimized_sequential_agg_model function):

Purpose: Provides sequential recommendations for a group of users, optimizing satisfaction and adapting to evolving preferences.

### Parameters:

- i: Iteration number (sequential step).

- group_users: List of user IDs in the group.

- user_ptable: Pivot table of user ratings.

- prev_user_satisfactions: Dictionary storing satisfaction scores from previous iterations.

- top_n: Number of top recommendations to consider (default is set to 10).

### Key Components:

- Initialization:For the first iteration (i == 1), it calculates the initial group recommendation using the weighted_group_recommendation function based on the collaborative filtering approach.

- Sequential Scores:For subsequent iterations (i > 1), it calculates sequential scores using the compute_scores function, incorporating recommendations from previous iterations.

- User Satisfaction:Calculates user satisfaction scores based on either group recommendations (for the first iteration) or sequential scores (for subsequent iterations).

- Iteration Update:Updates the satisfaction scores for each user in the prev_user_satisfactions dictionary.

- Print Output:Prints the satisfaction scores and recommendations for the current iteration.

### Advantages:

- Sequential Consideration:Takes into account the sequential nature of group recommendations, considering recommendations and satisfaction scores from previous iterations for a refined experience.

- Adaptability with Iterations:Introduces an adaptive factor (alpha) to adjust the importance of previous recommendations, allowing the system to adapt to evolving group preferences.

- User-Centric Satisfaction:Incorporates user satisfaction as a key metric, ensuring that recommendations align with individual user preferences for higher overall satisfaction.

- Combination of Group and Sequential Approaches:Intelligently combines both group recommendations and sequential scores, starting with collaborative filtering and refining recommendations over iterations.

- Flexibility in Recommendation Strategies:Provides flexibility by allowing different aggregation methods for group recommendations, accommodating diverse group preferences.

### Theoretical Foundation:

- Adaptive Recommendation Adjustment:The method adjusts recommendations based on both predicted preferences and the lowest satisfaction in the group, optimizing for a balance between personalization and group satisfaction.

- Sequential Satisfaction Computation:Sequentially computes satisfaction, adapting to evolving preferences and providing a dynamic group recommendation experience.

- Combination of Collaborative Filtering and Sequential Scoring:Integrates collaborative filtering for initial group recommendations and sequential scoring for iterative refinement, ensuring a comprehensive and adaptive approach.

### Explanation of Method Effectiveness for Sequential Group Recommendations:

1. **Sequential Nature Consideration:**
   - *Reason:* The method acknowledges that group preferences evolve over time. By considering recommendations and satisfaction scores from previous iterations, it adapts to changing dynamics and provides recommendations aligned with the current state of user preferences.

2. **Adaptive Recommendation Adjustment:**
   - *Reason:* The introduction of an adaptive factor (`alpha`) allows the system to adjust the influence of previous recommendations. This adaptability optimizes the balance between historical preferences and the most recent ones, ensuring that the system is responsive to shifts in group tastes.

3. **User-Centric Satisfaction:**
   - *Reason:* The method prioritizes user satisfaction as a key metric for evaluating the effectiveness of recommendations. By focusing on individual satisfaction, it ensures that the group recommendations are tailored to the preferences of each user, contributing to an overall positive group experience.

4. **Combination of Collaborative Filtering and Sequential Scoring:**
   - *Reason:* The integration of collaborative filtering for initial group recommendations and sequential scoring for iterative refinement creates a comprehensive approach. Collaborative filtering captures the collective preferences of the group, while sequential scoring refines recommendations based on evolving tastes, resulting in a well-rounded solution.

5. **Flexibility in Recommendation Strategies:**
   - *Reason:* The method offers flexibility by allowing different aggregation methods for group recommendations, such as 'average' or 'least misery.' This adaptability accommodates diverse group preferences and ensures that the system can adjust its strategy based on the nature of the group.

6. **Balancing Predicted Preferences and Lowest Satisfaction:**
   - *Reason:* The theoretical foundation of balancing recommendations based on both predicted preferences and the lowest satisfaction in the group contributes to a more nuanced and considerate recommendation strategy. It avoids consistently overlooking the preferences of users who might be less satisfied in previous iterations.

7. **Holistic Group Satisfaction Approach:**
   - *Reason:* By combining sequential considerations, adaptive adjustments, and user-centric satisfaction, the method takes a holistic approach to group satisfaction. It strives to enhance the overall satisfaction of the entire group by catering to individual preferences and adapting to evolving tastes.

8. **Dynamic Evolution of Recommendations:**
   - *Reason:* The method's design allows for the dynamic evolution of recommendations over sequential iterations. This ensures that the system does not become static and remains responsive to the changing dynamics of the group, providing relevant and timely suggestions.

In essence, the proposed method excels in sequential group recommendations by leveraging adaptability, user-centricity, and a combination of collaborative and sequential strategies. It offers a comprehensive and flexible approach that aligns with the dynamic nature of group preferences, ultimately contributing to enhanced group satisfaction over sequential iterations.

In [211]:
def group_satisfaction():

IndentationError: expected an indented block (699556516.py, line 1)

In [212]:
def calculate_user_satisfaction(user_ratings_df, group_recommendations):
    user_satisfactions = {}

    for user_id, user_ratings in user_ratings_df.transpose().iterrows():
        user_list_sat = user_ratings.sum()  # total ratings given by the user
        group_list_sat = (user_ratings * group_recommendations).sum()  # sum of product of user ratings and group recommendations
        
        if user_list_sat != 0:  #division by zero
            user_satisfactions[user_id] = group_list_sat / user_list_sat
        else:
            user_satisfactions[user_id] = 0  # Set to 0 if no ratings
        
    return user_satisfactions


In [213]:
def compute_scores(G,user_ptable,alpha,group_users):
    '''G set of movies
    '''
    least_misery_ratings,weighted_agg = get_group_predictions(user_ptable,group_users)
    scores = {}
    for movie_id in G:
        # Find the least misery rating for the current movie
        least_score = next((rating for mid, rating in least_misery_ratings if mid == movie_id), 0)
        # Find the weighted aggregation score for the current movie
        weighted_score = weighted_agg.get(movie_id, 0)

        scores = (1 - alpha) * weighted_score + (alpha * least_score)
        scores[movie_id] = scores
    return scores


In [214]:
def sequential_agg_model(i,group_users,user_ptable,top_n = 10):
    prev_user_satisfactions = {}
    prev_user_satisfactions[0] = {}
    user_recommendations_dict = {}#a
    G = set()
    for user in group_users:
        recommendations = get_user_recommendations(user, user_ptable, pearson_correlation, user_prediction)
        user_recommendations_dict [user] = recommendations
        G.update(recommendations)

    #user recommendations dict as a DataFrame
    recommendations_df = pd.DataFrame(user_recommendations_dict)
    # commonly recommended items to the 3 users
    common_recommendations = recommendations_df.dropna(axis=0, how='any')

    sequential_scores = {}
    if i == 1:
        alpha = 0
        group_recommendations = weighted_group_recommendation(common_recommendations)#top 10 group recommendations
        satisfaction_scores = calculate_user_satisfaction(common_recommendations,group_recommendations)
        prev_user_satisfactions[i] = satisfaction_scores
    else:
        satisfaction_values = prev_user_satisfactions.get(i-1, {})
        alpha = max(satisfaction_values.values()) - min(satisfaction_values.values())
        
        sequential_scores = compute_scores(G,user_ptable,alpha,group_users)
        
        satisfaction_scores = calculate_user_satisfaction(common_recommendations,sequential_scores)
        prev_user_satisfactions[i] = satisfaction_scores
        
    recommended_movies = group_recommendations.index.tolist()  # List of recommended movie IDs
    # Remove recommended movies from user_ptable columns
    user_ptable = user_ptable.drop(recommended_movies, axis=1, errors='ignore')

    print(f'Iteration {i} Satisfaction Scores: {satisfaction_scores}')
    if i == 1:
         print(f'Group Recommendations for Iteration {i}: {group_recommendations}')
    else:
         print(f'Sequential Recommendations for Iteration {i}: {sequential_scores}')

    return user_ptable, group_recommendations,sequential_scores,prev_user_satisfactions


In [215]:
group_users = [1,2,3]
# First iteration
user_ptable, group_recommendations, sequential_scores, prev_user_satisfactions = sequential_agg_model(1, group_users, user_ptable)

Iteration 1 Satisfaction Scores: {1: 3.4223132037464166, 2: 3.435336012896775, 3: 3.4403983049137956}
Group Recommendations for Iteration 1: 2         3.635807
3         3.322349
4         3.432365
5         3.432365
6         3.483168
            ...   
193581    3.432365
193583    3.432365
193585    3.432365
193587    3.432365
193609    3.432365
Length: 9600, dtype: float64


In [None]:
group_recommendations

2959     5.000000
48516    2.511031
91529    0.000000
dtype: float64

In [None]:
prev_user_satisfactions

{0: {},
 1: {1: 3.428013807475973, 2: 3.5306512545090936, 3: 3.5508553686439486}}

In [None]:
i = 2
satisfaction_values = prev_user_satisfactions.get(i-1, {})
if satisfaction_values:
    alpha = max(satisfaction_values.values()) - min(satisfaction_values.values())
alpha

0.1228415611679754

In [216]:
# Second iteration
user_ptable, group_recommendations, sequential_scores, prev_user_satisfactions = sequential_agg_model(2, group_users, user_ptable)

ValueError: max() arg is an empty sequence

In [None]:
#group_users = np.random.choice(user_ptable.index, size=3, replace=False)
group_users = [1,2,3]
user_recommendations_dict = {}
G = set()
for user in group_users:
    recommendations = get_user_recommendations(user, user_ptable, pearson_correlation, user_prediction)
    user_recommendations_dict [user] = recommendations
    G.update(recommendations)
#user recommendations dict as a DataFrame
recommendations_df = pd.DataFrame(user_recommendations_dict)
# commonly recommended items to the 3 users
common_recommendations = recommendations_df.dropna(axis=0, how='any')





Unnamed: 0,1,2,3
2,3.951613,4.638581,2.790394
3,3.951613,3.953163,2.790394
4,3.951613,3.798077,3.186047
5,3.951613,3.798077,3.186047
6,3.951613,3.365724,3.729486
...,...,...,...
193581,3.951613,3.798077,3.186047
193583,3.951613,3.798077,3.186047
193585,3.951613,3.798077,3.186047
193587,3.951613,3.798077,3.186047


In [None]:

least_misery_ratings,weighted_agg = get_group_predictions(user_ptable,group_users)
least_misery_ratings

[(1674, 4.690743338008415),
 (2028, 4.408039976312374),
 (47, 4.363581333413423),
 (58559, 4.356046511627907),
 (3863, 4.306451612903226),
 (1198, 4.275758449361477),
 (2959, 4.111071948281663),
 (2571, 4.106465656249977),
 (8368, 4.058493589743589),
 (4993, 4.033420875171526)]

In [None]:
type(least_misery_ratings)

list

In [None]:
weighted_agg

47      5.000000
1198    4.959837
3863    4.861231
750     4.708015
3740    4.708015
1129    4.708015
2530    4.698955
3262    4.698955
2116    4.698955
4941    4.698955
dtype: float64

In [None]:
group_recommendation = weighted_group_recommendation(common_recommendations)
satisfaction_scores = calculate_user_satisfaction(common_recommendations,group_recommendation)
satisfaction_scores

{1: 0.005300486436839696, 2: 0.006554730670875895, 3: 0.007331621190056512}

In [None]:
prev_user_satisfactions = {}
prev_user_satisfactions[1] = satisfaction_scores
prev_user_satisfactions


{1: {1: 0.005300486436839696,
  2: 0.006554730670875895,
  3: 0.007331621190056512}}