In [99]:
import pandas as pd
import numpy as np
import math as m
import random as r
from tabulate import tabulate
from scipy.stats import pearsonr
from scipy.stats import spearmanr
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import itertools


links = pd.read_csv('ml-latest-small/links.csv')
links.head(5)
movies = pd.read_csv('ml-latest-small/movies.csv')
movies.head(5)
tags = pd.read_csv('ml-latest-small/tags.csv')
tags.head(5)
ratingss = pd.read_csv("ml-latest-small/ratings.csv")
ratingss.head(5)
#dropping the timestamp column
ratings = ratingss.drop(['timestamp'], axis=1)
ratings_subsets = [
    ratings[:10000], #subset 1
    ratings[:20000], #subsets 1-2
    ratings[:30000], #subsets 1-3
    ratings[:40000], #subsets 1-4
    ratings[:50000], #subsets 1-5
    ratings[:60000], #subsets 1-6
    ratings[:70000], #subsets 1-7
    ratings[:80000], #subsets 1-8
    ratings[:90000], #subsets 1-9
    ratings, #subsets 1-10
]
#movie and ratings dataset
movie_ratings = pd.merge(ratingss, movies, on='movieId')
movie_ratings.head()
#reshaping the data to table based on column values
user_ptable= ratings.pivot(index='userId', columns='movieId', values='rating')
user_ptable.head()


#pearson correlation coefficient
def pearson_correlation(user_a_ratings,user_b_ratings):
    corr,_ = pearsonr(user_a_ratings,user_b_ratings)
    return corr

def user_collaborative_filtering(target_user,p_table,correlationfunction):
    '''
    Gets the most similar users and their correlations to the target user
    Parameters: int target_user -user id in the dataset
                p_table - data as a pivot table
                correlationfunction - the correlation function to be used
    Return: dict similar_users -dictionary of users who have rated similar movies as the target user
    with their ratings.
    '''
    similar_users = {}
    #other users who are not the target user
    for user_b in p_table.index:
        if user_b != target_user:
            # ratings for the target user and user_b
            target_user_ratings = p_table.loc[target_user].dropna()
            user_b_ratings = p_table.loc[user_b].dropna()

            # common rated movies
            common_rated_movies = target_user_ratings.index.intersection(user_b_ratings.index)
            #filter for at least 2  common rated movies
            if len(common_rated_movies) >= 2:
                #filter  ratings to include only common rated movies
                target_user_ratings = target_user_ratings[common_rated_movies]
                user_b_ratings = user_b_ratings[common_rated_movies]
                #check if either contains all the same elements as correlation will be 1 regardless of actual rating
                if len(set(target_user_ratings)) == 1 or len(set(user_b_ratings)) == 1:
                    continue
                similar_users[user_b] = correlationfunction(target_user_ratings,user_b_ratings)
                    
    return similar_users
     
def user_prediction(user_a,item_p,p_table,similarities):
    '''
    Calculates the predicted rating of user `user_a` for item `item_p`.
    Parameters: int user_a - the index of the target user
                int item_p - the index of the unseen movie by target user
                p_table - pivot table of data
                similarities - the dictionary of correlations between target user
                  and other users.
    Return: int prediction - rating of user a for item p
    '''
    user_a_ratings = p_table.loc[user_a]
    mean_usera_ratings = user_a_ratings.mean()
    unseen_item_ratings = p_table.loc[:, item_p].dropna()

    # Get the similarity scores between the target user and other users who have rated the unseen item.
    #relevant_similarities = {}
    predicted_rating = 0
    weighted_difference = 0
    similarity_sum = 0
    for user_b, similarity in similarities.items():
        if user_b != user_a and user_b in unseen_item_ratings.index:
            user_b_ratings = p_table.loc[user_b]
            mean_userb_ratings = user_b_ratings.mean()
            rating_difference = unseen_item_ratings.loc[user_b] - mean_userb_ratings
            weighted_difference += (similarity*rating_difference)
            similarity_sum += abs(similarity)

    if similarity_sum != 0:
        # the prediction as the active user's mean plus the weighted rating differences
        predicted_rating = mean_usera_ratings + (weighted_difference / similarity_sum)
    else:
        predicted_rating = mean_usera_ratings

    return np.clip(predicted_rating,0.5,5)

def get_user_recommendations(user, p_table, correlation_function, prediction_function,top_n = 10):
    '''Function gets the user _recommendations for a particular user using the prediction function
    Returns a dictionary of the movie (key) and the predicted rating(value).
    p table in this case is the data in the said iteration
    '''
    similar_users = user_collaborative_filtering(user, p_table, correlation_function)
    sorted_similar_users =  sorted(similar_users.items(), key=lambda item: item[1],reverse=True)
    #sorted_similar_users = sorted(similar_users.items(), key=operator.itemgetter(1), reverse=True)
    top_similar_users = sorted_similar_users[:top_n]
    top_10_similar_users_dict={}
    for user,similarity in top_similar_users:
        top_10_similar_users_dict[user]=similarity
    user_recommendations = {}
    for movie in p_table.columns:
        if pd.isna(p_table.loc[user, movie]):
            user_recommendations[movie] = prediction_function(user, movie, p_table, top_10_similar_users_dict)
    return user_recommendations
   
                                                                                                                                                                          

In [100]:
def borda_count(user_recommendations_dict):
    borda_ratings = {}
    for _, recommendations in user_recommendations_dict.items():
        rank = len(recommendations)  # Initial rank value
        for movie, rating in recommendations.items():
            if movie not in borda_ratings:
                borda_ratings[movie] = 0
            borda_ratings[movie] += rank  # Assigning Borda score based on rank
            rank -= 1 
    # Normalize Borda count scores to a specific range (0.5 to 5 in this case)
    min_score = min(borda_ratings.values())
    max_score = max(borda_ratings.values())

    for movie, score in borda_ratings.items():
        normalized_score = ((score - min_score) / (max_score - min_score)) * (5 - 0.5) + 0.5
        borda_ratings[movie] = normalized_score
    return borda_ratings

In [101]:
def group_recommendations(user_recommendations_dict, aggregation_method, top_n = 10):
    '''Function calculates the group_recommendation based on the given aggregation method.
    The aggregation methods are average and least misery method.
    Returns :list(tuple) of the movies and predicted rating based on selected method'''
  
    movie_ratings = {}
    #user_recommendations_list = [user_recommendations]
    for user,recommendations in user_recommendations_dict.items():
        for movie, rating in recommendations.items():
            if movie not in movie_ratings:
                movie_ratings[movie] = []
            movie_ratings[movie].append(rating)
    aggregated_ratings = {}   
    if aggregation_method == 'average':
        aggregated_ratings = {movie: np.mean(ratings) for movie, ratings in movie_ratings.items()}

    elif aggregation_method == 'least misery':
        aggregated_ratings = {movie: np.min(ratings) for movie, ratings in movie_ratings.items()}
        
    elif aggregation_method == 'borda':
        aggregated_ratings = borda_count(user_recommendations_dict) 
        
    #group recommendations
    sorted_group_recommendations = sorted(aggregated_ratings.items(), key=lambda item: item[1], reverse=True)
    top_group_recommendations = sorted_group_recommendations[:top_n]

    return top_group_recommendations

In [102]:
#group_users = np.random.choice(user_ptable.index, size=3, replace=False)
group_users = [1,2,3]
user_recommendations_dict = {}
for user in group_users:
    user_recommendations_dict [user] = get_user_recommendations(user, user_ptable, pearson_correlation, user_prediction)
all_movie_ids = []
for user_id, recommendations in user_recommendations_dict.items():
    #getting movie ids
    all_movie_ids.extend(recommendations.keys())
# removing duplicates and converting to list
movie_list = list(set(all_movie_ids))#aggregated list from user recommendations

In [103]:
def calculate_user_satisfaction(user_id,group_recommendations,user_recommendations):
    group_recommendations_ratings = sum(group_recommendations.values())
    user_ratings = sum (user_recommendations[movie] for movie in group_recommendations.keys() if movie in user_recommendations) 
    if user_ratings == 0:
        return 0
    user_satisfaction = group_recommendations_ratings/user_ratings

    return user_satisfaction

def user_sat(iteration_data, predictions, rates_by_group, top_k_user_rates, userId):
  rates_by_user = rates_by_group[rates_by_group.userId == userId]


  group_list_sat = np.sum([user_based_pred(iteration_data, rates_by_user, p) for p in predictions])
  user_list_sat = np.sum(top_k_user_rates.rating)

  try:
    return min([1, group_list_sat / user_list_sat])
  except ZeroDivisionError:
    return 0



In [104]:
def get_group_predictions(user_ptable,group_users):
    user_recommendations_dict = {}
    for user in group_users:
        user_recommendations_dict [user] = get_user_recommendations(user, user_ptable, pearson_correlation, user_prediction)
    average_ratings = group_recommendations(user_recommendations_dict,'average')
    least_misery_ratings = group_recommendations(user_recommendations_dict,'least misery')
    borda_count_ratings = group_recommendations(user_recommendations_dict,'borda')
    return average_ratings,least_misery_ratings,borda_count_ratings


In [105]:
def get_combined_score_dict(avg_ratings,least_misery_ratings, borda_ratings, alpha, beta, gamma):
    combined_scores = {}
    for movie_id in avg_ratings:
        combined_score = alpha * avg_ratings[movie_id] + beta * borda_ratings[movie_id] + gamma * least_misery_ratings[movie_id]
        combined_scores[movie_id] = combined_score
    return combined_scores

In [106]:
def get_combined_score_dict(avg_ratings, least_misery_ratings, borda_ratings, alpha, beta, gamma):
    combined_scores = {}
    
    # Assuming each element in avg_ratings, least_misery_ratings, and borda_ratings is a tuple (movie_id, rating)
    for (movie_id, avg_rating), (_, least_misery_rating), (_, borda_rating) in zip(avg_ratings, least_misery_ratings, borda_ratings):
        combined_score = alpha * avg_rating + beta * borda_rating + gamma * least_misery_rating
        combined_scores[movie_id] = combined_score
    
    return combined_scores


In [107]:
def sequential_hybrid_agg_model(group_users, iterations = 3, top_n=10):
   prev_user_satisfactions = {0: [1 for user in group_users]}
   alpha, beta, gamma = 0, 0, 0

   for iteration in range(1, iterations + 1):
    data = ratings_subsets[iteration - 1]
    

    recommendations = []

    user_preferences = {}
    group_ratings = []
    for user in group_users:
        user_ratings = data[data.userId == user]

        group_ratings.append(user_ratings)

        user_preferences[user] = user_ratings.sort_values(by=['rating'], ascending=False)[:top_n]

    group_ratings = pd.concat(group_ratings)

    average_satisfaction = sum(prev_user_satisfactions[iteration - 1]) / len(prev_user_satisfactions[iteration - 1])
    variance_satisfaction = np.var(prev_user_satisfactions[iteration - 1])

    beta = average_satisfaction / 2 - variance_satisfaction
    gamma = average_satisfaction / 2 + average_satisfaction
    alpha = 1 - (beta + gamma)

    avg_aggregation, least_misery_aggregation, borda_aggregation = get_group_predictions(user_ptable,group_users)
    scores = get_combined_score_dict(avg_aggregation, least_misery_aggregation, borda_aggregation ,alpha, beta, gamma)

    for movie_id, score in scores.items():
    # Include aggregation details accordingly
        recommendations.append({
            'movieId': movie_id,
            'rate': score,
            'message': f"movieId:{movie_id} with rating:{score}, "
        })
        

    recommendations = sorted(recommendations, key=lambda i: i['rate'], reverse=True)[:top_n]
    predictions = [x['movieId'] for x in recommendations]

    
    print(f"Top {top_n} Recommended Movies for group:{group_users} in iteration {iteration}")
    for idx, each in enumerate(recommendations):
        print(f"{idx + 1}. {each['message']}")
    prev_user_satisfactions[iteration ] = [user_sat(data, predictions, group_ratings, user_preferences[user], user) for user in group_users]

In [108]:
group_users = [8, 312, 609]
s=sequential_hybrid_agg_model(group_users)
s

Top 10 Recommended Movies for group:[8, 312, 609] in iteration 1
1. movieId:1 with rating:5.0, 
2. movieId:318 with rating:4.99976743384785, 
3. movieId:2959 with rating:4.963820581981414, 
4. movieId:527 with rating:4.599277584315145, 
5. movieId:1304 with rating:4.523512611375011, 
6. movieId:3897 with rating:4.479036064545451, 
7. movieId:2706 with rating:4.241645711166858, 
8. movieId:1233 with rating:4.139315747152899, 
9. movieId:4878 with rating:4.135403759920003, 
10. movieId:1213 with rating:4.129614878659986, 


KeyError: 1