In [13]:
import pandas as pd
import numpy as np
import math as m
import random as r
from scipy.stats import pearsonr
from scipy.stats import spearmanr
# Read the four CSV tables from the ml-latest-small directory and preview each one
links = pd.read_csv("ml-latest-small/links.csv")
links.head()
movies = pd.read_csv("ml-latest-small/movies.csv")
movies.head()
tags = pd.read_csv("ml-latest-small/tags.csv")
tags.head()
ratings = pd.read_csv("ml-latest-small/ratings.csv")
ratings.head()
# Discard the timestamp column; the remaining columns are kept as they are
ratings = ratings.drop(columns=["timestamp"])
# Join every rating with the movies row that has the same movieId
movie_ratings = pd.merge(ratings, movies, on="movieId")
movie_ratings.head()
# Wide table: one row per userId, one column per movieId, cells hold the rating
user_ptable = ratings.pivot(index="userId", columns="movieId", values="rating")
user_ptable.head()
#pearson correlation coefficient
def pearson_correlation(user_a_ratings,user_b_ratings):
    '''
    Computes the Pearson correlation coefficient between two sets of ratings.
    Parameters: user_a_ratings - ratings of the first user
                user_b_ratings - ratings of the second user, same length as the first
    Return: the correlation coefficient reported by scipy's pearsonr
    '''
    # pearsonr reports (coefficient, p-value); only the coefficient is wanted
    result = pearsonr(user_a_ratings, user_b_ratings)
    return result[0]
def user_collaborative_filtering(target_user, p_table, correlationfunction):
    '''
    Computes the similarity between the target user and every other user in the table.

    Two users are compared only on the movies both of them have rated. Another user is
    skipped when fewer than 2 movies are shared, or when either user's ratings on the
    shared movies are all identical (the correlation is undefined for a constant series).

    Parameters: target_user - user id; must be a row label of p_table
                p_table - data as a pivot table: one row per user, one column per movie,
                          NaN where the user has not rated the movie
                correlationfunction - callable(target_ratings, other_ratings) returning the
                                      similarity of two rating Series on the shared movies
    Return: dict similar_users - maps each comparable user id to
            {"ratings": [array of that user's ratings on the shared movies],
             "correlations": [similarity to the target user]}
    Raises: KeyError if target_user is not a row label of p_table
    '''
    if target_user not in p_table.index:
        raise KeyError(target_user)

    # The target user's ratings are the same for every comparison, so look them up once
    target_ratings = p_table.loc[target_user].dropna()

    similar_users = {}

    for user_b in p_table.index:
        if user_b == target_user:
            continue

        user_b_ratings = p_table.loc[user_b].dropna()

        # movies rated by both users, in the order of the target user's ratings
        common_rated_movies = target_ratings.index.intersection(user_b_ratings.index)

        # a correlation needs at least 2 paired observations
        if len(common_rated_movies) < 2:
            continue

        target_common = target_ratings.loc[common_rated_movies]
        user_b_common = user_b_ratings.loc[common_rated_movies]

        # a constant series has zero variance, which makes the correlation undefined
        if target_common.nunique() == 1 or user_b_common.nunique() == 1:
            continue

        similarity = correlationfunction(target_common, user_b_common)

        entry = similar_users.setdefault(user_b, {"ratings": [], "correlations": []})
        entry["ratings"].append(user_b_common.values)
        entry["correlations"].append(similarity)

    return similar_users

                                                                                                                                                                                                                           

In [3]:
from typing import Dict


In [19]:
def group_recommendations(similar_users: Dict, aggregation_method: str = 'average') -> pd.Series:
    '''
    Aggregates individual user ratings using the specified method to generate group recommendations.

    For every user, the arrays stored under "ratings" are placed side by side as columns
    (aligned on their index, which is positional for plain arrays) and reduced row by row.
    When a user has a single ratings array the aggregate is that array itself.

    Parameters: dict similar_users - maps user id to a dict holding a "ratings" list of rating arrays
                aggregation_method - 'average' (row-wise mean) or 'least_misery' (row-wise minimum)
    Return: pd.Series group_recommendations - indexed by user id; each value is the Series of
            aggregated ratings for that user
    Raises: ValueError if similar_users is empty or aggregation_method is not supported
    '''
    if not similar_users:
        raise ValueError("No similar users found for group recommendations.")

    # Supported reductions; each takes the per-user ratings frame and collapses its columns
    aggregators = {
        'average': lambda ratings: ratings.mean(axis=1),
        'least_misery': lambda ratings: ratings.min(axis=1),
    }

    # Reject unknown methods up front instead of failing later on an unbound result
    if aggregation_method not in aggregators:
        raise ValueError(
            "Unsupported aggregation method: {!r}. Expected one of: {}.".format(
                aggregation_method, ", ".join(sorted(aggregators))
            )
        )
    aggregate = aggregators[aggregation_method]

    group_ratings = {}
    for user, data in similar_users.items():
        # one column per stored ratings array
        ratings = pd.concat([pd.Series(values) for values in data["ratings"]], axis=1)
        group_ratings[user] = aggregate(ratings)

    return pd.Series(group_ratings)


In [20]:
# Example usage: find the users comparable to one target user, then aggregate their ratings.
target_user_id = 1  # Replace with the desired user ID
# Similarity of every other user to the target user, measured with pearson_correlation
similar_users = user_collaborative_filtering(target_user_id, user_ptable, pearson_correlation)
# One entry per similar user, reduced with the 'average' method
group_rec_avg = group_recommendations(similar_users, aggregation_method='average')

# Print a heading followed by the plain-text form of the resulting Series
print("Group Recommendations (Average):")
print(group_rec_avg)

Group Recommendations (Average):
3      0    0.5
1    3.5
2    4.5
3    0.5
4    0.5
5...
4      0     2.0
1     2.0
2     5.0
3     1.0
4     ...
5      0     4.0
1     4.0
2     4.0
3     5.0
4     ...
6      0     5.0
1     4.0
2     4.0
3     1.0
4     ...
7      0     4.5
1     4.5
2     5.0
3     5.0
4     ...
                             ...                        
606    0     2.5
1     3.0
2     4.5
3     4.0
4     ...
607    0     4.0
1     5.0
2     3.0
3     3.0
4     ...
608    0      2.5
1      2.0
2      4.5
3      4.5
4 ...
609    0    3.0
1    3.0
2    3.0
3    4.0
4    4.0
5...
610    0     5.0
1     5.0
2     5.0
3     4.0
4     ...
Length: 568, dtype: object


In [21]:
def group_recommendations_least_misery(similar_users: Dict) -> pd.Series:
    '''
    Builds group recommendations with the least misery rule: the lowest rating wins.
    Parameters: dict similar_users - maps user id to a dict holding a "ratings" list of rating arrays
    Return: pd.Series group_recommendations - indexed by user id; each value is the Series of
            row-wise minimum ratings for that user
    '''
    if not similar_users:
        raise ValueError("No similar users found for group recommendations.")

    least_misery_by_user = {}
    for user_id, entry in similar_users.items():
        # Lay the user's rating arrays side by side, one column per array
        rating_columns = [pd.Series(values) for values in entry["ratings"]]
        ratings_frame = pd.concat(rating_columns, axis=1)

        # Least misery: keep the minimum score found in each row
        least_misery_by_user[user_id] = ratings_frame.min(axis=1)

    return pd.Series(least_misery_by_user)


In [22]:
# Apply the least misery aggregation to the similar_users computed in the example-usage cell.
# NOTE(review): the captured output below matches the 'average' output line for line —
# presumably because each user carries a single ratings array, so min and mean coincide; confirm.
group_rec_least_misery = group_recommendations_least_misery(similar_users)

# Print a heading followed by the plain-text form of the resulting Series
print("Group Recommendations (Least Misery):")
print(group_rec_least_misery)


Group Recommendations (Least Misery):
3      0    0.5
1    3.5
2    4.5
3    0.5
4    0.5
5...
4      0     2.0
1     2.0
2     5.0
3     1.0
4     ...
5      0     4.0
1     4.0
2     4.0
3     5.0
4     ...
6      0     5.0
1     4.0
2     4.0
3     1.0
4     ...
7      0     4.5
1     4.5
2     5.0
3     5.0
4     ...
                             ...                        
606    0     2.5
1     3.0
2     4.5
3     4.0
4     ...
607    0     4.0
1     5.0
2     3.0
3     3.0
4     ...
608    0      2.5
1      2.0
2      4.5
3      4.5
4 ...
609    0    3.0
1    3.0
2    3.0
3    4.0
4    4.0
5...
610    0     5.0
1     5.0
2     5.0
3     4.0
4     ...
Length: 568, dtype: object
