In [2]:
import pandas as pd
import numpy as np

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


# Firt Assignment

### a) Look at the dataset

In [3]:
#read the table of ratings(so we get the structure and the dimentions)
ratings=pd.read_csv('ml-latest-small/ratings.csv')
print('number of ratings:', len(ratings))
ratings

number of ratings: 100836


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [4]:
movies=pd.read_csv('ml-latest-small/movies.csv')
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


### b) User-based collaborative filtering approach

b.1)Generate the matrix 

In [5]:

# Determin matrix user-movie (pivot table)
movie_matrix = ratings.pivot_table(index='movieId', columns='userId', values='rating')

movie_matrix


userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,,,4.0,,4.5,,,,...,4.0,,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,,,,,,4.0,,4.0,,,...,,4.0,,5.0,3.5,,,2.0,,
3,4.0,,,,,5.0,,,,,...,,,,,,,,2.0,,
4,,,,,,3.0,,,,,...,,,,,,,,,,
5,,,,,,5.0,,,,,...,,,,3.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,,,,,,,,,,,...,,,,,,,,,,
193583,,,,,,,,,,,...,,,,,,,,,,
193585,,,,,,,,,,,...,,,,,,,,,,
193587,,,,,,,,,,,...,,,,,,,,,,


b.2) implementation of Pearson correlation

In [6]:
#The Pearson correlation coefficient measures the linear relationship between two datasets. 
def pearson_similarity(ratings1, ratings2):
    # movies rated from both users
    movies = ratings1.index.intersection(ratings2.index)
    if len(movies) == 0:
        return 0  # No correlation
    
    user1_ratings = ratings1[movies]
    user2_ratings = ratings2[movies]

    # determine the avarage
    mean_user1 = user1_ratings.mean()
    mean_user2 = user2_ratings.mean()

    # summerize the product of the pair of the normilazed ratings
    numerator = ((user1_ratings - mean_user1) * (user2_ratings - mean_user2)).sum()
    #the squared deviation scores of your variables
    denominator = ((((user1_ratings - mean_user1) ** 2).sum())**0.5) * ((((user2_ratings - mean_user2) ** 2).sum())**0.5)
    if denominator == 0:
        return 0
    else:
        return numerator / denominator

### e) New function for the similarity

In [7]:
#Compute the positional intersection of two sets.Returns a set containing elements that are equal and in the same position in both sets.
def positional_intersection(list1, list2):
    intersection = []
    min_length = min(len(list1), len(list2))
    for i in range(min_length):
        if list1[i] == list2[i]:
            intersection.append(list1[i])
    return intersection

#return the number of element un the union between lists
def union_fun (list1, list2):
    min_length = min(len(list1), len(list2))
    union = len(list1) + len(list2)
    
    for i in range(min_length):
        if list1[i] == list2[i]: #if the is a two times the same ratings, one is not counted
            union-=1
    return union

#jaccard similarity (I choose this similarity because is the most famous that I know of )
#but if dataset is sparse, meaning many users have rated only a few items, it's more likely to observe high similarity scores 
#because there are fewer items to differentiate users. So a lot of users have the same Jaccard similarity of 1 if those items are rated identically.
def jaccard_similarity(target_user_ratings, other_user_ratings):
    # Remove the nan
    target_ratings = target_user_ratings.dropna().tolist()
    other_ratings = other_user_ratings.dropna().tolist()

    intersection =len(positional_intersection(target_ratings, other_ratings))
    union = union_fun(target_ratings, other_ratings)
    if not union:  # Check if the union is empty to avoid division by zero
        return 0
    # Calculate Jaccard similarity
    similarity = intersection / union
    
    return similarity

In [8]:
# Function to compute similarity with a specific user
def similarity_with_user(userId, movie_matrix):
    # Extract ratings of the target user from the movie ratings DataFrame
    target_user_ratings = movie_matrix[userId]
    
    # Compute similarity between the target user and all other users 
    user_similarity_with_other = {user: pearson_similarity(target_user_ratings, movie_matrix[user]) for user in movie_matrix.columns if user != userId}
    # Decomment the following code and comment the above one, if you want the jaccard similarity 
    #user_similarity_with_other = {user: jaccard_similarity(target_user_ratings, movie_matrix[user]) for user in movie_matrix.columns if user != userId}

    # Return the dictionary containing similarities with other users
    return user_similarity_with_other


### c) Function Prediction

In [9]:
# Prediction function
def predict_movie_rating(userId, movieId, ratings, user_similarity_with_other):
    # Filter ratings for the target user
    user_ratings = ratings[ratings['userId'] == userId].set_index('movieId')['rating']
    # Filter ratings for the movie in question
    movie_ratings = ratings[ratings['movieId'] == movieId].set_index('userId')
    # Select only the top 10 most similar users who have rated the movie
    similar_users = [(user, user_similarity_with_other[user]) for user in movie_ratings.index]
    similar_users = sorted(similar_users, key=lambda x: x[1], reverse=True)[:10]
    numerator = 0
    denominator = 0
    
    # Iterate over the top similar users
    for user, similarity in similar_users:
        # Check if similarity is positive
        if similarity > 0:
            # Retrieve the rating and mean rating of the current user
            rating = movie_ratings.loc[user, 'rating']
            mean_rating = movie_ratings['rating'].mean()
            # Update numerator and denominator
            numerator += similarity * (rating - mean_rating)
            denominator += abs(similarity)
                
    # Check if denominator is zero
    if denominator == 0:
        return np.nan  # No similar users found
    else:
        # Compute the predicted rating
        user_mean_rating = user_ratings.mean()
        predicted_rating = user_mean_rating + (numerator / denominator)
        return predicted_rating

    


### d) Texting for the top 10 users and movies

In [21]:
# Example usage:
userId = 3
movieId = 2
user_similarity_with_other = similarity_with_user(userId, movie_matrix)
predicted_rating = predict_movie_rating(userId, movieId, ratings, user_similarity_with_other)
print("Predicted rating for user", userId, "on movie", movieId, ":", predicted_rating)
sorted_users = sorted(user_similarity_with_other.items(), key=lambda x: x[1], reverse=True)
top_similar_users = sorted_users[:10]
print(top_similar_users)

Predicted rating for user 1 on movie 2 : 4.089186824118774
[(301, 0.12479906517911302), (597, 0.10263065929063651), (414, 0.10134803449460647), (477, 0.09921664240127363), (57, 0.09907007791369302), (369, 0.09829454350405929), (206, 0.09685159012412195), (535, 0.09649292687545016), (590, 0.09519062577565124), (418, 0.0941526193713487)]
Predicted rating for user 2 on movie 2 : 3.9543098491999333
[(189, 0.13467691447333485), (246, 0.10633397290711091), (378, 0.09958270021892503), (209, 0.08953864101469029), (227, 0.08542833279244076), (326, 0.07927295353257978), (393, 0.06339869622374057), (332, 0.06282101721964872), (196, 0.05069908258179105), (528, 0.04868250137133207)]
Predicted rating for user 3 on movie 2 : 2.674480351132912
[(441, 0.11741841994010987), (496, 0.06787752167982902), (549, 0.06400609969213772), (231, 0.061158903864791336), (527, 0.058455503222874745), (537, 0.058071938550124255), (313, 0.055313283670736894), (518, 0.05028788491396018), (244, 0.049511365158064743), (246

In [10]:
from joblib import Parallel, delayed

def predict_movie_rating_wrapper(userId, movieId, ratings, user_similarity_with_other):
    return predict_movie_rating(userId, movieId, ratings, user_similarity_with_other)

def predicting_best_movies(userId, ratings, user_similarity_with_other):
    # Get movies not rated by target user
    user_column = movie_matrix[userId]
    movies_to_recommend = user_column[user_column.isna()].index.tolist()
    
    # Create a DataFrame with movies to recommend
    df_to_recommend = pd.DataFrame({'movieId': list(movies_to_recommend)})
    
    # Calculate predicted ratings for movies to recommend (bottleneck)
    # Calculate predicted ratings for movies to recommend in parallel
    predicted_ratings = Parallel(n_jobs=-1)(
        delayed(predict_movie_rating_wrapper)(userId, movieId, ratings, user_similarity_with_other)
        for movieId in df_to_recommend['movieId']
    )
    df_to_recommend['predicted_rating'] = predicted_ratings
    
    # Filter out NaN predicted ratings
    df_to_recommend.dropna(inplace=True)
    
    # Sort recommended movies in descending order of predicted rating
    df_to_recommend.sort_values(by='predicted_rating', ascending=False, inplace=True)
    
    return df_to_recommend[['movieId', 'predicted_rating']].values.tolist()

In [26]:
# Find the top similar users for the target user
userId = 6
user_similarity_with_other = similarity_with_user(userId, movie_matrix)
sorted_users = sorted(user_similarity_with_other.items(), key=lambda x: x[1], reverse=True)
top_similar_users = sorted_users[:10]

print("Top 10 similar users for user", userId, ":")
for user_id, similarity_score in top_similar_users:
    print("User:", user_id, "--> Similarity Score:", similarity_score)

movies_suggested= predicting_best_movies(userId, ratings, user_similarity_with_other)
print("\nTop 10 recommended movies for user", userId, ":")
for movie_id, rating in movies_suggested[:10]:
    movie_title = movies[movies['movieId'] == movie_id]['title'].values[0]
    print("Movie:", movie_title, "--> Predicted Rating:", rating)


Top 10 similar users for user 6 :
User: 84 --> Similarity Score: 0.20436507936507936
User: 312 --> Similarity Score: 0.17763157894736842
User: 202 --> Similarity Score: 0.16396103896103897
User: 385 --> Similarity Score: 0.16252821670428894
User: 372 --> Similarity Score: 0.16033755274261605
User: 156 --> Similarity Score: 0.15772357723577235
User: 117 --> Similarity Score: 0.15421686746987953
User: 325 --> Similarity Score: 0.1541501976284585
User: 352 --> Similarity Score: 0.15370018975332067
User: 469 --> Similarity Score: 0.14896755162241887

Top 10 recommended movies for user 6 :
Movie: Twin Dragons (Shuang long hui) (1992) --> Predicted Rating: 5.30919060007604
Movie: Beautiful (2000) --> Predicted Rating: 4.699942575864782
Movie: Hard Way, The (1991) --> Predicted Rating: 4.692684874651702
Movie: To Grandmother's House We Go (1992) --> Predicted Rating: 4.649875204755022
Movie: Serbian Film, A (Srpski film) (2010) --> Predicted Rating: 4.6051252773062705
Movie: Paper Heart (2009

# Second Assignment

### a) group recommendation

In [10]:
from statistics import mean

#function for average aggregation
def average_aggregation(users):

    #dictionary to store all the movies and the ratings of all the users
    movie_ratings_dict={}
    for user in users:
        #to get the similarity and all the movies recommended
        user_similarity_with_other = similarity_with_user(user, movie_matrix)
        movies_suggested=predicting_best_movies(user, ratings, user_similarity_with_other)
        
        for movieId, rating in movies_suggested:
            if movieId in movie_ratings_dict:
                if isinstance(movie_ratings_dict[movieId], list):
                    movie_ratings_dict[movieId].append(rating)
                else:
                    movie_ratings_dict[movieId] = [movie_ratings_dict[movieId], rating] 
            else:
                movie_ratings_dict[movieId] = rating
    #print(movie_ratings_dict)
                
    #to memorize the average ratings of all movies            
    movie_avg = {}
    for movie, votes in movie_ratings_dict.items():
        if isinstance(votes, list):  # Check if votes is a list
            if len(votes) == len(users): #consider only the movies with prediction from all users
                movie_avg[movie] = mean(votes)
        
        
    #sort it to get the one with the higest avg
    sorted_movie_avg = dict(sorted(movie_avg.items(), key=lambda item: item[1], reverse=True))
    return sorted_movie_avg

users = [1, 3, 480]
average = average_aggregation(users)

# Print only the top 10 movies
top_10_movies = dict(list(average.items())[:10])
print("Top 10 Movies:")
for movie, avg_rating in top_10_movies.items():
    movie_title = movies[movies['movieId'] == movie]['title'].values[0]
    print("Movie:", movie_title, "--> Predicted Rating:", avg_rating)

Top 10 Movies:
Movie: Hush (2016) --> Predicted Rating: 4.971062119440762
Movie: Opera (1987) --> Predicted Rating: 4.91882532435635
Movie: The Cloverfield Paradox (2018) --> Predicted Rating: 4.887814582824376
Movie: Ivan's Childhood (a.k.a. My Name is Ivan) (Ivanovo detstvo) (1962) --> Predicted Rating: 4.882981421284085
Movie: Unbelievable Adventures of Italians in Russia (1974) --> Predicted Rating: 4.609132121155715
Movie: The Great Train Robbery (1903) --> Predicted Rating: 4.56979111350653
Movie: Dragon Ball Z: The History of Trunks (Doragon bôru Z: Zetsubô e no hankô!! Nokosareta chô senshi - Gohan to Torankusu) (1993) --> Predicted Rating: 4.457192915928266
Movie: Skin I Live In, The (La piel que habito) (2011) --> Predicted Rating: 4.457061709642226
Movie: Peeping Tom (1960) --> Predicted Rating: 4.4350452060414165
Movie: To Be or Not to Be (1942) --> Predicted Rating: 4.431458151523233


In [11]:
#use three users
users=[1, 3, 480]

#least misery aggregation consider the user less satisfiaed 
def least_misery_aggregation(users):

    movie_ratings_dict={}
    for user in users:
        user_similarity_with_other = similarity_with_user(user, movie_matrix)
        movies_suggested=predicting_best_movies(user, ratings, user_similarity_with_other)
        
        for movieId, rating in movies_suggested:
            if movieId in movie_ratings_dict:
                if isinstance(movie_ratings_dict[movieId], list):
                    movie_ratings_dict[movieId].append(rating)
                else:
                    movie_ratings_dict[movieId] = [movie_ratings_dict[movieId], rating]
            else:
                movie_ratings_dict[movieId] = rating
    movie_min={}
    for movie, votes in movie_ratings_dict.items():
        if isinstance(votes, list):  # Check if votes is a list
            if len(votes) == len(users): #consider only the movies with prediction from all users
                movie_min[movie] = min(votes)
        
        
    sorted_movie_min = dict(sorted(movie_min.items(), key=lambda item: item[1], reverse=True))
    return sorted_movie_min

least_misery = least_misery_aggregation(users)
# Print only the top 10 movies
top_10_movies = dict(list(least_misery.items())[:10])
print("Top 10 Movies:")
for movie, rat in top_10_movies.items():
    movie_title = movies[movies['movieId'] == movie]['title'].values[0]
    print("Movie:", movie_title, "--> Predicted Rating:", rat)

Top 10 Movies:
Movie: Opera (1987) --> Predicted Rating: 4.435897435897436
Movie: Hush (2016) --> Predicted Rating: 4.269230769230769
Movie: The Cloverfield Paradox (2018) --> Predicted Rating: 4.185897435897436
Movie: Peeping Tom (1960) --> Predicted Rating: 4.002858871881985
Movie: Hachiko: A Dog's Story (a.k.a. Hachi: A Dog's Tale) (2009) --> Predicted Rating: 3.935897435897436
Movie: Skin I Live In, The (La piel que habito) (2011) --> Predicted Rating: 3.9358974358974357
Movie: Witchfinder General (Conquerer Worm, The) (1968) --> Predicted Rating: 3.9358974358974357
Movie: The Great Train Robbery (1903) --> Predicted Rating: 3.9206524589793657
Movie: Witches of Eastwick, The (1987) --> Predicted Rating: 3.7841442921880972
Movie: Maze Runner: Scorch Trials (2015) --> Predicted Rating: 3.7066252076548514


### b) my aggregation

In [15]:
from statistics import mean

#aggregation that use the difference between ratings to order the movies
def weighted_aggregation(users):
    
    movie_ratings_dict={}
    for user in users:
        user_similarity_with_other = similarity_with_user(user, movie_matrix)
        movies_suggested=predicting_best_movies(user, ratings, user_similarity_with_other)
        for movieId, rating in movies_suggested:
            if movieId in movie_ratings_dict:
                if isinstance(movie_ratings_dict[movieId], list):
                    movie_ratings_dict[movieId].append(rating)
                else:
                    movie_ratings_dict[movieId] = [movie_ratings_dict[movieId], rating]
            else:
                movie_ratings_dict[movieId] = rating
    #print(movie_ratings_dict)
    movie_avg={}
    for movie, votes in movie_ratings_dict.items():
        if isinstance(votes, list):  # Check if votes is a list
            if len(votes) == len(users): #consider only the movies with prediction from all users
                avg_rating = mean(votes)
                diff = max(votes) - min(votes) #disagreement between users on a movie rating
                movie_avg[movie] = (avg_rating, diff)
    #oreder on average rating and disagreement between users(disagreement decrease, average rating increase)
    sorted_movie_avg = dict(sorted(movie_avg.items(), key=lambda item: item[1][0] * (1/item[1][1]), reverse=True))
    return sorted_movie_avg

users = [1, 3, 480]
top_mov = weighted_aggregation(users)
 # Print only the top 10 movies
top_10_movies = dict(list(top_mov.items())[:10])
print("Top 10 Movies:")
for movie, (avg_rating, weight) in top_10_movies.items():
    movie_title = movies[movies['movieId'] == movie]['title'].values[0]
    print(f"Movie: {movie_title} --> Avg Rating: {avg_rating}, Weight: {weight}")

Top 10 Movies:
Movie: Joy Ride (2001) --> Avg Rating: 3.456479642400125, Weight: 0.21312798603271643
Movie: I Sell the Dead (2008) --> Avg Rating: 3.5196430889089556, Weight: 0.22748756197518638
Movie: Zombeavers (2014) --> Avg Rating: 3.5196430889089556, Weight: 0.22748756197518638
Movie: Tideland (2005) --> Avg Rating: 3.519858957607847, Weight: 0.23394264430986622
Movie: Hostel: Part II (2007) --> Avg Rating: 3.453501686937617, Weight: 0.23224986355126997
Movie: Shakes the Clown (1992) --> Avg Rating: 3.2068246708506103, Weight: 0.3037833840901434
Movie: Furious 7 (2015) --> Avg Rating: 3.616738974366626, Weight: 0.35935521340790055
Movie: Forget Paris (1995) --> Avg Rating: 3.1647922856653135, Weight: 0.3157907217435163
Movie: Innkeepers, The (2011) --> Avg Rating: 3.489781274565654, Weight: 0.39242381954338645
Movie: Project A 2 ('A' gai wak juk jap) (1987) --> Avg Rating: 3.489781274565654, Weight: 0.39242381954338645


# Third assignment

In [17]:
#compute the satisfation of all users with the movies 
def compute_users_satisfaction(users, top_group):
    
    users_satisfaction=[]
    group_prediction=0 #sum up group prediction for the top movies
    for movie, rat in top_group.items():
        group_prediction += rat

    #for every users determine his sudisfaction
    for user in users:

        users_prediction=0 
        
        #sum up user prediction for the top movies of the group
        user_similarity_with_other = similarity_with_user(user, movie_matrix)
        movies_suggested=predicting_best_movies(user, ratings, user_similarity_with_other)
        for movie, rating in movies_suggested:
            if movie in top_group:
                users_prediction += rating

        users_satisfaction.append(group_prediction/users_prediction)

    return users_satisfaction



def compute_min_max_users_satisfaction(users, top_group):
    
    users_sadisfaction=compute_users_satisfaction(users, top_group)
    return max(users_sadisfaction), min(users_sadisfaction)


#copute the overall satisfaction of the users at a given iteration
def compute_users_overall_satisfaction(satisfactions, iteration):
    users_overall_satisfaction={}
    for user, ratings in satisfactions.items():
        users_overall_satisfaction[user] = sum(ratings) / (iteration+1)
    return users_overall_satisfaction


In [15]:

#the classic version
def seq_aggregation(users, iteration):
    
    top_group={}
    old_iterations_moives={}
    users_satisfaction_iterations = {user: [] for user in range(len(users))}
    #get the two method aggregation
    movies_avg = average_aggregation(users)
    movies_least_misery = least_misery_aggregation(users)
    for iter in range(iteration):
        print("\nIteration: ", iter)
        if iter == 0: #the first iteration use only the average method
            top_group = movies_avg
        else:
            #compute previous iteration satisfction (alfa)
            max_user_satisfaction, min_user_satisfaction = compute_min_max_users_satisfaction(users, top_group)
            alfa = max_user_satisfaction - min_user_satisfaction
            new_recommendation = {}
            for movie, avg_rating in movies_avg.items():
                if movie not in old_iterations_moives: #to consider different movies for the new iteration
                    if movie in movies_least_misery:
                        min_rating = movies_least_misery[movie]
                        new_recommendation[movie] = avg_rating * (1 - alfa) + min_rating * alfa
                        top_group = dict(sorted(new_recommendation.items(), key=lambda item: item[1], reverse=True))

        old_iterations_moives.update(dict(list(top_group.items())[:10])) #to memorize the top movies predicted in the old iterations
        #top movies
        print("Top 10 Movies:")
        for movie, rat in dict(list(top_group.items())[:10]).items():
            movie_title = movies[movies['movieId'] == movie]['title'].values[0]
            print("Movie:", movie_title, "--> Predicted Rating:", rat)
        #compute all users satisfaction
        users_sud = compute_users_satisfaction(users, top_group)
        for i in range(len(users_sud)):
            users_satisfaction_iterations[i].append(users_sud[i])
        #print(users_satisfaction_iterations)
        users_satisfaction_overall = compute_users_overall_satisfaction(users_satisfaction_iterations, iter)
        #print(users_satisfaction_overall)
        group_overall_satisfaction = sum(users_satisfaction_overall[user] for user in range(len(users)))/len(users)
        group_overall_disageement = (max(users_satisfaction_overall.values())) - (min(users_satisfaction_overall.values()))
        print("\nGroup overall sadisfaction:", group_overall_satisfaction)
        print("Group overall disageement: ", group_overall_disageement)
        print(users_sud)

users = [1, 3, 480]
iteration = 3
seq_aggregation(users, iteration)


Iteration:  0
Top 10 Movies:
Movie: Hush (2016) --> Predicted Rating: 4.971062119440762
Movie: Opera (1987) --> Predicted Rating: 4.91882532435635
Movie: The Cloverfield Paradox (2018) --> Predicted Rating: 4.887814582824376
Movie: Ivan's Childhood (a.k.a. My Name is Ivan) (Ivanovo detstvo) (1962) --> Predicted Rating: 4.882981421284085
Movie: Unbelievable Adventures of Italians in Russia (1974) --> Predicted Rating: 4.609132121155715
Movie: The Great Train Robbery (1903) --> Predicted Rating: 4.56979111350653
Movie: Dragon Ball Z: The History of Trunks (Doragon bôru Z: Zetsubô e no hankô!! Nokosareta chô senshi - Gohan to Torankusu) (1993) --> Predicted Rating: 4.457192915928266
Movie: Skin I Live In, The (La piel que habito) (2011) --> Predicted Rating: 4.457061709642226
Movie: Peeping Tom (1960) --> Predicted Rating: 4.4350452060414165
Movie: To Be or Not to Be (1942) --> Predicted Rating: 4.431458151523233

Group overall sadisfaction: 1.0520011005548267
Group overall disageement: 

In [16]:
#variance of the satisfaction between the single users' satisfaction and the mean
def get_users_satisfaction_variance(users_satisfaction, users):
    
    mean_satisfaction = sum(users_satisfaction) / len(users)
    sum_absolute_differences = sum(abs(s - mean_satisfaction) for s in users_satisfaction)
    disagreement = sum_absolute_differences / len(users)

    return disagreement



#modified version that use the variance
def my_sequence_aggregation(users, iteration):
    
    top_group={} #to memorize the movies recommended in the current itration
    old_iterations_moives={} #to memorize the movies recommended in the iteeration before
    users_satisfaction_iterations = {user: [] for user in range(len(users))}
    #get the two method aggregation
    movies_avg = average_aggregation(users)
    movies_least_misery = least_misery_aggregation(users)
    for iter in range(iteration):
        print("\nIteration: ", iter)
        # the first iteration has the recommendation of the average aggregation
        if iter == 0:
            top_group = movies_avg
        else:
            #compute previous iteration sadisfction alfa
            users_sadisfaction = compute_users_satisfaction(users,  top_group)
            alfa = get_users_satisfaction_variance(users_sadisfaction, users)
            new_recommendation = {}
            for movie, avg_rating in movies_avg.items():
                if movie not in old_iterations_moives: #to consider different movies for the new iteration
                    if movie in movies_least_misery:
                        min_rating = movies_least_misery[movie]
                        new_recommendation[movie] = avg_rating * (1 - alfa) + min_rating * alfa
            top_group = dict(sorted(new_recommendation.items(), key=lambda item: item[1], reverse=True))

        old_iterations_moives.update(dict(list(top_group.items())[:10])) #to memorize the movies predicted in the old iteration
        #top movies
        print("Top 10 Movies:")
        for movie, rat in dict(list(top_group.items())[:10]).items():
            movie_title = movies[movies['movieId'] == movie]['title'].values[0]
            print("Movie:", movie_title, "--> Predicted Rating:", rat)
        
        users_sut = compute_users_satisfaction(users,  dict(list(top_group.items())))
        for i in range(len(users_sut)):
            users_satisfaction_iterations[i].append(users_sut[i])
        #print(users_satisfaction_iterations)
        users_satisfaction_overall = compute_users_overall_satisfaction(users_satisfaction_iterations, iter)
        #print(users_satisfaction_overall)
        group_overall_satisfaction = sum(users_satisfaction_overall[user] for user in range(len(users)))/len(users)
        group_overall_disageement = (max(users_satisfaction_overall.values())) - (min(users_satisfaction_overall.values()))
        print("\nGroup overall satisfaction:", group_overall_satisfaction)
        print("Group overall disageement: ", group_overall_disageement)
        print(users_sut)

users = [1, 3, 480]
iteration = 3
my_sequence_aggregation(users, iteration)


Iteration:  0
Top 10 Movies:
Movie: Hush (2016) --> Predicted Rating: 4.971062119440762
Movie: Opera (1987) --> Predicted Rating: 4.91882532435635
Movie: The Cloverfield Paradox (2018) --> Predicted Rating: 4.887814582824376
Movie: Ivan's Childhood (a.k.a. My Name is Ivan) (Ivanovo detstvo) (1962) --> Predicted Rating: 4.882981421284085
Movie: Unbelievable Adventures of Italians in Russia (1974) --> Predicted Rating: 4.609132121155715
Movie: The Great Train Robbery (1903) --> Predicted Rating: 4.56979111350653
Movie: Dragon Ball Z: The History of Trunks (Doragon bôru Z: Zetsubô e no hankô!! Nokosareta chô senshi - Gohan to Torankusu) (1993) --> Predicted Rating: 4.457192915928266
Movie: Skin I Live In, The (La piel que habito) (2011) --> Predicted Rating: 4.457061709642226
Movie: Peeping Tom (1960) --> Predicted Rating: 4.4350452060414165
Movie: To Be or Not to Be (1942) --> Predicted Rating: 4.431458151523233

Group overall satisfaction: 1.0520011005548267
Group overall disageement: 

In [23]:
#variance of the satisfaction between the single users' satisfaction and the mean
def get_users_satisfaction_variance(users_satisfaction, users):
    
    mean_satisfaction = sum(users_satisfaction) / len(users)
    sum_absolute_differences = sum(abs(s - mean_satisfaction) for s in users_satisfaction)
    disagreement = sum_absolute_differences / len(users)

    return disagreement

#another version with the weighted aggregation and the least misery
def my_seq_aggregation(users, iteration):
    
    top_group={}
    old_iterations_moives={}
    users_satisfaction_iterations = {user: [] for user in range(len(users))}
    #get the two method aggregation
    movies_weight = weighted_aggregation(users)
    movies_least_misery = least_misery_aggregation(users)
    for iter in range(iteration):
        print("\nIteration: ", iter)
        if iter == 0: # the first iteration use only the weighted method and memorize the rating such that it's bigger if the difference is smaller
            top_group = {movie: rating/diff for movie, (rating, diff) in movies_weight.items() if diff != 0}
            top_group = dict(sorted(top_group.items(), key=lambda item: item[1], reverse=True))
        else:
            new_recommendation = {}
            #compute previous iteration sadisfction alfa
            users_sadisfaction = compute_users_satisfaction(users,  top_group)
            alfa = get_users_satisfaction_variance(users_sadisfaction, users)
            for movie, (avg_rating,diff) in movies_weight.items():
                if movie not in old_iterations_moives: #to consider different movies for the new iteration
                    if movie in movies_least_misery:
                        min_rating = movies_least_misery[movie]
                        new_recommendation[movie] = (avg_rating/diff) * (1 - alfa) + min_rating * alfa
            top_group = dict(sorted(new_recommendation.items(), key=lambda item: item[1], reverse=True))

        old_iterations_moives.update(dict(list(top_group.items())[:10])) #to memorize the movies predicted in the old iteration
        #top movies
        print("Top 10 Movies:")
        for movie, rat in dict(list(top_group.items())[:10]).items():
            movie_title = movies[movies['movieId'] == movie]['title'].values[0]
            print("Movie:", movie_title, "--> Predicted Rating:", rat)
        
        users_sat = compute_users_satisfaction(users,  dict(list(top_group.items())))
        print(users_sat)
        for i in range(len(users_sat)):
            users_satisfaction_iterations[i].append(users_sat[i])
        #print(users_sadisfaction_iterations)
        users_satisfaction_overall = compute_users_overall_satisfaction(users_satisfaction_iterations, iter)
        #print(users_sadisfaction_overall)
        group_overall_satisfaction = sum(users_satisfaction_overall[user] for user in range(len(users)))/len(users)
        group_overall_disageement = (max(users_satisfaction_overall.values())) - (min(users_satisfaction_overall.values()))
        print("\nGroup overall satisfaction:", group_overall_satisfaction)
        print("Group overall disageement: ", group_overall_disageement)

users = [1, 3, 480]
iteration = 3
my_seq_aggregation(users, iteration)


Iteration:  0
Top 10 Movies:
Movie: Joy Ride (2001) --> Predicted Rating: 16.217859074920057
Movie: I Sell the Dead (2008) --> Predicted Rating: 15.471804516911861
Movie: Zombeavers (2014) --> Predicted Rating: 15.471804516911861
Movie: Tideland (2005) --> Predicted Rating: 15.045820175246268
Movie: Hostel: Part II (2007) --> Predicted Rating: 14.869768421522643
Movie: Shakes the Clown (1992) --> Predicted Rating: 10.556287271785184
Movie: Furious 7 (2015) --> Predicted Rating: 10.064523455963615
Movie: Forget Paris (1995) --> Predicted Rating: 10.021802629894054
Movie: Innkeepers, The (2011) --> Predicted Rating: 8.892888506682054
Movie: Project A 2 ('A' gai wak juk jap) (1987) --> Predicted Rating: 8.892888506682054
[0.4656324386484439, 0.8093297738764336, 0.6258482795538067]

Group overall satisfaction: 0.6336034973595613
Group overall disageement:  0.3436973352279897

Iteration:  1
Top 10 Movies:
Movie: Hachiko: A Dog's Story (a.k.a. Hachi: A Dog's Tale) (2009) --> Predicted Ratin