# Lab 5 - Group Recommenders

## Lab Setup

 * Download and extract: https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
   * Read more: https://grouplens.org/datasets/movielens/
 * [optional] Create virtual enviroment
 `python3 -m venv ./recsyslab5`
 * install needed libraries:
 `pip install numpy pandas matplotlib`

## Part 1 - Data preparation and preprocessing

In [9]:
# import needed packages

import math
import numpy as np
import pandas

from random import choice, sample
from statistics import mean, stdev

from reco_utils import *

In [10]:
# read user ratings and calculate expected movie ratings with collaborative filtering

raw_ratings = pandas.read_csv('ml-latest-small/ratings.csv').drop(columns=['timestamp'])
movies = list(raw_ratings['movieId'].unique())
users = list(raw_ratings['userId'].unique())
ratings = get_predicted_ratings(raw_ratings)
ratings

Total error: 214170.09780038404
Total error: 207428.14573806542
Total error: 201160.6803795276
Total error: 195319.18872642025
Total error: 189861.42092832897
Total error: 184750.42983826736
Total error: 179953.78135130176
Total error: 175442.90076708063
Total error: 171192.52839635217
Total error: 167180.26358170254
Total error: 163386.18078545926
Total error: 159792.50480770977
Total error: 156383.33481535714
Total error: 153144.40889060387
Total error: 150062.90239044942
Total error: 147127.25465478934
Total error: 144327.01958862352
Total error: 141652.7364326853
Total error: 139095.81767081318
Total error: 136648.45153515888
Total error: 134303.51698748642
Total error: 132054.5093960615
Total error: 129895.47540822372
Total error: 127820.9557505594
Total error: 125825.93488100792
Total error: 123905.79657758829
Total error: 122056.28468262003
Total error: 120273.46833399155
Total error: 118553.71110998398
Total error: 116893.64359442676
Total error: 115290.13893702856
Total error:

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
1,5,7,7,5,5,5,6,5,8,6,...,8,8,4,8,6,5,3,6,4,7
2,9,10,6,6,10,8,5,10,2,10,...,0,0,9,8,5,10,10,10,3,6
3,4,4,4,5,3,7,10,10,6,1,...,10,9,0,10,3,7,3,0,3,10
4,6,7,6,5,7,1,6,4,9,6,...,3,10,2,6,8,3,6,5,6,10
5,4,6,10,2,4,2,0,10,8,10,...,1,8,4,10,3,10,9,10,10,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,6,6,6,6,6,6,6,6,6,6,...,6,6,6,6,6,6,6,6,6,6
607,7,7,5,5,9,2,9,3,7,7,...,5,6,9,6,6,7,5,6,8,6
608,6,6,6,6,6,6,6,5,6,5,...,7,6,6,6,6,5,6,5,5,6
609,8,9,0,2,8,10,10,5,6,5,...,7,10,1,6,0,0,0,7,10,9


In [36]:
ratings.loc[groups[0]]

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
111,6,6,5,5,6,6,6,6,5,5,...,6,6,7,6,7,5,7,6,7,5
307,6,6,6,5,6,6,6,5,6,6,...,6,6,6,6,6,6,6,6,7,6
474,6,6,6,6,6,6,6,6,6,6,...,6,6,6,6,6,6,6,6,6,6
599,6,6,6,6,6,6,6,6,6,6,...,6,6,6,6,6,6,6,7,7,6
414,6,6,6,6,6,6,6,6,6,6,...,6,6,6,6,6,6,6,6,6,6


In [23]:
# read movie categories

movies_metadata = pandas.read_csv('ml-latest-small/movies.csv').set_index('movieId')
movies_metadata

Unnamed: 0_level_0,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,Jumanji (1995),Adventure|Children|Fantasy
3,Grumpier Old Men (1995),Comedy|Romance
4,Waiting to Exhale (1995),Comedy|Drama|Romance
5,Father of the Bride Part II (1995),Comedy
...,...,...
193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
193585,Flint (2017),Drama
193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [19]:
df = pandas.read_csv('groups.csv')
df.shape

(8, 5)

In [38]:
# read example user groups
groups = pandas.read_csv('groups.csv',header=None).values.tolist()
groups

[[606, 274, 474, 599, 448],
 [111, 307, 474, 599, 414],
 [469, 182, 232, 448, 600],
 [508, 581, 497, 402, 566],
 [300, 515, 245, 568, 507],
 [2, 371, 252, 518, 37],
 [269, 360, 469, 287, 308],
 [243, 527, 418, 118, 370],
 [186, 559, 327, 553, 314]]

In [24]:
# helper function

def describe_group(group, N=10):
    print(f'\n\nUser ids: {group}')
    group_size = len(group)
    
    mean_stdev = ratings.loc[group].std(axis=0).mean()
    median_stdev = ratings.loc[group].std(axis=0).median()
    std_stdev = ratings.loc[group].std(axis=0).std()
    print(f'\nMean ratings deviation: {mean_stdev}')
    print(f'Median ratings deviation: {median_stdev}')
    print(f'Standard deviation of ratings deviation: {std_stdev}')
    
    average_scores = ratings.iloc[group].mean(axis=0)
    average_scores = average_scores.sort_values()
    best_movies = [(movies_metadata['title'][movie_id], average_scores[movie_id]) for movie_id in list(average_scores[-N:].index)]
    worst_movies = [(movies_metadata['title'][movie_id], average_scores[movie_id]) for movie_id in list(average_scores[:N].index)]
    
    print('\nBest movies:')
    for movie, score in best_movies[::-1]:
        print(f'{movie}, {score}*')
    print('\nWorst movies:')
    for movie, score in worst_movies:
        print(f'{movie}, {score}*')

describe_group(groups[7])



User ids: [186, 559, 327, 553, 314]

Mean ratings deviation: 3.111895921101595
Median ratings deviation: 3.1304951684997055
Standard deviation of ratings deviation: 0.9164014063709972

Best movies:
Banger Sisters, The (2002), 9.6*
Nobody Knows (Dare mo shiranai) (2004), 9.6*
A Street Cat Named Bob (2016), 9.6*
Fifty Shades of Grey (2015), 9.6*
Adventures of Huck Finn, The (1993), 9.6*
War of the Worlds (2005), 9.6*
I Want to Live! (1958), 9.6*
Spring (2015), 9.4*
I'll Be Home For Christmas (1998), 9.4*
Legend of Rita, The (Stille nach dem Schuß, Die) (1999), 9.4*

Worst movies:
Moby Dick (1956), 1.2*
Ender's Game (2013), 1.2*
Miller's Crossing (1990), 1.2*
Cat People (1982), 1.2*
September (1987), 1.4*
Endgame (2009), 1.4*
Highway 61 (1991), 1.4*
Zathura (2005), 1.4*
Big Year, The (2011), 1.4*
Tinker, Tailor, Soldier, Spy (1979), 1.4*


## Part 2 - simple algorithms

In [None]:
# define an interface for all recommending algorithms

class Recommender:
    def recommend(self, movies, ratings, group, size):
        pass

# random algorithm - baseline and comparison
    
class RandomRecommender(Recommender):
    def __init__(self):
        self.name = 'random'
        
    def recommend(self, movies, ratings, group, size):
        return sample(movies, size)

In [None]:
# recommend movies with the highest average rating

class AverageRecommender(Recommender):
    def __init__(self):
        self.name = 'average'
    
    def recommend(self, movies, ratings, group, size):
        movie_idx = ratings.loc[group,].mean().sort_values(ascending=False)[:size].index
        return movies.loc[movie_idx]
        
        

In [61]:
movie_idx = ratings.loc[groups[0],].mean().sort_values(ascending=False)[:5].index
movies_metadata.loc[movie_idx]

Unnamed: 0,title,genres
42018,Mrs. Henderson Presents (2005),Comedy|Drama
77266,Disgrace (2008),Drama
141820,Old Men: Robbers (1971),Comedy
26498,Boy Meets Girl (1984),Drama
8875,"Come Back, Little Sheba (1952)",Drama


In [44]:
# highest average rating but without movies with at least one score below the threshold

class AverageWithoutMiseryRecommender(Recommender):
    def __init__(self, score_threshold):
        self.name = 'average_without_misery'
        self.score_threshold = score_threshold
        
    def recommend(self, movies, ratings, group, size):
        raise NotImplementedError()

NameError: name 'Recommender' is not defined

In [None]:
# preferences of only one user in every iteration

class FairnessRecommender(Recommender):
    def __init__(self):
        self.name = 'fairness'
        
    def recommend(self, movies, ratings, group, size):
        raise NotImplementedError()

In [None]:
# election algorithms (dictatorship, Bord, Copeland, simple vote)

class VotingRecommender(Recommender):
    def __init__(self):
        self.name = # name of the algorithm
    
    def recommend(self, movies, ratings, group, size):
        raise NotImplementedError()

In [None]:
# greedy algorithm, approximate Proportional Approval Voting
#in every iteration chose the movie that improves the PAV score

class ProportionalApprovalVotingRecommender(Recommender):
    def __init__(self, threshold):
        self.threshold = threshold
        self.name = 'PAV'
        
    def recommend(self, movies, ratings, group, size):
        raise NotImplementedError()

## Part 3 - Objective function

In [None]:
# 2 helper functions
# find user's favourite movies
# calculate the sum of ratings given by a user for all movies in a give recommendation

def top_n_movies_for_user(ratings, movies, user_id, n):
    raise NotImplementedError()

def total_score(recommendation, user_id, ratings):
    raise NotImplementedError()

In [None]:
# function to calculate the satisfaction score for each user
# fraction - satisfaction from generated recommendation/satisfaction from ideal recommendation
def overall_user_satisfaction(recommendation, user_id, movies, ratings):
    raise NotImplementedError()

# objective function - average satisfaction for all users in a group
def overall_group_satisfaction(recommendation, group, movies, ratings):
    raise NotImplementedError()

# objective function - difference between the highest and lowest satisfaction score
def group_disagreement(recommendation, group, movies, ratings):
    raise NotImplementedError()

## Part 4 - Sequential Hybrid Aggregation

In [None]:
# Algorithm to balance between highest scored elements and highest minimal score
# in each iteration calcuate the alpha parameter
class SequentialHybridAggregationRecommender(Recommender):
     def __init__(self):
        self.name = 'sequential_hybrid_aggregation'
    
    def recommend(self, movies, ratings, group, size):
        raise NotImplementedError()

## Part 5 - Algorithm comparison

In [None]:
recommenders = [
    RandomRecommender(),
    AverageRecommender(),
    AverageWithoutMiseryRecommender(5),
    FairnessRecommender(),
    VotingRecommender(),
    ProportionalApprovalVotingRecommender(5),
    SequentialHybridAggregationRecommender()
]

recommendation_size = 10

# for each algorithm:
#  - generate one recommendation for each group
#  - calculatevalue of both objective functions for each recommendation
#  - calculate mean and standard deviation for both objective functions
#  - print the results


for recommender in recommenders:
    raise NotImplementedError()