# Laboratorium 6 - rekomendacje grupowe

## Przygotowanie

 * pobierz i wypakuj dataset: https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
   * więcej możesz poczytać tutaj: https://grouplens.org/datasets/movielens/
 * [opcjonalnie] Utwórz wirtualne środowisko
 `python3 -m venv ./recsyslab6`
 * zainstaluj potrzebne biblioteki:
 `pip install numpy pandas matplotlib`

## Część 1. - przygotowanie danych

In [1]:
# importujemy wszystkie potrzebne pakiety

import math
import numpy as np
import pandas
from collections import defaultdict

from random import choice, sample
from statistics import mean, stdev

from reco_utils import *

In [2]:
# wczytujemy oceny uytkownikow i obliczamy (za pomocą collaborative filtering) wszystkie przewidywane oceny filmow

raw_ratings = pandas.read_csv('ml-latest-small/ratings.csv').drop(columns=['timestamp'])
movies = list(raw_ratings['movieId'].unique())
users = list(raw_ratings['userId'].unique())
ratings = get_predicted_ratings(raw_ratings)
ratings

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
1,10,9,9,9,8,10,8,7,7,9,...,7,9,8,7,7,8,9,9,7,8
2,6,5,5,5,4,5,5,4,4,5,...,4,5,4,4,4,4,6,5,4,5
3,4,4,4,4,3,4,4,3,3,4,...,3,4,3,3,3,4,4,5,4,3
4,8,8,7,7,7,8,7,6,6,8,...,6,8,6,6,6,7,7,8,6,7
5,6,6,5,5,5,5,5,4,4,6,...,5,6,5,4,5,5,6,6,4,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,9,8,7,8,7,8,7,6,6,8,...,7,8,7,7,6,7,8,9,7,7
607,9,8,8,8,7,8,7,6,6,8,...,6,8,7,7,6,7,8,8,6,7
608,8,7,6,6,6,7,6,5,5,7,...,6,7,6,6,6,6,7,7,6,6
609,5,5,5,4,4,5,5,4,3,5,...,4,5,4,4,4,4,5,5,4,4


In [3]:
# definiujemy testowe grupy uzytkownikow, dla ktorych bedziemy generowac rekomendacje

groups_no = 50
group_size = 5
groups = [sample(users, group_size) for i in range(groups_no)]
# groups

## Część 2. - algorytmy proste

In [4]:
# zdefiniujmy interfejs dla wszystkich algorytmow rekomendacyjnych

class Recommender:
    def recommend(self, movies, ratings, group, size):
        pass


# jako pierwszy zaimplementujemy algorytm losowy - dla porownania
    
class RandomRecommender(Recommender):
    def __init__(self):
        self.name = 'random'
        
    def recommend(self, movies, ratings, group, size):
        return sample(movies, size)

In [5]:
# algorytm rekomendujacy filmy o najwyzszej sredniej ocen

class AverageRecommender(Recommender):
    def __init__(self):
        self.name = 'average'
    
    def recommend(self, movies, ratings, group, size):
        films = ratings.loc[group]
        avg = np.sum(films, axis=0)/len(group)
        return np.argsort(avg)[-size:].index.values

In [6]:
# algorytm rekomendujacy filmy o najwyzszej sredniej ocen,
#   ale rownoczesnie wykluczajacy te filmy, ktore otrzymaly choc jedna ocene ponizej thresholdu

class AverageWithoutMiseryRecommender(Recommender):
    def __init__(self, score_threshold):
        self.name = 'average_without_misery'
        self.score_threshold = score_threshold
        
    def recommend(self, movies, ratings, group, size):
        films = ratings.loc[group]
        min_rating= np.min(films, axis=0)
        accepted = np.where(min_rating[min_rating > self.score_threshold])[0]
        avg = np.sum(films, axis=0)/len(group)
        avg = avg.iloc[accepted]
        return np.argsort(avg)[-size:].index.values

In [7]:
# algorytm uwzgledniajacy preferencje tylko jednego uzytkownika

class DictatorshipRecommender(Recommender):
    def __init__(self, dictator_id):
        self.name = 'dictatorship'
        self.dictator_id = dictator_id
        
    def recommend(self, movies, ratings, group, size):
        films = ratings.loc[self.dictator_id]
        return np.argsort(films)[-size:].index.values

In [8]:
# algorytm, ktory w kazdej turze uwzglednia preferencje tylko jednego, kolejnego uzytkownika

class FairnessRecommender(Recommender):
    def __init__(self):
        self.name = 'fairness'
    
    def recommend(self, movies, ratings, group, size):
        res = []
        for i in range(size):
            uid = group[i%len(group)]
            films = ratings.loc[uid]
            res.append(np.argsort(films)[-1:].index.values[0])
        return res

In [9]:
# algorytm zachlanny, aproksymujacy metode Proportional Approval Voting
#   w kazdej iteracji wybieramy ten film, ktory najbardziej zwieksza zadowolenie zgodnie z punktacja PAV

class PAVRecommender(Recommender):
    def __init__(self, threshold):
        self.threshold = threshold
        self.name = 'PAV'
        
    def recommend(self, movies, ratings, group, size):  
        accepting = {}
        films = ratings.loc[group]
        for film_id in movies:
            group_ratings = films.loc[:, film_id]
            users = group_ratings[group_ratings > self.threshold].index.values
            accepting[film_id] = users
        user_accepted = defaultdict(int)
        
        res = []
        for _ in range(size):
            points = [sum([1/(user_accepted[user_id]+1) for user_id in accepting[film_id]]) for film_id in movies]
            chosen_id = movies[np.argmax(points)]
            res.append(chosen_id)
            for uid in accepting[chosen_id]:
                user_accepted[uid] += 1
        
        return res

## Część 3. - funkcje celu

In [10]:
# dwie funkcje pomocnicze:
#  - znajdujaca ulubione filmy danego uzytkownika
#  - obliczajaca sume ocen wystawionych przez uzytkownika wszystkim filmom w rekomendacji

def top_n_movies_for_user(ratings, movies, user_id, n):
    return np.argsort(ratings.loc[user_id])[-n:].index.values

def total_score(recommendation, user_id, ratings):
    return np.sum(ratings.loc[user_id][recommendation])

In [11]:
# funkcja obliczajaca zadowolenie pojedynczego uzytkownika
#  - iloraz zadowolenia z wygenerowanej rekomendacji oraz zadowolenia z hipotetycznej rekomendacji idealnej
def overall_user_satisfaction(recommendation, user_id, movies, ratings):
    recomendation_score = total_score(recommendation, user_id, ratings)
    best_score = total_score(top_n_movies_for_user(ratings, movies, user_id, len(recommendation)), user_id, ratings)
    return  recomendation_score / best_score

# funkcja celu - srednia z zadowolenia wszystkich uzytkownikow w grupie
def overall_group_satisfaction(recommendation, group, movies, ratings):
    return 1.0 * sum([overall_user_satisfaction(recommendation, user_id, movies, ratings) for user_id in group]) / len(group)

# funkcja celu - roznica miedzy maksymalnym i minimalnym zadowolenie w grupie
def group_dissatisfaction(recommendation, group, movies, ratings):
    satisfaction_scores = [overall_user_satisfaction(recommendation, user_id, movies, ratings) for user_id in group]
    return max(satisfaction_scores) - min(satisfaction_scores)

## Część 4. - Sequential Hybrid Aggregation

In [12]:
# algorytm balansujacy pomiedzy wyborem elementow o najwyzszej sredniej ocen
#   i o najwyzszej minimalnej ocenie
#   wyliczajacy w kazdej iteracji parametr alfa - jak na wykladzie
class SequentialHybridAggregationRecommender(Recommender):
    def __init__(self):
        self.name = 'sequential_hybrid_aggregation'
    
    def recommend(self, movies, ratings, group, size):
        films = ratings.loc[group]
        recommendation = []
        alpha = 0
        for _ in range(size):
            score = (1-alpha) * np.average(films, axis=0) + alpha * np.min(films, axis=0)
            film_id = score.index.values[np.argmax(score)]
            recommendation.append(film_id)
            films = films.drop(film_id, axis=1)
            alpha = group_dissatisfaction(recommendation, group, movies, ratings)
        return recommendation
    
SequentialHybridAggregationRecommender().recommend(movies, ratings, groups[0], 10)

[4770, 94325, 5943, 159976, 3032, 31221, 124859, 85, 296, 318]

## Część 5. - porównanie algorytmów

In [13]:
recommenders = [
    RandomRecommender(),
    AverageRecommender(),
    AverageWithoutMiseryRecommender(2),
    DictatorshipRecommender(1),
    FairnessRecommender(),
    PAVRecommender(2),
    SequentialHybridAggregationRecommender()
]

recommendation_size = 10

# dla kazdego algorytmu:
#  - wygenerujmy jedna rekomendacje dla kazdej grupy
#  - obliczmy wartosci funkcji celu dla kazdej rekomendacji
#  - obliczmy srednia i odchylenie standardowe wartosci funkcji celu
#  - wypiszmy wyniki na konsole

for loss in [overall_group_satisfaction, group_dissatisfaction]:
    print(f'Loss function: {loss.__name__}')
    losses = []
    for recommender in recommenders:
        losses = [
            loss(
                recommender.recommend(movies, ratings, group, recommendation_size),
                group, movies, ratings
            ) for group in groups]
        print(f'{recommender.name}: AVG={sum(losses)/len(losses)} STD={np.std(losses)}')
    print()

Loss function: overall_group_satisfaction
random: AVG=0.9725493926745947 STD=0.040718675569064416
average: AVG=1.0 STD=0.0
average_without_misery: AVG=0.9754267127990884 STD=0.0293929109790848
dictatorship: AVG=1.0 STD=0.0
fairness: AVG=1.001925503110905 STD=0.033247423237854505
PAV: AVG=1.2006293490869389 STD=0.02979713889985054
sequential_hybrid_aggregation: AVG=1.36670172208606 STD=0.021009183972020173

Loss function: group_dissatisfaction
random: AVG=0.06106707888186794 STD=0.02372921333298144
average: AVG=0.0 STD=0.0
average_without_misery: AVG=0.0500873583858752 STD=0.03578516586680111
dictatorship: AVG=0.0 STD=0.0
fairness: AVG=0.15228185669982525 STD=0.04941206936434229
PAV: AVG=0.16021398500888634 STD=0.05953691460108414
sequential_hybrid_aggregation: AVG=0.10768638529370818 STD=0.05095846233505483

