# Laboratorium 6 - rekomendacje grupowe

## Przygotowanie

 * pobierz i wypakuj dataset: https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
   * więcej możesz poczytać tutaj: https://grouplens.org/datasets/movielens/
 * [opcjonalnie] Utwórz wirtualne środowisko
 `python3 -m venv ./recsyslab6`
 * zainstaluj potrzebne biblioteki:
 `pip install numpy pandas matplotlib`

In [1]:
pip install numpy pandas matplotlib

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'C:\Users\lenovo\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip' command.


## Część 1. - przygotowanie danych

In [2]:
# importujemy wszystkie potrzebne pakiety

import math
import numpy as np
import pandas

from random import choice, sample
from statistics import mean, stdev

from reco_utils import *

In [3]:
# wczytujemy oceny uytkownikow i obliczamy (za pomocą collaborative filtering) wszystkie przewidywane oceny filmow

raw_ratings = pandas.read_csv('ml-latest-small/ratings.csv').drop(columns=['timestamp'])
movies = list(raw_ratings['movieId'].unique())
users = list(raw_ratings['userId'].unique())
ratings = get_predicted_ratings(raw_ratings)
ratings

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
1,10,9,8,8,8,9,7,9,9,8,...,10,8,9,8,8,8,8,8,9,9
2,5,5,4,4,4,4,4,5,5,4,...,5,4,5,4,4,4,4,5,5,5
3,4,4,3,4,3,4,3,4,4,3,...,4,4,4,3,4,3,4,3,4,4
4,8,7,6,6,7,8,6,7,7,7,...,8,7,8,7,7,6,7,6,7,7
5,5,5,4,5,5,5,4,5,5,4,...,5,5,5,4,5,4,4,4,5,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,9,8,7,7,7,8,6,8,7,7,...,8,7,8,7,7,7,7,7,7,8
607,8,8,7,7,7,8,6,7,8,7,...,9,7,8,6,7,7,7,6,7,8
608,7,6,6,6,6,7,6,6,6,6,...,7,6,7,6,6,6,6,6,6,7
609,5,5,4,5,4,5,4,4,5,4,...,6,4,5,4,4,4,4,4,4,5


In [4]:
# definiujemy testowe grupy uzytkownikow, dla ktorych bedziemy generowac rekomendacje

groups_no = 50
group_size = 5
groups = [sample(users, group_size) for i in range(groups_no)]
groups

[[3, 521, 490, 194, 501],
 [217, 225, 397, 193, 516],
 [295, 341, 7, 153, 483],
 [496, 195, 78, 139, 541],
 [386, 552, 467, 195, 15],
 [487, 462, 500, 63, 25],
 [63, 48, 95, 250, 521],
 [40, 4, 259, 324, 607],
 [562, 78, 607, 491, 593],
 [460, 376, 333, 408, 23],
 [609, 323, 407, 183, 217],
 [306, 240, 224, 374, 407],
 [592, 426, 101, 336, 167],
 [572, 257, 156, 416, 574],
 [245, 575, 111, 364, 419],
 [588, 535, 199, 232, 282],
 [581, 178, 118, 342, 122],
 [402, 269, 412, 588, 388],
 [35, 72, 9, 146, 172],
 [572, 181, 83, 32, 15],
 [356, 351, 563, 38, 145],
 [23, 475, 525, 588, 450],
 [149, 22, 349, 563, 400],
 [352, 531, 307, 436, 45],
 [76, 307, 96, 67, 423],
 [270, 238, 222, 286, 319],
 [583, 568, 218, 92, 11],
 [565, 122, 359, 488, 400],
 [297, 169, 352, 475, 512],
 [67, 305, 399, 98, 560],
 [315, 306, 10, 171, 438],
 [211, 281, 474, 85, 134],
 [585, 286, 525, 539, 185],
 [579, 158, 497, 607, 332],
 [587, 288, 51, 30, 98],
 [565, 175, 171, 588, 160],
 [55, 215, 158, 182, 607],
 [17

In [None]:
movies

In [None]:
users

## Część 2. - algorytmy proste

In [5]:
# zdefiniujmy interfejs dla wszystkich algorytmow rekomendacyjnych

class Recommender:
    def recommend(self, movies, ratings, group, size):
        pass


# jako pierwszy zaimplementujemy algorytm losowy - dla porownania
    
class RandomRecommender(Recommender):
    def __init__(self):
        self.name = 'random'
        
    def recommend(self, movies, ratings, group, size):
        return sample(movies, size)

In [6]:
r = RandomRecommender()
r.recommend(movies, ratings, 4, 10)

[3459, 3962, 2000, 549, 141810, 155064, 2571, 4014, 2106, 26901]

In [7]:
ratings # ratings[1] - oceny filmu 1., ratings[1][10] -> jak 10. user ocenil 1. film

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
1,10,9,8,8,8,9,7,9,9,8,...,10,8,9,8,8,8,8,8,9,9
2,5,5,4,4,4,4,4,5,5,4,...,5,4,5,4,4,4,4,5,5,5
3,4,4,3,4,3,4,3,4,4,3,...,4,4,4,3,4,3,4,3,4,4
4,8,7,6,6,7,8,6,7,7,7,...,8,7,8,7,7,6,7,6,7,7
5,5,5,4,5,5,5,4,5,5,4,...,5,5,5,4,5,4,4,4,5,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,9,8,7,7,7,8,6,8,7,7,...,8,7,8,7,7,7,7,7,7,8
607,8,8,7,7,7,8,6,7,8,7,...,9,7,8,6,7,7,7,6,7,8
608,7,6,6,6,6,7,6,6,6,6,...,7,6,7,6,6,6,6,6,6,7
609,5,5,4,5,4,5,4,4,5,4,...,6,4,5,4,4,4,4,4,4,5


In [15]:
print(len(ratings[1]))

610


In [8]:
print(len(users))

610


In [9]:
print(len(movies))

9724


In [26]:
ratings[2]

1      9
2      5
3      4
4      7
5      5
      ..
606    8
607    8
608    6
609    5
610    8
Name: 2, Length: 610, dtype: int32

In [10]:
# algorytm rekomendujacy filmy o najwyzszej sredniej ocen

class AverageRecommender(Recommender):
    def __init__(self):
        self.name = 'average'
    
    def recommend(self, movies, ratings, group, size):
        average_ratings = {}
        
        for movie in movies:
            for user in groups[group]:
                if movie in average_ratings:
                    average_ratings[movie] += ratings[movie][user]
                else:
                    average_ratings[movie] = ratings[movie][user]
            
        
        for key, value in average_ratings.items():
            average_ratings[key] /= len(groups[group])
            
        recommended_movies = []
        
        sorted_averages = sorted(average_ratings.items(), key=lambda pair: pair[1], reverse=True)
        for item in sorted_averages[0:size]:
            recommended_movies.append(item[0])
        
        return recommended_movies
            

In [11]:
av_r = AverageRecommender()
av_r.recommend(movies, ratings, 4, 10)

[318, 4021, 38, 52694, 3682, 95145, 110, 296, 356, 527]

In [12]:
# algorytm rekomendujacy filmy o najwyzszej sredniej ocen,
#   ale rownoczesnie wykluczajacy te filmy, ktore otrzymaly choc jedna ocene ponizej thresholdu

class AverageWithoutMiseryRecommender(Recommender):
    def __init__(self, score_threshold):
        self.name = 'average_without_misery'
        self.score_threshold = score_threshold
        
    def recommend(self, movies, ratings, group, size):
        
        average_ratings = {}
        bad_movies = []
        
        for movie in movies:
            for user in groups[group]:
                if ratings[movie][user] < self.score_threshold:
                    bad_movies.append(movie)
                
                if movie in average_ratings:
                    average_ratings[movie] += ratings[movie][user]
                else:
                    average_ratings[movie] = ratings[movie][user]       
        
        for key in average_ratings.keys():
            average_ratings[key] /= len(groups[group])
            
        averages_good_movies = {}
        for key, value in average_ratings.items():
            if key not in bad_movies:
                averages_good_movies[key] = value
            
        recommended_movies = []
        
        sorted_averages = sorted(averages_good_movies.items(), key=lambda pair: pair[1], reverse=True)
        for item in sorted_averages[0:size]:
            recommended_movies.append(item[0])
        
        return recommended_movies
            
        

In [30]:
t = 5
av_r = AverageWithoutMiseryRecommender(t)
av_r.recommend(movies, ratings, 4, 10)

[318, 4021, 38, 52694, 3682, 95145, 110, 296, 356, 527]

In [81]:
# algorytm uwzgledniajacy preferencje tylko jednego uzytkownika

class DictatorshipRecommender(Recommender):
    def __init__(self, dictator_id):
        self.name = 'dictatorship'
        self.dictator_id = dictator_id
        
    def recommend(self, movies, ratings, group, size):
        
        user_ratings = {}
        for movie in movies:
            try:
                user_ratings[movie] = ratings[movie][dictator_id]
            except:
                pass
         
        recommended_movies = []
        sorted_averages = sorted(user_ratings.items(), key=lambda pair: pair[1], reverse=True)
        for item in sorted_averages[0:size]:
            recommended_movies.append(item[0])
        
        return recommended_movies

In [82]:
dictator_id = 30
dictator_r = DictatorshipRecommender(dictator_id)
dictator_r.recommend(movies, ratings, 4, 10)

[593, 318, 95145, 50, 110, 260, 296, 356, 457, 527]

In [88]:
# algorytm, ktory w kazdej turze uwzglednia preferencje tylko jednego, kolejnego uzytkownika

class FairnessRecommender(Recommender):
    def __init__(self):
        self.name = 'fairness'
        self.dictator_id = 1
        
    def recommend(self, movies, ratings, group, size):
        
        user_ratings = {}
        for movie in movies:
            try:
                user_ratings[movie] = ratings[movie][dictator_id]
            except:
                pass
              
        recommended_movies = []
        sorted_averages = sorted(user_ratings.items(), key=lambda pair: pair[1], reverse=True)
        for item in sorted_averages[0:size]:
            recommended_movies.append(item[0])
        
        self.dictator_id += 1
        if self.dictator_id > len(users):
            self.dictator_id = 1
        
        return recommended_movies

In [89]:
fairness_dictator_id = 30
fairness_dictator_id = FairnessRecommender()
fairness_dictator_id.recommend(movies, ratings, 4, 10)

[593, 318, 95145, 50, 110, 260, 296, 356, 457, 527]

In [79]:
# algorytm zachlanny, aproksymujacy metode Proportional Approval Voting
#   w kazdej iteracji wybieramy ten film, ktory najbardziej zwieksza zadowolenie zgodnie z punktacja PAV

class PAVRecommender(Recommender):
    def __init__(self, threshold):
        self.threshold = threshold
        self.name = 'PAV'
        
    def recommend(self, movies, ratings, group, size):
        
        # dla kazdego usera trzymamy n
        user_n = {}
        for user in groups[group]:
            user_n[user] = 1
            
        best_movie = 0
        best_score = 0
        current_score = 0
        
        recommended_movies = []
        
        for iteration in range(size):
            # dla kazdego filmu idziemy po userach w grupie i zgarniamy punkty
            for movie in movies:
                
                if movie in recommended_movies:
                    continue
                
                for user in groups[group]:
                    if ratings[movie][user] < self.threshold:
                        pass
                    else:
                        current_score += 1 / user_n[user]
                        user_n[user] += 1
                
                if current_score > best_score:
                    best_score = current_score
                    best_movie = movie
                    
            recommended_movies.append(best_movie)
                    
        return recommended_movies
        
        

In [80]:
t = 5
pav_r = PAVRecommender(t)
pav_r.recommend(movies, ratings, 4, 10)

[163981,
 163937,
 160836,
 160527,
 160341,
 158721,
 152372,
 149011,
 148166,
 147662]

## Część 3. - funkcje celu

In [100]:
# dwie funkcje pomocnicze:
#  - znajdujaca ulubione filmy danego uzytkownika
#  - obliczajaca sume ocen wystawionych przez uzytkownika wszystkim filmom w rekomendacji

def top_n_movies_for_user(ratings, movies, user_id, n):
    
    user_ratings = {}
    for movie in movies:
        try:
            user_ratings[movie] = ratings[movie][user_id]
        except:
            pass
    
    top_n = sorted(user_ratings.items(), key=lambda pair: pair[1], reverse=True)[0:n]
    top_n_movies = []
    for item in top_n:
        top_n_movies.append(item[0])
    return top_n_movies
        
def total_score(recommendation, user_id, ratings):
    total = 0
    for movie in recommendation:
        try:
            total += ratings[movie][user_id]
        except:
            pass
    return total

In [98]:
top_n_movies_for_user(ratings, movies, 30, 17)

[593,
 318,
 95145,
 50,
 110,
 260,
 296,
 356,
 457,
 527,
 608,
 648,
 733,
 1090,
 1136,
 1210,
 1291]

In [101]:
recommendation = [163981,
 163937,
 160836,
 160527,
 160341,
 158721,
 152372,
 149011,
 148166,
 147662]

total_score(recommendation, 10, ratings)

65

In [None]:
# funkcja obliczajaca zadowolenie pojedynczego uzytkownika
#  - iloraz zadowolenia z wygenerowanej rekomendacji oraz zadowolenia z hipotetycznej rekomendacji idealnej
def overall_user_satisfaction(recommendation, user_id, movies, ratings):
    # ...

# funkcja celu - srednia z zadowolenia wszystkich uzytkownikow w grupie
def overall_group_satisfaction(recommendation, group, movies, ratings):
    return 1.0 * sum([overall_user_satisfaction(recommendation, user_id, movies, ratings) for user_id in group]) / len(group)

# funkcja celu - roznica miedzy maksymalnym i minimalnym zadowolenie w grupie
def group_dissatisfaction(recommendation, group, movies, ratings):
    satisfaction_scores = [overall_user_satisfaction(recommendation, user_id, movies, ratings) for user_id in group]
    return max(satisfaction_scores) - min(satisfaction_scores)

## Część 4. - Sequential Hybrid Aggregation

In [None]:
# algorytm balansujacy pomiedzy wyborem elementow o najwyzszej sredniej ocen
#   i o najwyzszej minimalnej ocenie
#   wyliczajacy w kazdej iteracji parametr alfa - jak na wykladzie
class SequentialHybridAggregationRecommender(Recommender):
     def __init__(self, dictator_id):
        self.name = 'sequential_hybrid_aggregation'
    
    def recommend(self, movies, ratings, group, size):
        pass

## Część 5. - porównanie algorytmów

In [None]:
recommenders = [
    RandomRecommender(),
    AverageRecommender(),
    AverageWithoutMiseryRecommender(2),
    DictatorshipRecommender(1),
    FairnessRecommender(),
    PAVRecommender(),
    SequentialHybridAggregationRecommender()
]

recommendation_size = 10

# dla kazdego algorytmu:
#  - wygenerujmy jedna rekomendacje dla kazdej grupy
#  - obliczmy wartosci funkcji celu dla kazdej rekomendacji
#  - obliczmy srednia i odchylenie standardowe wartosci funkcji celu
#  - wypiszmy wyniki na konsole

for recommender in recommenders:
    # ...