# Evaluator of Recommender

In [1]:
# imports
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import jaccard_score
from IPython.display import display, HTML

In [2]:
# Load the pickled `movies` and `ratings` objects from the local ./data directory.
# NOTE(review): pd.read_pickle unpickles arbitrary Python objects, which can execute
# code on load -- only read pickle files that come from a trusted source.
movies = pd.read_pickle('./data/movies.pkl')
ratings = pd.read_pickle('./data/ratings.pkl')

In [34]:
class Evaluator():
    """Scores a list of recommended movie ids against a user's ground truth.

    The ground truth is either supplied by the caller or derived from the
    most recent share of the user's rows in ``ratings``. The code reads the
    ``username``, ``date`` and ``movie_id`` columns of ``ratings``.
    """

    def __init__(self, movies, ratings):
        """Keep references to the movies object and the ratings DataFrame.

        Args:
            movies: movie metadata; stored as-is, not used by the methods below.
            ratings: DataFrame with ``username``, ``date`` and ``movie_id`` columns.
        """
        self.movies = movies
        self.ratings = ratings

    def evaluate(self, gen_movies, username=None, eval_type="jaccard", manual_gt=None, split=0.2):
        """Evaluate ``gen_movies`` against a ground truth.

        Args:
            gen_movies: iterable of recommended movie ids.
            username: user whose ratings provide the ground truth when
                ``manual_gt`` is not given.
            eval_type: ``"jaccard"`` returns the Jaccard index; ``"manual"``
                asks interactively about each recommendation and prints
                precision, recall and F1.
            manual_gt: explicit ground truth (list, array or Series). When
                given, ``username`` and ``split`` are ignored.
            split: fraction of the user's newest ratings used as ground truth.

        Returns:
            The Jaccard index (float) for ``"jaccard"``; ``None`` for ``"manual"``.

        Raises:
            ValueError: if ``eval_type`` is not one of the supported values.
        """
        # `is None` rather than `== None`: comparing a Series/ndarray with `==`
        # is elementwise and cannot be used as an `if` condition.
        if manual_gt is None:
            # Select this user's ratings, sort newest first by date, and keep
            # the leading `split` share of them as the ground-truth movie ids.
            user_ratings = self.ratings.loc[self.ratings.username == username]
            sorted_user = user_ratings.sort_values(by="date", ascending=False)
            ground_truth = sorted_user.head(round(len(sorted_user) * split)).movie_id
        else:
            ground_truth = manual_gt

        if eval_type == "jaccard":
            return self.jaccard(ground_truth, gen_movies)

        if eval_type == "manual":
            is_good = self.manual_evaluation(ground_truth, gen_movies)
            hits = int(np.sum(is_good))
            # Guard every ratio: the reviewer may stop before answering
            # anything, and the ground truth may be empty.
            precision = hits / len(is_good) if len(is_good) else 0.0
            recall = hits / len(ground_truth) if len(ground_truth) else 0.0
            pr_sum = precision + recall
            f1_score = 2 * precision * recall / pr_sum if pr_sum else 0.0
            print(f"precision: {precision}")
            print(f"recall: {recall}")
            print(f"f1-score: {f1_score}")
            return None

        raise ValueError(
            f"unknown eval_type {eval_type!r}; expected 'jaccard' or 'manual'"
        )

    def jaccard(self, a, b):
        """Return the Jaccard index |A & B| / |A | B| of two id collections.

        Duplicates inside ``a`` or ``b`` are ignored (both are treated as
        sets). Returns 0.0 when both collections are empty.
        """
        set_a, set_b = set(a), set(b)
        union_size = len(set_a | set_b)
        if union_size == 0:
            return 0.0
        return len(set_a & set_b) / union_size

    def manual_evaluation(self, ground_truth, gen_movies):
        """Ask the reviewer on stdin whether each recommendation is good.

        Each prompt repeats until the answer is ``y``, ``n`` or ``over``;
        ``over`` stops the review early and the remaining movies are skipped.

        Returns:
            Boolean NumPy array with one entry per answered movie
            (``True`` where the answer was ``y``).
        """
        print(f"the GROUND TRUTH is:\n {ground_truth}\n")
        answers = []
        for gm in gen_movies:
            ans = ""
            while ans not in ("y", "n", "over"):
                ans = input(f"is the movie {gm} a good prediciton? (y/n/over): ")
            if ans == "over":
                break
            answers.append(ans)
        print(np.array(answers))
        # Build the mask explicitly so an empty answer list still yields an
        # empty boolean array instead of a float-array-vs-string comparison.
        return np.array([ans == "y" for ans in answers], dtype=bool)

    def split_users(self):
        """Split usernames into validation and test sets.

        Users are ordered by their number of ratings (``value_counts`` sorts
        descending by default) and dealt out alternately: even positions go
        to ``self.valid_names``, odd positions to ``self.test_names``.
        """
        names_by_count = self.ratings["username"].value_counts().index.values
        self.valid_names = names_by_count[0::2]
        self.test_names = names_by_count[1::2]
        

In [35]:
# Build the evaluator from the `movies` and `ratings` objects loaded earlier in the notebook.
evaluator = Evaluator(movies, ratings)

In [36]:
# Hard-coded list of recommended movie ids to be judged for user "Martin741".
gen_movies = ['1430-projekt-a', '19952-ve-sluzbach-papeze', '391274-joy', '7389-hra-bez-pravidel', '7626-stuj-pri-mne', '29561-the-quiet-earth', '50300-instalater-z-tuchlovic', '426009-deadpool-2', '175257-hustle-a-flow', '396645-tata-je-doma', '474915-vezmes-si-me-kamo', '411706-teorie-tygra', '14284-dej-si-pohov-kamosi-2', '320638-strazci-galaxie', '9392-evropa', '381728-kickboxer-vengeance', '13128-vzpoura-na-bounty', '336286-syn-zmaru', '318271-mazel', '250727-papirovy-hrdina', '381274-kong-ostrov-lebek', '17622-na-pokraji-slavy', '317342-x-men-budouci-minulost', '127-hrac', '55410-smrtihlav', '12670-tajemstvi-meho-uspechu', '1562-karel-a-ja', '56206-svycarak', '552677-vice', '4677-pokoj-s-vyhlidkou', '59851-svatek-matek', '319631-mission-impossible-narod-grazlu', '348705-philomena', '279901-cinsky-zverokruh', '263658-maly-velky-bojovnik', '88180-divoka-planeta', '195035-tri', '23830-uplne-zatmeni', '494367-nocni-hra', '33071-bitva-o-planetu-opic', '353868-mrozi-muz', '47525-moje-tlusta-recka-svatba', '87412-superstar', '238136-chips-blazniva-hlidka', '32534-navrat-velkeho-blondyna']

# NOTE: eval_type="manual" prompts through input() for every movie until "over" is typed,
# so this cell blocks on user input and cannot be re-run unattended (Restart & Run All).
evaluator.evaluate(gen_movies, "Martin741", eval_type="manual")

the GROUND TRUTH is:
 2062289           3065-mary-reilly
2474835               21306-bytost
655532           451893-muzzikanti
2089931    560981-prezident-blanik
1485507     9142-cekani-na-patrika
                    ...           
1152556           101170-vecny-zid
884127         2610-cabiriiny-noci
422422                  32350-obet
953903          1755-hory-maji-oci
2134526          2617-darmoslapove
Name: movie_id, Length: 1027, dtype: object



is the movie 1430-projekt-a a good prediciton? (y/n/over):  y
is the movie 19952-ve-sluzbach-papeze a good prediciton? (y/n/over):  y
is the movie 391274-joy a good prediciton? (y/n/over):  n
is the movie 7389-hra-bez-pravidel a good prediciton? (y/n/over):  n
is the movie 7626-stuj-pri-mne a good prediciton? (y/n/over):  n
is the movie 29561-the-quiet-earth a good prediciton? (y/n/over):  y
is the movie 50300-instalater-z-tuchlovic a good prediciton? (y/n/over):  y
is the movie 426009-deadpool-2 a good prediciton? (y/n/over):  y
is the movie 175257-hustle-a-flow a good prediciton? (y/n/over):  over


['y' 'y' 'n' 'n' 'n' 'y' 'y' 'y']
precision: 0.625
recall: 0.004868549172346641
f1-score: 0.00966183574879227


In [27]:
# Every movie id rated by user "Martin741" (full history, no ground-truth split applied).
ratings.loc[ratings["username"] == "Martin741", "movie_id"]

138                                      230421-houbicky
344                                     10789-prvni-liga
739                                       235032-yes-man
2136                  234754-chlapec-v-pruhovanem-pyzamu
3159                                   4380-povestny-muz
                               ...                      
2794959    39648-master-commander-odvracena-strana-sveta
2795591                        197196-vzpominky-na-lasku
2796526                        220743-world-trade-center
2797172                          43239-muzska-zalezitost
2797314                             221264-zivot-jde-dal
Name: movie_id, Length: 5135, dtype: object

In [17]:
# Scratch check: comparing a NumPy string array with a scalar string is elementwise
# and produces a boolean mask (True where the element equals "n").
np.array(["y","y","y","y","n","n"]) == "n"

array([False, False, False, False,  True,  True])

In [110]:
# Scratch check of input(): waits for a typed line and stores it in `ans`.
# Blocks on user input, so this cell cannot run unattended.
ans = input("hehe: ")

hehe:  y


In [111]:
# Display the string captured by the preceding input() call.
ans

'y'