## Generate recommendation statistics

In [3]:
from recomender.preprocessor import RatingDataset, MovieDataset
from recomender.evaluation import EvaluationGenerator

import pandas as pd
import json
import numpy as np

### With the MovieLens dataset

In [None]:
# Build the movie processor from the overviews file, then join the movie
# details onto it.
overviews_df = pd.read_csv("data/ml-latest-small/overviews.csv")
movie_processor = MovieDataset(overviews_df)
movies_df = pd.read_csv("data/ml-latest-small/movies.csv")
movie_details_df = movie_processor.join_movies_details(movies_df=movies_df)

# Process the ratings, handing the processor the list of movies that the
# movie processor reports as missing a description.
rating_processor = RatingDataset()
raw_ratings_df = pd.read_csv("data/ml-latest-small/ratings.csv")
ratings_df = rating_processor.process(
    raw_ratings_df,
    movie_processor.missing_description_list,
)

# Report how many distinct user ids remain in the processed ratings.
unique_user_ids = set(ratings_df.userId.to_list())
print(f"quantity of unique users: {len(unique_user_ids)}")

# Every on/off combination of three boolean flags, paired with a 1-based run
# index: (1, (False, False, False)), (2, (False, False, True)), ...,
# (8, (True, True, True)). The last flag toggles fastest. Each flag tuple is
# later passed to EvaluationGenerator.generate together with its index.
combination_pre_process_techniques = list(
    enumerate(
        (
            (first_flag, second_flag, third_flag)
            for first_flag in (False, True)
            for second_flag in (False, True)
            for third_flag in (False, True)
        ),
        start=1,
    )
)

evaluate_generator = EvaluationGenerator(item_df=movie_details_df, rating_df=ratings_df)

# Human-readable (Portuguese) label for each run, keyed "recomendations_<n>"
# where <n> is the index paired with a flag tuple in
# combination_pre_process_techniques (presumably also the name of the CSV
# that generate() exports for that run -- confirm).
#
# Labels 2, 3, 6 and 7 only fit their flag tuples when the tuple order is
# (stopword, lemma, stemm). Under that order run 4 = (False, True, True) is
# "stemm + lemma" and run 5 = (True, False, False) is "stopword"; those two
# labels used to be swapped, which mislabelled both runs downstream.
# NOTE(review): flag order is inferred from the labels themselves -- confirm
# against EvaluationGenerator.generate.
# NOTE(review): "técnincas" looks like a typo of "técnicas"; left unchanged
# here because it is runtime output.
recommendation_labels = {
    "recomendations_1": 'nenhuma técnica',
    "recomendations_2": 'stemm',
    "recomendations_3": 'lemma',
    "recomendations_4": 'stemm + lemma',
    "recomendations_5": 'stopword',
    "recomendations_6": 'stopword + stemm',
    "recomendations_7": 'stopword + lemma',
    "recomendations_8": 'todas as técnincas',
}

# Store the labels next to the exported results so they can be loaded again
# together with the recommendation files.
with open(f"{evaluate_generator.export_folder}/labels.json", "w", encoding="utf-8") as labels_file:
    json.dump(recommendation_labels, labels_file)

# Run the generator once per flag combination, passing the run index along.
for count, technique in combination_pre_process_techniques:
    evaluate_generator.generate(technique, count)

#### Evaluate Recommendations

In [None]:
import glob
import os
import json
from recomender.plotter import Plotter

In [None]:
def load_recomendations(result_folder: str) -> dict:
    """Load every recommendation CSV in a folder together with its label.

    Args:
        result_folder: Directory containing a ``labels.json`` file and the
            ``*.csv`` recommendation files to load.

    Returns:
        A dict keyed by CSV file name without its extension. Each value is a
        dict with two entries: ``"label"`` (the value stored under that name
        in ``labels.json``) and ``"dataset"`` (the CSV loaded as a pandas
        DataFrame). Entries are inserted in sorted file-path order so the
        result is deterministic across runs and platforms.

    Raises:
        FileNotFoundError: If ``labels.json`` does not exist in the folder.
        KeyError: If a CSV file has no matching entry in ``labels.json``.
    """
    # The labels file is written as UTF-8, so read it back the same way; the
    # context manager closes the handle even if JSON parsing fails.
    labels_path = os.path.join(result_folder, "labels.json")
    with open(labels_path, "r", encoding="utf-8") as labels_file:
        labels = json.load(labels_file)

    # Escape the folder so glob metacharacters in its name are taken literally;
    # sort because glob returns paths in arbitrary order.
    csv_pattern = os.path.join(glob.escape(result_folder), "*.csv")
    recomendations = {}
    for csv_path in sorted(glob.glob(csv_pattern)):
        # Strip only the trailing extension, not every ".csv" in the name.
        name = os.path.splitext(os.path.basename(csv_path))[0]
        recomendations[name] = {
            "label": labels[name],
            "dataset": pd.read_csv(csv_path),
        }

    return recomendations

In [None]:
# Folder holding the exported recommendation CSVs and their labels.json.
# NOTE(review): presumably this should be the same folder as
# evaluate_generator.export_folder used by the generation cell -- confirm.
export_folder = "result/first_run"
# Maps each CSV file name (without extension) to its label and loaded DataFrame.
recomendations = load_recomendations(export_folder)

In [None]:
# Plot one chart per metric column, passing the export folder to each call.
plotter = Plotter()
for metric_column, chart_title in (
    ("prc", "Average Precision"),
    ("ap", "Mean Average Precision"),
    ("rr", "Mean Reciprocal Rank"),
):
    plotter.plot_col(recomendations, metric_column, chart_title, export_folder)