In [64]:
import pandas as pd
from clayrs import evaluation as eva
import os

def getFileInfo(filename):
    """Parse an evaluation-result file name into its components.

    The expected layout is
    '<fields> - <representation> - <algorithm> (<candidate>@<cutoff>).csv'.
    For example 'description - Doc2Vec - Centroid Vector (All Items@10).csv'
    is parsed into fields 'description', representation 'Doc2Vec',
    algorithm 'Centroid Vector', candidate 'All Items' and cutoff 10.

    Args:
        filename: File name (without directory) to parse.

    Returns:
        dict with the keys 'fields', 'representation', 'algorithm',
        'candidate' (all str) and 'cutoff' (int).

    Raises:
        ValueError: If the name does not follow the expected layout or the
            cutoff is not an integer.
    """
    suffix = ').csv'

    # Only the first two ' - ' separators are structural; limiting the split
    # keeps any later occurrence inside the algorithm part.
    parts = filename.split(' - ', 2)
    if len(parts) != 3 or not filename.endswith(suffix):
        raise ValueError(f'Unexpected result file name: {filename!r}')
    fields, representation, tail = parts

    # Drop the trailing ').csv', then peel the '(candidate@cutoff' group off the
    # right-hand side so an algorithm name that itself contains ' (' still parses.
    tail = tail[:-len(suffix)]
    algorithm, paren, setting = tail.rpartition(' (')
    candidate, at, cutoff = setting.rpartition('@')
    if not paren or not at:
        raise ValueError(f'Unexpected result file name: {filename!r}')

    return {
        'fields': fields,
        'representation': representation,
        'algorithm': algorithm,
        'candidate': candidate,  # e.g. 'All Items'
        'cutoff': int(cutoff),   # e.g. 10
    }

def systems_str(filename, position):
    """Build the 'System <position>: <algorithm>' label for a result file.

    Args:
        filename: Result file name; it is parsed with getFileInfo and only the
            'algorithm' entry is used.
        position: Ordinal of the system, rendered with str().

    Returns:
        The label string.
    """
    algorithm_name = getFileInfo(filename)["algorithm"]
    return f'System {position!s}: {algorithm_name}'

In [65]:
# NOTE(review): machine-specific absolute path; adjust it before running elsewhere.
path = 'D:/Repository/RecSys-Algorithms-Evaluation/'
# `path` already ends with '/', so strip it before joining to avoid a doubled
# separator. `dir` shadows the builtin of the same name; the name is kept
# because other cells may reference it.
dir = f"{path.rstrip('/')}/Eval Results - 1M/USER/"

# Algorithms compared against each other; their order defines the system number.
algorithms = ['Centroid Vector', 'Logistic Regression', 'Random Forest', 'SVC']

# Content representations; each one has its own sub-directory under `dir`.
representations_list = [
    'SK-TFIDF',
    'Word2Vec', 'Doc2Vec',
    'GensimLDA', 'GensimRandomIndexing', 'GensimFastText', 'GensimLSA',
    'Word2Doc-GloVe', 'Sentence2Doc-Sbert'
]

# Field combinations used as the first component of the result file names.
fields = [
    'description',
    'genres',
    'tags',
    'reviews',
    'description,genres,tags',
    'description,genres,reviews',
    'description,tags,reviews',
    'genres,tags,reviews',
    'description,genres,tags,reviews'
]

# Every t-test table keyed by (field, representation, cutoff). A bare `tt`
# expression inside the loop displays nothing and `tt` is overwritten on each
# iteration, so without this only the very last table would be retained.
ttest_results = {}

for field in fields:
    for representation in representations_list:
        current_dir = f'{dir}{representation}/'
        for cutoff in [5, 10]:
            frames = []
            systems = []
            # One result CSV per algorithm; systems are numbered from 1.
            for i, algorithm in enumerate(algorithms, start=1):
                filename = f'{field} - {representation} - {algorithm} (Test Ratings@{cutoff}).csv'
                frames.append(pd.read_csv(current_dir + filename))
                systems.append(systems_str(filename, i))
            print(f'Evaluation of: {field} represented with {representation} and Cutoff@{cutoff}')
            for string in systems:
                print(string)
            # Presumably a pairwise t-test between the loaded systems - confirm
            # against the ClayRS documentation.
            tt = eva.Ttest().perform(frames)
            ttest_results[(field, representation, cutoff)] = tt

# `tt` is the table of the last (field, representation, cutoff) combination.
# NOTE(review): the saved output of this cell still shows the label
# '(system_1, system_2)', so this key may not match the real index label
# (rename ignores labels it cannot find) - verify.
ts = tt.rename(index={'(system_1, system_2)': 'Centroid - Logistic'})
ts

Evaluation of: description represented with SK-TFIDF and Cutoff@5
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: description represented with SK-TFIDF and Cutoff@10
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: description represented with Word2Vec and Cutoff@5
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: description represented with Word2Vec and Cutoff@10
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: description represented with Doc2Vec and Cutoff@5
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: description represented with Doc2Vec and Cutoff@10
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: description represented 

Evaluation of: reviews represented with SK-TFIDF and Cutoff@5
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: reviews represented with SK-TFIDF and Cutoff@10
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: reviews represented with Word2Vec and Cutoff@5
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: reviews represented with Word2Vec and Cutoff@10
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: reviews represented with Doc2Vec and Cutoff@5
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: reviews represented with Doc2Vec and Cutoff@10
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: reviews represented with GensimLDA and Cutoff@5


Evaluation of: description,genres,reviews represented with Word2Doc-GloVe and Cutoff@5
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: description,genres,reviews represented with Word2Doc-GloVe and Cutoff@10
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: description,genres,reviews represented with Sentence2Doc-Sbert and Cutoff@5
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: description,genres,reviews represented with Sentence2Doc-Sbert and Cutoff@10
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: description,tags,reviews represented with SK-TFIDF and Cutoff@5
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: description,tags,reviews represented with SK-TFIDF and Cutoff@10
System 1: Cen

Evaluation of: description,genres,tags,reviews represented with GensimLDA and Cutoff@10
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: description,genres,tags,reviews represented with GensimRandomIndexing and Cutoff@5
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: description,genres,tags,reviews represented with GensimRandomIndexing and Cutoff@10
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: description,genres,tags,reviews represented with GensimFastText and Cutoff@5
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: description,genres,tags,reviews represented with GensimFastText and Cutoff@10
System 1: Centroid Vector
System 2: Logistic Regression
System 3: Random Forest
System 4: SVC
Evaluation of: description,genres,tags,reviews represented w

Unnamed: 0_level_0,Precision@10 - macro,Recall@10 - macro,F1@10 - macro,NDCG@10
Systems evaluated,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"(system_1, system_2)","(-5.456012367097168, 4.965509991601528e-08)","(-1.50722531255885, 0.131779076566213)","(-2.995048629799868, 0.00274957629381973)","(-2.725479021138219, 0.006430079688505388)"
"(system_1, system_3)","(-5.695096431559423, 1.2617955639434631e-08)","(-1.1815683092482425, 0.23740030569460618)","(-2.622539722592256, 0.008738668788031674)","(-2.1336293033600455, 0.032893233850282116)"
"(system_1, system_4)","(-5.939401511043811, 2.9394002240445796e-09)","(-0.48701920089817874, 0.6262535650342433)","(-2.0265175784725367, 0.04273370988972657)","(5.567380615119732, 2.64117889201674e-08)"
"(system_2, system_3)","(-0.2793402099693216, 0.7799885354156881)","(0.3305577904580661, 0.7409842693277906)","(0.39129193806559803, 0.6955883620392462)","(0.6045943591684647, 0.5454599165565547)"
"(system_2, system_4)","(-0.6238090089623941, 0.5327648071792652)","(1.0372871998891207, 0.29962280222955046)","(1.0066185588430487, 0.31413828485479656)","(8.173538078594822, 3.292585671216389e-16)"
"(system_3, system_4)","(-0.34712811229971247, 0.7285011245808455)","(0.7063804887525178, 0.4799651814331525)","(0.616761575895839, 0.5374036285790063)","(7.628718902419531, 2.5487254515575635e-14)"
