In [1]:
import pandas as pd

In [2]:
# Load the per-(algorithm, dataset) performance table; the first CSV column
# becomes the row index.
meta_dataset = pd.read_csv(
    filepath_or_buffer="./performance_meta_dataset.csv",
    index_col=0,
)

In [3]:
# Show every column name: the two identifier columns followed by the
# "max_test_metric_*" performance columns.
meta_dataset.columns.tolist()

['alg_name',
 'dataset_name',
 'max_test_metric_ARHR_ALL_HITS_cut_1',
 'max_test_metric_ARHR_ALL_HITS_cut_10',
 'max_test_metric_ARHR_ALL_HITS_cut_15',
 'max_test_metric_ARHR_ALL_HITS_cut_2',
 'max_test_metric_ARHR_ALL_HITS_cut_20',
 'max_test_metric_ARHR_ALL_HITS_cut_3',
 'max_test_metric_ARHR_ALL_HITS_cut_30',
 'max_test_metric_ARHR_ALL_HITS_cut_4',
 'max_test_metric_ARHR_ALL_HITS_cut_40',
 'max_test_metric_ARHR_ALL_HITS_cut_5',
 'max_test_metric_ARHR_ALL_HITS_cut_50',
 'max_test_metric_ARHR_ALL_HITS_cut_6',
 'max_test_metric_ARHR_ALL_HITS_cut_7',
 'max_test_metric_ARHR_ALL_HITS_cut_8',
 'max_test_metric_ARHR_ALL_HITS_cut_9',
 'max_test_metric_AVERAGE_POPULARITY_cut_1',
 'max_test_metric_AVERAGE_POPULARITY_cut_10',
 'max_test_metric_AVERAGE_POPULARITY_cut_15',
 'max_test_metric_AVERAGE_POPULARITY_cut_2',
 'max_test_metric_AVERAGE_POPULARITY_cut_20',
 'max_test_metric_AVERAGE_POPULARITY_cut_3',
 'max_test_metric_AVERAGE_POPULARITY_cut_30',
 'max_test_metric_AVERAGE_POPULARITY_cut_4',


In [4]:
def rank_algorithms(test_datasets, metric_name, performance=None):
    """Rank algorithms within each non-test dataset by a performance metric.

    Parameters
    ----------
    test_datasets : list-like of str
        Values of the ``dataset_name`` column to exclude before ranking.
    metric_name : str
        Metric suffix; the column ``"max_test_metric_" + metric_name`` is ranked.
    performance : pandas.DataFrame, optional
        Table with ``alg_name``, ``dataset_name`` and the metric column.
        Defaults to the notebook-level ``meta_dataset``.

    Returns
    -------
    pandas.DataFrame
        One row per algorithm (index ``alg_name``), one column per remaining
        dataset. Rank 1 is the highest metric value; ties share the lowest
        rank (``method='min'``). Entries are NaN where an algorithm has no
        row, or a NaN metric, for a dataset.

    Raises
    ------
    KeyError
        If the metric column does not exist.
    ValueError
        If no dataset is left after excluding ``test_datasets``.
    """
    if performance is None:
        # Backward-compatible default: fall back to the notebook-level table.
        performance = meta_dataset

    metric_column = "max_test_metric_" + metric_name
    is_test = performance["dataset_name"].isin(test_datasets)
    train_performance = performance[~is_test]

    # Build one rank Series per dataset without mutating the group frames
    # (assigning into groupby slices can raise SettingWithCopyWarning).
    per_dataset_ranks = [
        group.set_index("alg_name")[metric_column]
        .rank(method="min", ascending=False)
        .rename(dataset_name)
        for dataset_name, group in train_performance.groupby("dataset_name")
    ]

    if not per_dataset_ranks:
        # pd.concat would raise an opaque "No objects to concatenate" here.
        raise ValueError(
            "No datasets left to rank after excluding test_datasets"
        )

    # Align on algorithm name; algorithms absent from a dataset get NaN.
    return pd.concat(per_dataset_ranks, axis=1)

In [5]:
def select_algs(test_datasets, metric_name, num_algs = 10):
    """Return the algorithms with the best mean rank on the non-test datasets.

    Parameters
    ----------
    test_datasets : list-like of str
        Dataset names excluded from the ranking.
    metric_name : str
        Metric suffix passed to ``rank_algorithms``.
    num_algs : int, default 10
        Maximum number of algorithm names to return.

    Returns
    -------
    list of str
        Algorithm names ordered from best (lowest) to worst mean rank.
    """
    # Build the rank table from the arguments. Reading a notebook-level
    # `ranked_algs` would ignore both parameters and break on a fresh kernel.
    rank_table = rank_algorithms(test_datasets, metric_name)

    # Mean rank per algorithm across datasets. `mean` skips NaN, so the
    # average covers only the datasets where the algorithm has a rank.
    mean_rank = rank_table.mean(axis=1)

    return list(mean_rank.sort_values().iloc[:num_algs].index)

In [6]:
# Example held-out split: rows whose `dataset_name` is in this list are
# excluded by rank_algorithms before ranking.
test_datasets = ["AnimeReader", "CiaoDVDReader"]

In [7]:
# Example metric: the suffix appended to "max_test_metric_" to pick the
# column of `meta_dataset` that is ranked.
metric_name = "ARHR_ALL_HITS_cut_1"

In [8]:
# Rank table for the chosen split and metric: algorithms as rows, non-test
# datasets as columns.
ranked_algs = rank_algorithms(
    test_datasets=test_datasets,
    metric_name=metric_name,
)
ranked_algs

Unnamed: 0_level_0,BookCrossingReader,DatingReader,EpinionsReader,FilmTrustReader,FrappeReader,GowallaReader,Jester2Reader,LastFMReader,MarketBiasAmazonReader,MarketBiasModClothReader,MovieTweetingsReader,Movielens100KReader,Movielens10MReader,Movielens1MReader,Movielens20MReader,MovielensHetrec2011Reader,NetflixPrizeReader,RecipesReader,WikilensReader
alg_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
CoClustering,21.0,20.0,23.0,24.0,24.0,14.0,20.0,23.0,22.0,23.0,21.0,23.0,22.0,25.0,21.0,23.0,,20.0,25.0
GlobalEffects,19.0,17.0,19.0,25.0,24.0,14.0,21.0,23.0,18.0,21.0,17.0,23.0,17.0,23.0,16.0,21.0,17.0,18.0,22.0
IALSRecommender,9.0,9.0,9.0,16.0,20.0,,24.0,11.0,14.0,19.0,11.0,12.0,12.0,12.0,11.0,16.0,,10.0,11.0
ItemKNNCF_asymmetric,5.0,7.0,6.0,8.0,1.0,1.0,6.0,13.0,10.0,7.0,3.0,5.0,6.0,5.0,5.0,8.0,4.0,6.0,2.0
ItemKNNCF_cosine,1.0,1.0,4.0,5.0,2.0,5.0,1.0,3.0,3.0,3.0,4.0,7.0,5.0,4.0,1.0,3.0,3.0,1.0,6.0
ItemKNNCF_dice,2.0,3.0,1.0,2.0,5.0,3.0,10.0,1.0,7.0,12.0,7.0,1.0,2.0,3.0,2.0,2.0,1.0,2.0,1.0
ItemKNNCF_euclidean,6.0,8.0,10.0,1.0,11.0,,11.0,8.0,13.0,16.0,10.0,3.0,8.0,10.0,9.0,7.0,8.0,13.0,2.0
ItemKNNCF_jaccard,3.0,4.0,3.0,4.0,5.0,4.0,8.0,5.0,7.0,12.0,9.0,2.0,7.0,1.0,3.0,5.0,2.0,2.0,2.0
ItemKNNCF_tversky,3.0,2.0,2.0,3.0,2.0,2.0,8.0,2.0,6.0,7.0,5.0,4.0,4.0,2.0,4.0,1.0,5.0,5.0,2.0
MatrixFactorization_AsySVD_Cython,20.0,,18.0,21.0,18.0,13.0,13.0,16.0,19.0,17.0,18.0,17.0,,18.0,,,,16.0,18.0


In [9]:
# Names of the top algorithms (default num_algs) for this split and metric.
select_algs(test_datasets=test_datasets, metric_name=metric_name)

['ItemKNNCF_cosine',
 'ItemKNNCF_dice',
 'ItemKNNCF_tversky',
 'ItemKNNCF_jaccard',
 'ItemKNNCF_asymmetric',
 'SLIM_BPR_Cython',
 'SLIMElasticNetRecommender',
 'P3alphaRecommender',
 'EASE_R_Recommender',
 'ItemKNNCF_euclidean']

In [10]:
# Mean rank per algorithm across the ranked datasets; show the ten lowest
# (best) values.
ranked_algs.mean(axis=1).sort_values().head(10)

alg_name
ItemKNNCF_cosine             3.263158
ItemKNNCF_dice               3.526316
ItemKNNCF_tversky            3.631579
ItemKNNCF_jaccard            4.631579
ItemKNNCF_asymmetric         5.684211
SLIM_BPR_Cython              5.944444
SLIMElasticNetRecommender    6.933333
P3alphaRecommender           7.611111
EASE_R_Recommender           8.272727
ItemKNNCF_euclidean          8.555556
dtype: float64