In [None]:
import argparse
from collections import defaultdict
import json
import os
import pandas as pd
import numpy as np
from tqdm import tqdm

from agq.metrics import get_rank_error, get_hit_ratio, get_ndcg, get_spearman
from constants import DATA_STREAM_FOLDER, RANKING_FOLDER

In [None]:
# Notebook-level run settings. These take precedence over the values stored in
# the scenario's params.json when the two are merged in the next cell.
args = argparse.Namespace(
    path_to_scenario='XX',  # change me
    streams_root='card_10000_202303010000YY',
    aggregation_level='file',
    k=-1,
)

In [None]:
# Load the parameters saved alongside the scenario, then overlay the
# notebook-level settings so that anything set in `args` wins on key clashes.
with open(f'{args.path_to_scenario}/params.json', 'r') as params_file:
    params = json.load(params_file)

params.update(vars(args))
args = argparse.Namespace(**params)

#### Specify the scenarios and the datasets (stream roots) you want to compare. Example:

In [None]:
# Stream roots as found on the local filesystem; the name depends on when the streams were generated.
stream_roots = ['card_1000_20230302000000']

# One directory per scenario; the indices depend on the order in which the scenarios were run.
scenario_paths = list(map('../synthetics/scenarios/scenario_{}'.format, range(6)))

In [None]:
# Metrics to report; each name must match a key filled in by the evaluation loop.
metrics = [
    'rank_error',
    'spearman',
]

In [None]:
def _score_scenario(scenario_path, data_folder, streams_filenames, top_k):
    """Score every estimated ranking of one scenario against the ground truth.

    For each stream file, all `{stream_name}_ranking_{i}.csv` files found in
    `scenario_path` (i = 0, 1, ... until the first missing index) are compared
    with the truth ranking read from `{data_folder}{RANKING_FOLDER}/{filename}`.

    Args:
        scenario_path: directory holding the scenario's estimated rankings.
        data_folder: prefix of the data directory (concatenated as-is with
            RANKING_FOLDER, so it is expected to carry its own separator).
        streams_filenames: stream file names to evaluate, in output order.
        top_k: number of leading rows of each estimated ranking to keep;
            -1 keeps the full ranking.

    Returns:
        A dict mapping metric name ('rank_error', 'spearman') to a list of
        (mean, std) tuples, one per stream file that has at least one
        estimated ranking.
    """
    scores = defaultdict(list)
    for filename in tqdm(streams_filenames):
        stream_name = filename.split('.csv')[0]
        truth_ranking = pd.read_csv(f'{data_folder}{RANKING_FOLDER}/{filename}')

        filename_scores, counter = defaultdict(list), 0
        while True:
            ranking_path = f'{scenario_path}/{stream_name}_ranking_{counter}.csv'
            if not os.path.isfile(ranking_path):
                break
            estimated_ranking = pd.read_csv(ranking_path)

            if top_k != -1:
                # iloc slices are end-exclusive, so [:top_k] keeps exactly top_k rows
                # (the previous [:top_k - 1] silently dropped the k-th entry).
                estimated_ranking = estimated_ranking.iloc[:top_k]

            filename_scores['rank_error'].append(
                get_rank_error(truth_ranking=truth_ranking, estimated_ranking=estimated_ranking, weighted=False)
            )
            filename_scores['spearman'].append(
                get_spearman(truth_ranking=truth_ranking, estimated_ranking=estimated_ranking, weighted=False)
            )
            counter += 1

        # NOTE(review): a stream without any ranking file adds no entry here, so
        # that scenario's row ends up shorter than the others — confirm this is intended.
        for metric_name, values in filename_scores.items():
            values_array = np.array(values)
            scores[metric_name].append((values_array.mean(), values_array.std()))
    return scores


# res[(root, metric)][metric] is a list with one row per scenario (in
# scenario_paths order); each row holds one "mean (std)" string per stream file.
res = {}
for root in stream_roots:
    args.streams_root = root

    # The stream listing depends only on the root, so build it once per root.
    streams_filenames = os.listdir(f'{args.data_folder}{DATA_STREAM_FOLDER}/')
    if args.streams_root != '':
        streams_filenames = [e for e in streams_filenames if args.streams_root in e]
    streams_filenames = sorted(streams_filenames)

    tables = {met: defaultdict(list) for met in metrics}
    for scenario in scenario_paths:
        args.path_to_scenario = scenario
        print(args)
        for topK in [50]:
            # Every metric is computed in a single pass over the files, so score
            # the scenario once and fan the results out to each metric's table.
            scores = _score_scenario(
                scenario_path=args.path_to_scenario,
                data_folder=args.data_folder,
                streams_filenames=streams_filenames,
                top_k=topK,
            )
            for met in metrics:
                tables[met][met].append(["{:.2f} ({:.2f})".format(mean, std) for mean, std in scores[met]])

    for met in metrics:
        res[(root, met)] = tables[met]

In [None]:
# Sanity check: list the (stream root, metric) pairs that were evaluated.
print(res.keys())

In [None]:
# Emit one LaTeX table per (stream root, metric): after the transpose, scenarios
# are the columns and stream files are the rows.
# NOTE(review): the labels are assigned positionally to the scenarios in
# scenario_paths order — confirm they match the order in which scenarios were run.
scenario_labels = ['UP-F', 'UP-T', 'UN-F', 'UN-T', 'PURE', 'CDF']
column_order = ['PURE', 'CDF', 'UN-F', 'UP-F', 'UN-T', 'UP-T']

for root in stream_roots:
    for met in metrics:
        rows = res[(root, met)][met]
        table = pd.DataFrame(rows, index=scenario_labels).transpose()
        print(table[column_order].to_latex())