In [None]:
# %reload_ext autoreload
# %autoreload 2
import numpy as np
import pandas as pd
from scipy import stats
import torch

from common import load_x264, split_data, evaluate_ii, evaluate_cc

# import argparse


In [None]:

## Configuration
# Seed handed to split_data so the train/test partition is reproducible.
random_seed = 33154

# Names of the performance columns to consider
performances = ["rel_kbs"]

# Number of nearest neighbours (k) and number of recommendations (r) to evaluate.
# Make multiples to allow better budget comparison
topk_values = (1, 3, 5, 15, 25)
topr_values = (1, 3, 5, 15, 25)

## Load and prepare data
data_dir = "../data"
perf_matrix, input_features, config_features, all_performances = load_x264(
    data_dir=data_dir
)

# Quick sanity report of what was loaded.
print("Loaded data x264")
print(f"perf_matrix:{perf_matrix.shape}")
print(f"input_features:{input_features.shape}")
print(f"config_features:{config_features.shape}")

# split_data returns a mapping; the four entries read below are the train/test
# partitions of the inputs ("inp") and of the configurations ("cfg").
data_split = split_data(perf_matrix, random_state=random_seed)
train_inp = data_split["train_inp"]
train_cfg = data_split["train_cfg"]
test_inp = data_split["test_inp"]
test_cfg = data_split["test_cfg"]

# Lookup table: (inputname, configurationID) -> measured performance column(s).
_lookup_keys = ["inputname", "configurationID"]
input_config_map = (
    perf_matrix[[*_lookup_keys, *performances]]
    .sort_values(_lookup_keys)
    .set_index(_lookup_keys)
)


def _distinct_level_values(level_name):
    """Return the distinct values of one index level of `input_config_map` as a Series."""
    return pd.Series(input_config_map.index.get_level_values(level_name).unique())


all_input_names = _distinct_level_values("inputname")
all_config_ids = _distinct_level_values("configurationID")


def _relative_regret(group):
    """Absolute distance to the group's minimum, divided by |minimum|."""
    # NOTE(review): produces inf/NaN if a group's minimum is 0 — confirm the
    # performance columns are never zero.
    best = group.min()
    return (group - best).abs() / abs(best)


def _min_rank(group):
    """Rank the values of a group; ties share the lowest rank (method="min")."""
    return stats.rankdata(group, method="min")


# Both transforms are applied separately within each input (grouped by "inputname").
regret_map = input_config_map.groupby("inputname").transform(_relative_regret)
# Row-wise mean over the performance columns.
average_mape = regret_map.mean(axis=1)

rank_map = input_config_map.groupby("inputname").transform(_min_rank)
# Row-wise mean over the performance columns.
average_ranks = rank_map.mean(axis=1)



In [None]:

# Dense (input x configuration) matrices and feature tensors used by the evaluation.


def _wide_tensor(long_frame, value_column):
    """Pivot a frame indexed by (inputname, configurationID) into a 2-D tensor.

    Rows are inputs, columns are configurations, and cells hold `value_column`.
    """
    wide = long_frame.reset_index().pivot_table(
        index="inputname", columns="configurationID", values=value_column
    )
    return torch.from_numpy(wide.values)


# Only the first performance column is used for the matrices.
rank_arr = _wide_tensor(rank_map, performances[0])
regret_arr = _wide_tensor(regret_map, performances[0])

input_arr = torch.from_numpy(input_features.values).float()
config_arr = torch.from_numpy(config_features.values).float()

# Boolean masks selecting the train / test rows of the feature tables.
# NOTE(review): these masks follow the row order of input_features / config_features,
# while rank_arr / regret_arr follow the ordering produced by pivot_table —
# assumes both orderings agree; TODO confirm.
train_input_mask = input_features.index.isin(train_inp)
test_input_mask = input_features.index.isin(test_inp)

train_config_mask = config_features.index.isin(train_cfg)
test_config_mask = config_features.index.isin(test_cfg)

train_input_arr = input_arr[train_input_mask]
train_config_arr = config_arr[train_config_mask]



In [None]:

# Result accumulators: one entry is appended per value in topk_values.
# Each list is later tabulated with one column per value in topr_values.
train_cc = []
test_cc = []
test_ii_rank = []
test_ii_ratio = []
test_ii_regret = []


for topk in topk_values:
    # Configuration-configuration evaluation restricted to the training
    # configurations: both the features and the rank columns are sliced.
    cc_on_train = evaluate_cc(
        config_arr[train_config_mask],
        rank_arr=rank_arr[:, train_config_mask],
        n_neighbors=topk,
        n_recs=topr_values,
    )
    train_cc.append(cc_on_train.numpy())

    # Same evaluation on the full arrays, with the test configurations passed as mask.
    cc_on_test = evaluate_cc(
        config_arr,
        rank_arr=rank_arr,
        n_neighbors=topk,
        n_recs=topr_values,
        config_mask=test_config_mask,
    )
    test_cc.append(cc_on_test.numpy())

    # NOTE(review): the result is stored as "test_ii" but the *train* input mask
    # is passed, unlike test_cc above which passes the test mask — confirm
    # against the meaning of evaluate_ii's `input_mask`.
    test_ii = evaluate_ii(
        input_arr,
        rank_arr=rank_arr,
        regret_arr=regret_arr,
        n_neighbors=topk,
        n_recs=topr_values,
        input_mask=train_input_mask,
    )
    # Positions 0, 1, 2 of the result are stored as rank, regret and ratio.
    for position, sink in enumerate((test_ii_rank, test_ii_regret, test_ii_ratio)):
        sink.append(test_ii[position].numpy())



In [None]:
def prepare_df(results, topr_values, topk_values):
    """Tabulate evaluation results with one row per k and one column per r.

    Parameters
    ----------
    results : sequence of row-like
        One row per entry of `topk_values`; each row holds one value per
        entry of `topr_values`.
    topr_values : sequence
        Column labels; they end up under a top-level column label "r".
    topk_values : sequence
        Row labels; they become the index, which is named "k".

    Returns
    -------
    pandas.DataFrame
        Frame indexed by "k" with MultiIndex columns ("r", <topr value>).

    Raises
    ------
    ValueError
        If the number of rows does not match `len(topk_values)`.
    """
    df = pd.DataFrame(results, columns=topr_values)
    # Assign the index directly instead of going through a temporary "k" column:
    # this avoids in-place mutation and cannot overwrite a result column that
    # happens to be labelled "k".
    df.index = pd.Index(topk_values, name="k")
    df.columns = pd.MultiIndex.from_product([["r"], df.columns])
    return df

# TODO Scale ii_ranks results
# TODO Share results in README

# Print every result table in turn: heading, table (rows = k, columns = r), blank line.
for heading, scores in (
    ("train cc\n", train_cc),
    ("test cc\n", test_cc),
    ("ii rank\n", test_ii_rank),
    ("ii ratio\n", test_ii_ratio),
    ("ii regret\n", test_ii_regret),
):
    print(heading, prepare_df(scores, topr_values, topk_values), "\n")