# Compute metrics on toy data

In [None]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("..")

from pathlib import Path

import pandas as pd

from src.dependence_measures.compare import compute_bivariate_scores

In [None]:
# Relative path of the folder holding the processed datasets.
PROCESSED_DATA_FOLDER_PATH = Path("..", "data", "processed")

# Column-name lists for inputs and outputs.
# NOTE(review): the cells visible in this notebook pass ["0"] and ["1"] to
# compute_bivariate_scores instead of these lists — confirm which names the
# CSV files actually use.
INPUTS_COLUMNS = ["x_0"]
OUTPUTS_COLUMNS = ["y"]

## Load noiseless toy data

In [None]:
# Read the 500-row noiseless cubic sample; index_col=False keeps pandas from
# using the first column as the index.
data_df = pd.read_csv(
    PROCESSED_DATA_FOLDER_PATH / "toy_data" / "noiseless" / "cubic_500.csv",
    index_col=False,
)
# Last expression of the cell: the notebook renders the first rows.
data_df.head()

## Compute bivariate metrics

In [None]:
# Score the loaded frame using the column lists ["0"] and ["1"]; as the last
# expression of the cell, the result is rendered by the notebook.
# NOTE(review): presumably "0" is the input column and "1" the output column —
# confirm against compute_bivariate_scores (INPUTS_COLUMNS / OUTPUTS_COLUMNS
# defined above are not used here).
compute_bivariate_scores(data_df, ["0"], ["1"])

In [None]:
def display_bivariate_scores(path_rglob):
    """Display the bivariate scores of every CSV file yielded by ``path_rglob``.

    Each file is read with pandas, scored with ``compute_bivariate_scores``
    on the column lists ``["0"]`` and ``["1"]``, and tagged with its file
    name. The per-file results are concatenated, sorted by index and shown
    as a styled table with an ``OrRd`` background gradient (``axis=0``).

    Args:
        path_rglob: Iterable of sortable path objects exposing ``.name`` and
            readable by ``pd.read_csv`` (e.g. the result of ``Path.rglob``).
            It is consumed once.

    Returns:
        None. The table is rendered through ``display``; when the iterable
        yields no file, a short message is printed instead.
    """
    csv_paths = sorted(path_rglob)

    # pd.concat raises an opaque "No objects to concatenate" ValueError on an
    # empty list, so report the empty match explicitly instead.
    if not csv_paths:
        print("No CSV files to score.")
        return

    scores_list = []
    for file in csv_paths:
        data_df = pd.read_csv(file, index_col=False)

        scores_df = compute_bivariate_scores(data_df, ["0"], ["1"])
        scores_df["file"] = file.name
        # Append "file" to the existing index, then move it in front of the
        # two original levels (addressed by position 0 and 1).
        scores_df = scores_df.set_index("file", append=True)
        scores_df = scores_df.reorder_levels(["file", 0, 1])

        scores_list.append(scores_df)

    all_scores_df = pd.concat(scores_list).sort_index()

    # `display` is expected to be provided by the notebook environment.
    display(all_scores_df.style.background_gradient(cmap="OrRd", axis=0))

In [None]:
# Score every "*500.csv" file found recursively under the noiseless toy-data folder.
display_bivariate_scores((PROCESSED_DATA_FOLDER_PATH / "toy_data" / "noiseless").rglob("*500.csv"))

In [None]:
# Score every "*500.csv" file found recursively under the noisy toy-data folder.
display_bivariate_scores((PROCESSED_DATA_FOLDER_PATH / "toy_data" / "noisy").rglob("*500.csv"))