[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/crunchdao/quickstarters/blob/master/competitions/causality-discovery/supervised_baseline/supervised_baseline.ipynb)

In [1]:
# NOTE(review): the value passed to --token below appears to be a personal
# credential (the recorded output of this cell reports it as expired or
# invalid). Avoid committing a live token; paste a fresh setup command from
# the competition hub when running this cell.
%pip install crunch-cli --upgrade
%pip install gcastle torch
!crunch setup --notebook causality-discovery default --token bWcbLyy3zswW3nxvVGR34UinGGB5V5ZR03bzxnZe16q7lbk0iPZeDKYKJMLb8p5x

Collecting crunch-cli
  Downloading crunch_cli-4.1.3-py3-none-any.whl.metadata (3.2 kB)
Downloading crunch_cli-4.1.3-py3-none-any.whl (81 kB)
Installing collected packages: crunch-cli
  Attempting uninstall: crunch-cli
    Found existing installation: crunch-cli 4.1.1
    Uninstalling crunch-cli-4.1.1:
      Successfully uninstalled crunch-cli-4.1.1
Successfully installed crunch-cli-4.1.3
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.

---
Your token seems to have expired or is invalid.

Please follow this link to copy and paste your new setup command:
https://hub.crunchdao.com/competitions/causality-discovery/submit

If you think that is an error, please contact an administrator.


In [2]:
import crunch
crunch = crunch.load_notebook()

loaded inline runner with module: <module '__main__'>


## Preliminary step

### Imports

In [3]:
"""
This is a basic example of what you need to do to enter the crunch.
The code will not have access to the internet (or any socket related operation).
"""

import functools
import os
import typing
import torch
import joblib
import networkx as nx
import numpy as np
import pandas as pd
from scipy.stats import pearsonr, ttest_rel
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import mutual_info_regression
from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import GroupKFold, cross_val_score
from sklearn.preprocessing import LabelEncoder
from tqdm.auto import tqdm
import pandas as pd
import numpy as np
from scipy.stats import ttest_rel, sem, t
from statsmodels.stats.multitest import multipletests
import logging
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis, spearmanr, kendalltau, shapiro
from sklearn.feature_selection import mutual_info_regression
from statsmodels.stats.outliers_influence import variance_inflation_factor
from scipy.stats import entropy
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score
import warnings
import networkx as nx
import numpy as np
import pandas as pd
import networkx as nx

from tqdm import tqdm
from castle.algorithms import PC  

import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis, spearmanr, kendalltau, shapiro, entropy
from sklearn.feature_selection import mutual_info_regression
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score
import warnings
from statsmodels.tsa.stattools import grangercausalitytests
from lightgbm import LGBMClassifier
from sklearn.model_selection import GroupKFold
from sklearn.metrics import balanced_accuracy_score
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm
2024-10-18 22:39:09,085 - c:\Users\rafae\miniconda3\envs\torch_env\lib\site-packages\castle\backend\__init__.py[line:36] - INFO: You can use `os.environ['CASTLE_BACKEND'] = backend` to set the backend(`pytorch` or `mindspore`).
2024-10-18 22:39:09,535 - c:\Users\rafae\miniconda3\envs\torch_env\lib\site-packages\castle\algorithms\__init__.py[line:36] - INFO: You are using ``pytorch`` as the backend.


## Useful Functions

The following functions are provided to help you get the class labels of the variables in a graph according to their relationship to `X` and `Y`. The variables can be colliders, confounders, mediators, etc.

This is the core of the solution code, which reads one record at a time, applies the PC algorithm, ensures that the result is a DAG, and then puts the result into a single data frame in the required format, ready for submission.

In [4]:
def graph_nodes_representation(graph, nodelist):
    """
    Create an alternative representation of a graph which is hashable
    and for which equivalent graphs have the same hash.

    Python cannot properly use nx.Graph/DiGraph as keys for
    dictionaries, because two equivalent graphs that only differ in
    the order of their nodes would result in different keys. This is
    undesirable here.

    So the graph is transformed into an equivalent form that is based
    on a specific nodelist and that is hashable: the adjacency matrix
    is extracted with the nodes in that fixed order and then flattened
    into a tuple.

    Parameters:
        graph: the nx.Graph/nx.DiGraph to represent.
        nodelist: the nodes, in the order to use for the rows and
            columns of the adjacency matrix.

    Returns:
        A tuple holding the entries of the adjacency matrix, row by row.
    """

    # toarray() always yields a plain 2-D numpy array. todense() may
    # instead return an np.matrix (when the sparse object is a sparse
    # *matrix* rather than a sparse *array*); flattening an np.matrix
    # keeps it 2-D, so tuple() would wrap one unhashable row instead of
    # the individual entries.
    adjacency_matrix = nx.adjacency_matrix(graph, nodelist=nodelist).toarray()

    # A numpy array is not hashable, a tuple of its entries is:
    return tuple(adjacency_matrix.flatten())

In [5]:
def create_graph_label():
    """
    Build the lookup tables from three-node graphs to class labels.

    Every graph is defined over the nodes "v", "X" and "Y" and contains
    the edge X -> Y; the label names the role of "v" with respect to
    "X" and "Y".

    Returns a pair (graph_label, adjacency_label):
    - graph_label maps each nx.DiGraph to its label;
    - adjacency_label maps the hashable representation of the same
      graph (nodes ordered as "v", "X", "Y") to the same label, so
      that equivalent graphs share a key.
    """
    node_order = ["v", "X", "Y"]

    # (graph specification, label) pairs, in the order they are registered:
    templates = [
        ([("X", "Y"), ("v", "X"), ("v", "Y")], "Confounder"),
        ([("X", "Y"), ("X", "v"), ("Y", "v")], "Collider"),
        ([("X", "Y"), ("X", "v"), ("v", "Y")], "Mediator"),
        ([("X", "Y"), ("v", "X")], "Cause of X"),
        ([("X", "Y"), ("v", "Y")], "Cause of Y"),
        ([("X", "Y"), ("X", "v")], "Consequence of X"),
        ([("X", "Y"), ("Y", "v")], "Consequence of Y"),
        ({"X": ["Y"], "v": []}, "Independent"),
    ]

    graph_label = {}
    adjacency_label = {}
    for specification, name in templates:
        digraph = nx.DiGraph(specification)
        graph_label[digraph] = name
        adjacency_label[graph_nodes_representation(digraph, node_order)] = name

    return graph_label, adjacency_label

In [6]:
def get_labels(adjacency_matrix, adjacency_label):
    """
    Map every variable of an adjacency matrix (other than X and Y) to its label.

    adjacency_matrix is a pd.DataFrame whose rows and columns are node
    names; adjacency_label is a dictionary keyed by the flattened 3x3
    adjacency of (variable, "X", "Y"). A KeyError is raised when a
    variable's 3x3 pattern is not a key of adjacency_label.
    """

    def pattern_of(name):
        # 3x3 sub-matrix restricted to (name, X, Y), flattened row by row
        # into a tuple so that it can be used as a dictionary key.
        trio = [name, "X", "Y"]
        block = adjacency_matrix.loc[trio, trio]
        return tuple(block.values.flatten())

    candidates = adjacency_matrix.columns.drop(["X", "Y"])
    return {name: adjacency_label[pattern_of(name)] for name in candidates}

## Variable embedding

The following functions compute features for each variable in a dataset according to its relationship to the other variables, and specifically to `X` and `Y`. The features are divided into three groups:
- Based on the Pearson's correlation coefficient between variables.
- Based on the $t$ test.
- Based on mutual information.

Using the same API and the `get_labels()` function above, we can also get labels for each variable.

In [7]:
def pearson_correlation(dataset):
    """
    Given a dataset, we compute the correlation-based features for each
    variable, which are the correlation between that variable and X and Y,
    as well as summary statistics (max, min, mean, std) of the absolute
    correlations between that variable and all the other columns.

    Parameters:
        dataset: pd.DataFrame with columns "X" and "Y" plus the variables
            to describe; it must carry a `name` attribute, which is
            copied into the "dataset" column.

    Returns:
        pd.DataFrame with one row per variable and the columns
        "dataset", "variable", the features above and "corr(X,Y)".
        NaN values are replaced by 0.
    """

    variables = dataset.columns.drop(["X", "Y"])

    # The correlation matrix does not depend on the loop variable: compute
    # it once rather than once per variable (plus two 2x2 matrices for the
    # correlations with X and Y, which are entries of the same matrix).
    correlations = dataset.corr()

    df = []
    for variable in variables:
        # Absolute correlation of the variable with every other column:
        tmp = correlations.loc[variable].drop(variable).abs()

        df.append({
            "variable": variable,
            "corr(v,X)": correlations.loc[variable, "X"],
            "corr(v,Y)": correlations.loc[variable, "Y"],
            "max(corr(v, others))": tmp.max(),
            "min(corr(v, others))": tmp.min(),
            "mean(corr(v, others))": tmp.mean(),
            "std(corr(v, others))": tmp.std(),
        })

    df = pd.DataFrame(df)
    df["dataset"] = dataset.name

    df["corr(X,Y)"] = correlations.loc["X", "Y"]

    # The correlation is NaN when the variance is 0, so we fill with 0
    df.fillna(0, inplace=True)

    # Reorder columns:
    df = df[["dataset"] + [colname for colname in df.columns if colname != "dataset"]]

    return df

In [8]:
def ttest(dataset, pvalue_threshold=0.05):
    """
    Build paired t-test features for each variable of a dataset.

    For every variable v (every column except X and Y), the paired
    t-test of v against X and of v against Y is computed. The t
    statistic and a 0/1 flag telling whether the p-value is at most
    pvalue_threshold become features of v. The same two quantities for
    X against Y are added to every row.

    NOTE: a later cell of this notebook defines `ttest` again with
    additional features; when the cells are run in order, that later
    definition replaces this one.
    """

    def significant(result):
        # 1.0 when the test is significant at the threshold, 0.0 otherwise
        return (result.pvalue <= pvalue_threshold).astype(float)

    records = []
    for variable in dataset.columns.drop(["X", "Y"]):
        against_x = ttest_rel(dataset[variable], dataset["X"])
        against_y = ttest_rel(dataset[variable], dataset["Y"])

        records.append({
            "variable": variable,
            "ttest(v,X)": against_x.statistic,
            f"pvalue(ttest(v,X))<={pvalue_threshold}": significant(against_x),
            "ttest(v,Y)": against_y.statistic,
            f"pvalue(ttest(v,Y))<={pvalue_threshold}": significant(against_y),
        })

    df = pd.DataFrame(records)
    df["dataset"] = dataset.name

    x_against_y = ttest_rel(dataset["X"], dataset["Y"])
    df["ttest(X,Y)"] = x_against_y.statistic
    df[f"pvalue(ttest(X,Y))<={pvalue_threshold}"] = significant(x_against_y)

    # The t-test can return NaN (e.g. zero variance), which is replaced by 0:
    df.fillna(0, inplace=True)

    # Put the "dataset" column first, keeping the others in order:
    ordered = ["dataset"] + [colname for colname in df.columns if colname != "dataset"]
    return df[ordered]

In [9]:
def mutual_information(dataset):
    """
    Build mutual-information-based features for each variable of a dataset.

    For every variable v (every column except X and Y) the features are
    the mutual information between v and X, between v and Y, and
    summary statistics (max, min, mean, std) of the mutual information
    between v and all the other columns. The mutual information between
    X and Y is added to every row.
    """

    def pair_mi(source, target):
        # Mutual information between two single columns of the dataset
        return mutual_info_regression(dataset[[source]], dataset[target], discrete_features=False)[0]

    rows = []
    for variable in dataset.columns.drop(["X", "Y"]):
        # Mutual information of the variable with every other column:
        mi_others = mutual_info_regression(dataset.drop(columns=[variable]), dataset[variable])

        rows.append({
            "variable": variable,
            "MI(v,X)": pair_mi(variable, "X"),
            "MI(v,Y)": pair_mi(variable, "Y"),
            "max(MI(v, others))": mi_others.max(),
            "min(MI(v, others))": mi_others.min(),
            "mean(MI(v, others))": mi_others.mean(),
            "std(MI(v, others))": mi_others.std(),
        })

    df = pd.DataFrame(rows)
    df["dataset"] = dataset.name

    df["MI(X,Y)"] = pair_mi("X", "Y")

    # Put the "dataset" column first, keeping the others in order:
    ordered = ["dataset"] + [colname for colname in df.columns if colname != "dataset"]
    return df[ordered]

In [10]:
def label(adjacency_matrix):
    """
    Build the table of class labels for the variables of a graph.

    adjacency_matrix is the graph as a pd.DataFrame carrying a `name`
    attribute. The result has one row per variable (every column except
    X and Y) with the columns "dataset", "variable" and "label".
    """

    # Only the adjacency-keyed table is needed to look labels up:
    _, adjacency_label = create_graph_label()
    label_of = get_labels(adjacency_matrix, adjacency_label)

    variables = adjacency_matrix.columns.drop(["X", "Y"])
    df = pd.DataFrame({
        "variable": variables,
        "label": [label_of[name] for name in variables],
    })
    df["dataset"] = adjacency_matrix.name

    # Put the "dataset" column first, keeping the others in order:
    ordered = ["dataset"] + [colname for colname in df.columns if colname != "dataset"]
    return df[ordered]

In [11]:


def cohen_d(x, y):
    """Compute Cohen's d effect size for paired samples."""
    paired_difference = x - y
    mean_difference = np.mean(paired_difference)
    # ddof=1: sample (not population) standard deviation of the differences
    spread = np.std(paired_difference, ddof=1)
    return mean_difference / spread

def compute_confidence_interval(x, y, confidence=0.95):
    """Compute the confidence interval for the mean of the paired differences x - y."""
    paired_difference = x - y
    degrees_of_freedom = len(paired_difference) - 1
    center = np.mean(paired_difference)
    # Two-sided critical value of Student's t at the requested confidence
    critical_value = t.ppf((1 + confidence) / 2., degrees_of_freedom)
    half_width = sem(paired_difference) * critical_value
    return center - half_width, center + half_width

def ttest(dataset, pvalue_threshold=0.05):
    """
    Build paired-sample features for each variable of a dataset.

    For every variable v (every column except X and Y) and for each of
    X and Y, the features are: the paired t statistic, a 0/1 flag
    telling whether the p-value is at most pvalue_threshold, Cohen's d
    and the bounds of the confidence interval of the mean difference.
    The same quantities for X against Y are added to every row.

    The "dataset" column is taken from the `name` attribute of the
    dataset, or 'Unknown' when it has none. NaN values are replaced by 0.
    """

    def significant(result):
        # 1.0 when the test is significant at the threshold, 0.0 otherwise
        return (result.pvalue <= pvalue_threshold).astype(float)

    variables = dataset.columns.drop(["X", "Y"])
    x_values = dataset["X"]
    y_values = dataset["Y"]

    records = []
    for variable in variables:
        values = dataset[variable]
        against_x = ttest_rel(values, x_values)
        against_y = ttest_rel(values, y_values)
        low_x, high_x = compute_confidence_interval(values, x_values)
        low_y, high_y = compute_confidence_interval(values, y_values)

        records.append({
            "variable": variable,
            "ttest(v,X)": against_x.statistic,
            f"pvalue(ttest(v,X))<={pvalue_threshold}": significant(against_x),
            "ttest(v,Y)": against_y.statistic,
            f"pvalue(ttest(v,Y))<={pvalue_threshold}": significant(against_y),
            "cohen_d(v,X)": cohen_d(values, x_values),
            "cohen_d(v,Y)": cohen_d(values, y_values),
            "ci_low(v,X)": low_x,
            "ci_high(v,X)": high_x,
            "ci_low(v,Y)": low_y,
            "ci_high(v,Y)": high_y
        })

    df = pd.DataFrame(records)
    df["dataset"] = getattr(dataset, 'name', 'Unknown')

    x_against_y = ttest_rel(x_values, y_values)
    low_xy, high_xy = compute_confidence_interval(x_values, y_values)
    df["ttest(X,Y)"] = x_against_y.statistic
    df[f"pvalue(ttest(X,Y))<={pvalue_threshold}"] = significant(x_against_y)
    df["cohen_d(X,Y)"] = cohen_d(x_values, y_values)
    df['ci_lowXY'] = low_xy
    df['ci_highXY'] = high_xy

    # The t-test can return NaN (e.g. zero variance), which is replaced by 0:
    df.fillna(0, inplace=True)

    # Put the "dataset" column first, keeping the others in order:
    ordered = ["dataset"] + [colname for colname in df.columns if colname != "dataset"]
    return df[ordered]


In [12]:
def _nan_summary(values):
    """Return (max, min, mean, std) of values ignoring NaNs, or four NaNs when no value is usable."""
    values = np.asarray(values, dtype=float)
    if values.size == 0 or np.all(np.isnan(values)):
        return np.nan, np.nan, np.nan, np.nan
    return np.nanmax(values), np.nanmin(values), np.nanmean(values), np.nanstd(values)


def _series_summary(series):
    """Return (max, min, mean, std) of a pandas Series, or four NaNs when it is empty."""
    if series.empty:
        return np.nan, np.nan, np.nan, np.nan
    return series.max(), series.min(), series.mean(), series.std()


def _linear_fit_predict(features, target):
    """Fit a LinearRegression of target on features and return its in-sample predictions."""
    return LinearRegression().fit(features, target).predict(features)


def _exponential_r2(v, target):
    """Return R² of target against exp(linear fit of log(target) on v), or NaN if target is not strictly positive."""
    if not np.all(target > 0):
        return np.nan
    return r2_score(target, np.exp(_linear_fit_predict(v, np.log(target))))


def _shapiro_pvalue(residuals):
    """Return the Shapiro-Wilk p-value of residuals, or NaN when the test raises."""
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        try:
            return shapiro(residuals)[1]
        except Exception:
            # Best effort: a failing normality test must not abort the
            # feature extraction, but Ctrl-C / SystemExit must still propagate.
            return np.nan


def _granger_pvalue(frame):
    """Return the lag-1 ssr_ftest p-value of grangercausalitytests on a two-column frame, or NaN when it raises."""
    try:
        return grangercausalitytests(frame, maxlag=1, verbose=False)[1][0]["ssr_ftest"][1]
    except Exception:
        # Best effort, same rationale as for the Shapiro-Wilk test.
        return np.nan


def advanced_nonlinear_feature_extraction(dataset):
    """
    Given a dataset, compute an extensive set of correlation-based and statistical features for each variable,
    including metrics that capture non-linear relationships such as quadratic and exponential dependencies,
    specifically between the variable and other variables.

    Parameters:
        dataset: pd.DataFrame containing at least the columns 'X' and 'Y'. Its `name`
            attribute, when present, is copied into the "dataset" column
            ('Unknown' otherwise).

    Returns:
        pd.DataFrame with one row per described variable. When the dataset has more
        than 3 columns the described variables are all columns except X and Y and
        "num_min_vals" is False; otherwise every column (X and Y included) is
        described and "num_min_vals" is True. NaN values are replaced by 0.

    Raises:
        ValueError: if 'X' or 'Y' is not a column of the dataset.
    """
    # Ensure 'X' and 'Y' are in the dataset
    if 'X' not in dataset.columns or 'Y' not in dataset.columns:
        raise ValueError("Dataset must contain 'X' and 'Y' columns.")

    if dataset.shape[1] > 3:
        num_min_vals = False
        variables = dataset.columns.drop(["X", "Y"])
    else:
        num_min_vals = True
        variables = dataset.columns
    df_list = []

    # Variance Inflation Factor of every described variable (NaNs replaced by 0)
    vif_matrix = dataset[variables].fillna(0).values
    vif_dict = {
        variable: variance_inflation_factor(vif_matrix, i)
        for i, variable in enumerate(variables)
    }

    # X and Y do not depend on the variable being described
    X = dataset["X"].values
    Y = dataset["Y"].values

    for variable in variables:
        # Every column except the current variable (X and Y are kept)
        other_vars = dataset.columns.drop([variable])
        other_vars = other_vars.map(str)  # Ensure other_vars are strings

        # Regressor data: the variable itself, and its degree-2 polynomial
        # expansion, which is the same for every target and is therefore
        # computed once per variable.
        v = dataset[variable].values.reshape(-1, 1)
        v_poly = PolynomialFeatures(degree=2).fit_transform(v)
        v_filled = dataset[[variable]].fillna(0)

        # R² of the linear / quadratic / exponential fits and mutual
        # information between the variable and each other column
        r2_lin_list = []
        r2_quad_list = []
        r2_exp_list = []
        mi_list = []

        for other_var in other_vars:
            ov = dataset[other_var].values

            # Linear regression of other_var on v
            r2_lin_list.append(r2_score(ov, _linear_fit_predict(v, ov)))

            # Quadratic regression of other_var on v
            r2_quad_list.append(r2_score(ov, _linear_fit_predict(v_poly, ov)))

            # Exponential regression of other_var on v (NaN when other_var
            # has a non-positive value, since its logarithm is needed)
            r2_exp_list.append(_exponential_r2(v, ov))

            # Mutual information between v and other_var
            mi_list.append(
                mutual_info_regression(
                    v_filled, dataset[other_var], discrete_features=False, random_state=0
                )[0]
            )

        # Statistical summaries over the other columns
        max_r2_lin_others, min_r2_lin_others, mean_r2_lin_others, std_r2_lin_others = _nan_summary(r2_lin_list)
        max_r2_quad_others, min_r2_quad_others, mean_r2_quad_others, std_r2_quad_others = _nan_summary(r2_quad_list)
        max_r2_exp_others, min_r2_exp_others, mean_r2_exp_others, std_r2_exp_others = _nan_summary(r2_exp_list)
        max_mi_others, min_mi_others, mean_mi_others, std_mi_others = _nan_summary(mi_list)

        # Absolute correlation with the other columns
        tmp_corr = dataset[other_vars].corrwith(dataset[variable]).abs()
        tmp_corr.index = tmp_corr.index.map(str)  # Ensure index is string

        # Absolute covariance with the other columns
        cov_matrix = dataset[[variable] + list(other_vars)].cov()
        cov_matrix.index = cov_matrix.index.map(str)
        cov_matrix.columns = cov_matrix.columns.map(str)
        tmp_cov = cov_matrix[variable].abs()
        tmp_cov = tmp_cov.drop(variable, errors='ignore')  # Exclude self-covariance

        max_corr, min_corr, mean_corr, std_corr = _series_summary(tmp_corr)
        max_cov, min_cov, mean_cov, std_cov = _series_summary(tmp_cov)

        # Linear regression of X on v, with Shapiro-Wilk test on the residuals
        X_pred_lin = _linear_fit_predict(v, X)
        r2_lin_v_X = r2_score(X, X_pred_lin)
        shapiro_p_X = _shapiro_pvalue(X - X_pred_lin)

        # Linear regression of Y on v, with Shapiro-Wilk test on the residuals
        Y_pred_lin = _linear_fit_predict(v, Y)
        r2_lin_v_Y = r2_score(Y, Y_pred_lin)
        shapiro_p_Y = _shapiro_pvalue(Y - Y_pred_lin)

        # Quadratic regression of X and of Y on v
        r2_quad_v_X = r2_score(X, _linear_fit_predict(v_poly, X))
        r2_quad_v_Y = r2_score(Y, _linear_fit_predict(v_poly, Y))

        # Exponential regression of X and of Y on v
        r2_exp_v_X = _exponential_r2(v, X)
        r2_exp_v_Y = _exponential_r2(v, Y)

        # Granger causality test p-values.
        # NOTE(review): per the statsmodels documentation the test asks
        # whether the SECOND column Granger-causes the first, so
        # "granger_v_X" tests X -> v; confirm this is the intended direction.
        granger_v_X = _granger_pvalue(dataset[[variable, "X"]])
        granger_v_Y = _granger_pvalue(dataset[[variable, "Y"]])

        # Pearson correlation with X and Y
        corr_v_X = dataset[[variable, "X"]].corr().iloc[0, 1]
        corr_v_Y = dataset[[variable, "Y"]].corr().iloc[0, 1]

        # Spearman's rank correlation
        spearman_v_X, _ = spearmanr(dataset[variable], dataset["X"])
        spearman_v_Y, _ = spearmanr(dataset[variable], dataset["Y"])

        # Kendall's tau correlation
        kendall_v_X, _ = kendalltau(dataset[variable], dataset["X"])
        kendall_v_Y, _ = kendalltau(dataset[variable], dataset["Y"])

        # Mutual information
        mi_v_X = mutual_info_regression(
            v_filled, dataset["X"], discrete_features=False, random_state=0
        )[0]
        mi_v_Y = mutual_info_regression(
            v_filled, dataset["Y"], discrete_features=False, random_state=0
        )[0]

        # Entropy of the variable's 10-bin histogram
        entropy_v = entropy(pd.cut(dataset[variable], bins=10).value_counts())

        # Variable statistics
        v_mean = dataset[variable].mean()
        v_median = dataset[variable].median()
        v_std = dataset[variable].std()
        v_skewness = skew(dataset[variable], nan_policy='omit')
        v_kurtosis = kurtosis(dataset[variable], nan_policy='omit')
        v_min = dataset[variable].min()
        v_max = dataset[variable].max()
        v_range = v_max - v_min
        v_iqr = dataset[variable].quantile(0.75) - dataset[variable].quantile(0.25)

        # Variance Inflation Factor (VIF)
        v_vif = vif_dict.get(variable, np.nan)

        # Add all computed features to the list
        df_list.append({
            "variable": variable,
            # Existing features...
            "granger_v_X": granger_v_X,
            "granger_v_Y": granger_v_Y,
            "corr(v,X)": corr_v_X,
            "corr(v,Y)": corr_v_Y,
            "spearman(v,X)": spearman_v_X,
            "spearman(v,Y)": spearman_v_Y,
            "kendall(v,X)": kendall_v_X,
            "kendall(v,Y)": kendall_v_Y,
            "mutual_info(v,X)": mi_v_X,
            "mutual_info(v,Y)": mi_v_Y,
            "r2_lin_v_X": r2_lin_v_X,
            "r2_lin_v_Y": r2_lin_v_Y,
            "r2_quad_v_X": r2_quad_v_X,
            "r2_quad_v_Y": r2_quad_v_Y,
            "r2_exp_v_X": r2_exp_v_X,
            "r2_exp_v_Y": r2_exp_v_Y,
            "shapiro_p_X": shapiro_p_X,
            "shapiro_p_Y": shapiro_p_Y,
            "entropy(v)": entropy_v,
            "max(corr(v, others))": max_corr,
            "min(corr(v, others))": min_corr,
            "mean(corr(v, others))": mean_corr,
            "std(corr(v, others))": std_corr,
            "max(cov(v, others))": max_cov,
            "min(cov(v, others))": min_cov,
            "mean(cov(v, others))": mean_cov,
            "std(cov(v, others))": std_cov,
            "max(mi(v, others))": max_mi_others,
            "min(mi(v, others))": min_mi_others,
            "mean(mi(v, others))": mean_mi_others,
            "std(mi(v, others))": std_mi_others,
            # New features for Linear Model R²
            "max(r2_lin_v, others)": max_r2_lin_others,
            "min(r2_lin_v, others)": min_r2_lin_others,
            "mean(r2_lin_v, others)": mean_r2_lin_others,
            "std(r2_lin_v, others)": std_r2_lin_others,
            # New features for Quadratic Model R²
            "max(r2_quad_v, others)": max_r2_quad_others,
            "min(r2_quad_v, others)": min_r2_quad_others,
            "mean(r2_quad_v, others)": mean_r2_quad_others,
            "std(r2_quad_v, others)": std_r2_quad_others,
            # New features for Exponential Model R²
            "max(r2_exp_v, others)": max_r2_exp_others,
            "min(r2_exp_v, others)": min_r2_exp_others,
            "mean(r2_exp_v, others)": mean_r2_exp_others,
            "std(r2_exp_v, others)": std_r2_exp_others,
            # Variable statistics...
            "v_mean": v_mean,
            "v_median": v_median,
            "v_std": v_std,
            "v_skewness": v_skewness,
            "v_kurtosis": v_kurtosis,
            "v_min": v_min,
            "v_max": v_max,
            "v_range": v_range,
            "v_iqr": v_iqr,
            "v_vif": v_vif,
            "num_min_vals": num_min_vals
        })

    # Convert the list of dictionaries to a DataFrame
    df = pd.DataFrame(df_list)
    df["dataset"] = getattr(dataset, 'name', 'Unknown')

    # Features describing the relationship between X and Y (same on every row)
    corr_X_Y = pearsonr(dataset["X"], dataset["Y"])[0]
    spearman_X_Y, _ = spearmanr(dataset["X"], dataset["Y"])
    kendall_X_Y, _ = kendalltau(dataset["X"], dataset["Y"])
    mi_X_Y = mutual_info_regression(
        dataset[["X"]].fillna(0), dataset["Y"], discrete_features=False, random_state=0
    )[0]
    # Linear regression of Y on X, fitted with its own model instead of
    # relying on a regressor left over from the loop above
    Y_pred_lin = _linear_fit_predict(dataset[["X"]], dataset["Y"])
    r2_lin_X_Y = r2_score(dataset["Y"], Y_pred_lin)
    # Shapiro-Wilk test on residuals
    shapiro_p_XY = _shapiro_pvalue(dataset["Y"] - Y_pred_lin)

    df["corr(X,Y)"] = corr_X_Y
    df["spearman(X,Y)"] = spearman_X_Y
    df["kendall(X,Y)"] = kendall_X_Y
    df["mutual_info(X,Y)"] = mi_X_Y
    df["r2_lin_X_Y"] = r2_lin_X_Y
    df["shapiro_p_XY"] = shapiro_p_XY

    # Handle NaNs
    df.fillna(0, inplace=True)

    # Reorder columns
    columns_order = [
        "dataset", "variable",
        # Existing features...
        "granger_v_X", "granger_v_Y",
        "corr(v,X)", "corr(v,Y)", "spearman(v,X)", "spearman(v,Y)",
        "kendall(v,X)", "kendall(v,Y)", "mutual_info(v,X)", "mutual_info(v,Y)",
        "r2_lin_v_X", "r2_lin_v_Y", "r2_quad_v_X", "r2_quad_v_Y",
        "r2_exp_v_X", "r2_exp_v_Y", "shapiro_p_X", "shapiro_p_Y",
        "entropy(v)",
        "max(corr(v, others))", "min(corr(v, others))", "mean(corr(v, others))", "std(corr(v, others))",
        "max(cov(v, others))", "min(cov(v, others))", "mean(cov(v, others))", "std(cov(v, others))",
        # New features for Linear Model R²
        "max(r2_lin_v, others)", "min(r2_lin_v, others)", "mean(r2_lin_v, others)", "std(r2_lin_v, others)",
        # New features for Quadratic Model R²
        "max(r2_quad_v, others)", "min(r2_quad_v, others)", "mean(r2_quad_v, others)", "std(r2_quad_v, others)",
        # New features for Exponential Model R²
        "max(r2_exp_v, others)", "min(r2_exp_v, others)", "mean(r2_exp_v, others)", "std(r2_exp_v, others)",
        # new mutual information features
        "max(mi(v, others))", "min(mi(v, others))", "mean(mi(v, others))", "std(mi(v, others))",
        # Variable statistics...
        "v_mean", "v_median", "v_std", "v_skewness", "v_kurtosis",
        "v_min", "v_max", "v_range", "v_iqr", "v_vif",
        # Correlations between X and Y
        "corr(X,Y)", "spearman(X,Y)", "kendall(X,Y)", "mutual_info(X,Y)",
        "r2_lin_X_Y", "shapiro_p_XY","num_min_vals"
    ]
    df = df[columns_order]

    return df

In [13]:

def fix_DAG(g):
    """
    Ensure that the graph is a DAG and has an edge X→Y

    We look for cycles, and remove an edge in each cycle, until there are no cycles left.

    Inputs: g: nx.DiGraph containing the nodes 'X' and 'Y' (not modified)
    Output: gg: nx.DiGraph, a modified copy of g

    The edge removed from each cycle is picked with np.random, so the
    result depends on the state of NumPy's global random generator.

    This function provides just a possible solution to the problem
    of DAG-ifying a graph. Other solutions can be conceived that could
    be better for the competition.
    """
    assert 'X' in g.nodes
    assert 'Y' in g.nodes

    gg = g.copy()

    # Add X→Y if it is missing
    if ('X', 'Y') not in gg.edges:
        gg.add_edge('X', 'Y')

    # Look for cycles and remove them
    while not nx.is_directed_acyclic_graph(gg):

        h = gg.copy()

        # Repeatedly remove all the sources and sinks: they cannot be part
        # of a cycle, and once they are gone every remaining node has both
        # an incoming and an outgoing edge.
        while True:
            finished = True

            for i, v in list(nx.in_degree_centrality(h).items()):
                if v == 0:
                    h.remove_node(i)
                    finished = False

            for i, v in list(nx.out_degree_centrality(h).items()):
                if v == 0:
                    h.remove_node(i)
                    finished = False

            if finished:
                break

        if len(h.nodes) == 0:
            break  # No cycles left

        # Find a cycle, with a random walk starting at the first remaining node
        node = list(h.nodes)[0]
        cycle = [node]
        while True:
            edges = list(h.out_edges(node))
            if not edges:
                break  # Dead end, shouldn't happen
            _, node = edges[np.random.choice(len(edges))]
            if node in cycle:
                break
            cycle.append(node)

        # We have a path that ends with a cycle: remove the beginning, if it
        # is not part of the cycle, and close the cycle. The node labels are
        # kept in a plain list: converting them to a NumPy array would coerce
        # labels of mixed types (e.g. integers next to 'X'/'Y') to strings,
        # and the edges built from them would then not exist in the graph.
        start = cycle.index(node)
        cycle = cycle[start:] + [node]

        # Edges in that cycle
        edges = list(zip(cycle[:-1], cycle[1:]))

        # Pick an edge at random, but make sure it is not X→Y -- we want to keep that one
        edges = [e for e in edges if e != ('X', 'Y')]
        if not edges:
            break  # Cannot remove X→Y
        edge = edges[np.random.choice(len(edges))]

        gg.remove_edge(*edge)

    return gg

In [14]:


def generate_pc_parallel(dataset, variant_, ci_test_, alpha_):
    """
    Run the PC algorithm on one dataset and derive per-variable features.

    The learned adjacency matrix is turned into a directed graph, passed
    through `fix_DAG`, and then summarised for every variable (column).

    Row statistics (`count`, `sum`, `mean`, `std`, `has_x`, `has_y`) are
    read from the raw PC adjacency matrix; all graph measures (centrality,
    degree, path lengths, clustering, ancestor/descendant flags) are
    computed on the graph returned by `fix_DAG`.

    Args:
        dataset: DataFrame whose columns are the variables; it must
            contain the columns 'X' and 'Y'.
        variant_: forwarded to `PC(variant=...)`.
        ci_test_: forwarded to `PC(ci_test=...)`.
        alpha_: forwarded to `PC(alpha=...)`.

    Returns:
        A DataFrame with one row per variable, with "dataset" as the first
        column, followed by "variable" and the feature columns, all
        prefixed with `pc_{variant_}_{ci_test_}_{alpha_}`.
    """
    nodes = dataset.columns
    model = PC(variant=variant_, ci_test=ci_test_, alpha=alpha_)
    model.learn(dataset)
    A_hat = pd.DataFrame(model.causal_matrix, columns=nodes, index=nodes)
    g_hat = nx.from_pandas_adjacency(A_hat, create_using=nx.DiGraph)
    g_hat = fix_DAG(g_hat)

    # Whole-graph measures are computed once and looked up per node below.
    betweenness = nx.betweenness_centrality(g_hat)
    closeness = nx.closeness_centrality(g_hat)
    pagerank = nx.pagerank(g_hat)
    in_degree_dict = dict(g_hat.in_degree())
    out_degree_dict = dict(g_hat.out_degree())
    # The undirected copy does not depend on the node, so build it (and the
    # clustering coefficients) a single time instead of once per variable.
    clustering = nx.clustering(g_hat.to_undirected())

    def path_length_or_nan(source, target):
        """Shortest directed path length, or NaN when no path exists."""
        try:
            return nx.shortest_path_length(g_hat, source=source, target=target)
        except nx.NetworkXNoPath:
            return np.nan

    name = f'pc_{variant_}_{ci_test_}_{alpha_}'

    predictions = {}
    for i in nodes:
        row = A_hat.loc[i]
        row_without_xy = row.drop(['X', 'Y'])

        predictions[i] = {
            # Statistics of the outgoing-edge row of the raw PC matrix
            f'{name}_count_widout_x_y': row_without_xy.count(),
            f'{name}_sum_widout_x_y': row_without_xy.sum(),
            f'{name}_mean_widout_x_y': row_without_xy.mean(),
            f'{name}_std_widout_x_y': row_without_xy.std(),
            f'{name}_count': row.count(),
            f'{name}_sum': row.sum(),
            f'{name}_mean': row.mean(),
            f'{name}_std': row.std(),
            f'{name}_has_x': row['X'],
            f'{name}_has_y': row['Y'],
            # Centrality measures
            f'{name}_betweenness': betweenness[i],
            f'{name}_closeness': closeness[i],
            f'{name}_pagerank': pagerank[i],
            # Degree
            f'{name}_in_degree': in_degree_dict[i],
            f'{name}_out_degree': out_degree_dict[i],
            f'{name}_total_degree': in_degree_dict[i] + out_degree_dict[i],
            # Shortest path to 'X' and 'Y'
            f'{name}_shortest_path_to_X': path_length_or_nan(i, 'X'),
            f'{name}_shortest_path_to_Y': path_length_or_nan(i, 'Y'),
            # Clustering coefficient (undirected view of the graph)
            f'{name}_clustering_coef': clustering[i],
            # Is ancestor/descendant of 'X' or 'Y'
            f'{name}_is_ancestor_of_X': 1 if nx.has_path(g_hat, i, 'X') else 0,
            f'{name}_is_ancestor_of_Y': 1 if nx.has_path(g_hat, i, 'Y') else 0,
            f'{name}_is_descendant_of_X': 1 if nx.has_path(g_hat, 'X', i) else 0,
            f'{name}_is_descendant_of_Y': 1 if nx.has_path(g_hat, 'Y', i) else 0,
        }

    df = pd.DataFrame(predictions).T
    # Callers normally attach `.name` to the frame; fall back to a fixed
    # placeholder when that attribute is absent.
    df["dataset"] = getattr(dataset, "name", 'dataset')
    df.reset_index(inplace=True)
    df.rename(columns={'index': 'variable'}, inplace=True)
    # Reorder columns so that "dataset" comes first:
    df = df[["dataset"] + [colname for colname in df.columns if colname != "dataset"]]
    return df

## Computing `(X_train, y_train)`

The following functions loop over a dictionary of named datasets and apply the variable embeddings defined previously, producing a single `(X_train, y_train)` suitable for a multiclass classification problem.

In [15]:
def create_some_columns(names_datasets, function):
    """
    Apply an embedding function to every dataset, one after the other.

    Args:
        names_datasets: mapping of dataset name -> dataset. Each dataset
            gets a `.name` attribute set to its key before `function`
            is called.
        function: callable taking a dataset and returning a DataFrame of
            per-variable features.

    Returns:
        All per-dataset frames concatenated row-wise with a fresh index;
        the "dataset" column of each frame is set to the dataset name.

    Raises:
        NotImplementedError: when `function` raises a ValueError for a
            dataset; the original error is attached as the cause.
    """

    frames = []
    for name, dataset in tqdm(names_datasets.items()):
        dataset.name = name

        try:
            df_dataset = function(dataset)
        except ValueError as e:
            print(name, e)
            # Keep the exception type callers already see, but carry the
            # failing dataset and the underlying message along with it.
            raise NotImplementedError(
                f"embedding failed for dataset {name!r}: {e}"
            ) from e

        df_dataset["dataset"] = name
        frames.append(df_dataset)

    return pd.concat(frames, axis="index").reset_index(drop=True)

In [16]:
def create_some_columns_parallel(names_datasets, function, n_jobs=-1):
    """
    Parallel counterpart of the sequential embedding loop.

    Every (name, dataset) pair of `names_datasets` is dispatched through
    joblib; the worker tags the dataset with its name, runs `function` on
    it and stamps the name into the "dataset" column of the result. The
    per-dataset frames are then stacked row-wise with a fresh index.
    """

    def embed_one(dataset_name, frame, embedding):
        frame.name = dataset_name
        features = embedding(frame)
        features["dataset"] = dataset_name
        return features

    runner = joblib.Parallel(n_jobs=n_jobs)
    tasks = (
        joblib.delayed(embed_one)(dataset_name, frame, function)
        for dataset_name, frame in tqdm(names_datasets.items())
    )
    per_dataset = runner(tasks)

    stacked = pd.concat(per_dataset, axis="index")
    return stacked.reset_index(drop=True)

In [17]:
def create_all_columns(functions_names_datasets, n_jobs=-1):
    """
    Build the combined feature table from several embedding functions.

    `functions_names_datasets` maps each embedding function to the
    {name: dataset} mapping it should be applied to. Each function yields
    one feature set (computed in parallel unless `n_jobs == 1`), and all
    feature sets are inner-merged on the ("dataset", "variable") keys.
    """

    def merge_on_keys(left, right):
        return pd.merge(left, right, on=["dataset", "variable"])

    feature_sets = []
    for function, names_datasets in functions_names_datasets.items():
        print(f"set: {function.__name__}")

        if n_jobs == 1:
            current = create_some_columns(names_datasets, function)
        else:
            current = create_some_columns_parallel(names_datasets, function, n_jobs=n_jobs)

        feature_sets.append(current)

    # Fold the feature sets pairwise into one dataframe:
    merged = functools.reduce(merge_on_keys, feature_sets)
    return merged

## MLP predictions

In [25]:
import typing
import os
from tqdm.auto import tqdm
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
import pytorch_lightning as pl
import networkx as nx
import joblib
import pickle

class CausalDataset(Dataset):
    """
    A PyTorch Dataset that zero-pads causal discovery samples to a fixed size.

    Attributes:
        X (np.ndarray): float32 array of shape (num_samples, max_rows, max_vars) holding the zero-padded input features.
        y (np.ndarray): float32 array of shape (num_samples, max_vars, max_vars) holding the zero-padded target matrices.
        target_mask (np.ndarray): boolean array of shape (num_samples, max_vars, max_vars); True where `y` holds a real (non-padding) value.
    """

    def __init__(
        self,
        X: typing.List[pd.DataFrame],
        y: typing.List[pd.DataFrame],
        max_rows: int = 1000,
        max_vars: int = 10
    ) -> None:
        """
        Initializes the dataset with input features and target values.

        Args:
            X: One DataFrame of observations per sample (rows x variables).
            y: One target DataFrame per sample, aligned with `X` by position.
            max_rows: Padded number of rows per sample. Default is 1000.
            max_vars: Padded number of variables per sample. Default is 10.

        Raises:
            ValueError: If a sample or target does not fit in the padded shape.
        """

        num_samples = len(X)

        # Inputs are padded to (max_rows, max_vars) so samples can be batched.
        self.X = np.zeros([num_samples, max_rows, max_vars], dtype=np.float32)

        # Targets are padded to (max_vars, max_vars).
        self.y = np.zeros([num_samples, max_vars, max_vars], dtype=np.float32)

        # The mask marks the real target entries; it is needed for training
        # because not every dataset uses all `max_vars` variables.
        self.target_mask = np.zeros([num_samples, max_vars, max_vars], dtype=bool)

        for i, features in enumerate(X):
            target = y[i]
            n_rows, n_cols = features.shape
            t_rows, t_cols = target.shape

            if n_rows > max_rows or n_cols > max_vars:
                raise ValueError(
                    f"sample {i} has shape {features.shape}, which exceeds "
                    f"the padded shape ({max_rows}, {max_vars})"
                )
            if t_rows > max_vars or t_cols > max_vars:
                raise ValueError(
                    f"target {i} has shape {target.shape}, which exceeds "
                    f"the padded shape ({max_vars}, {max_vars})"
                )

            self.X[i, :n_rows, :n_cols] = features.values
            self.y[i, :t_rows, :t_cols] = target.values
            self.target_mask[i, :t_rows, :t_cols] = True

    def __len__(self) -> int:
        """
        Returns:
            The number of samples in the dataset.
        """

        return len(self.X)

    def __getitem__(self, idx: int) -> dict:
        """
        Retrieves the sample at the specified index.

        Args:
            idx: The index of the sample to retrieve.

        Returns:
            A dictionary containing 'X', 'y', and 'target_mask' for the specified index.
        """

        return {
            'X': self.X[idx],
            'y': self.y[idx],
            'target_mask': self.target_mask[idx]
        }
def preprocessing(X: pd.DataFrame):
    """
    Turns one dataset into a single-sample batch for the neural network.

    Args:
        X: The input data as a pandas DataFrame.

    Returns:
        torch.Tensor: The DataFrame values as a tensor with a new leading
        dimension of size 1 inserted at position 0.
    """

    values = X.values
    as_tensor = torch.Tensor(values)
    batched = as_tensor.unsqueeze(dim=0)
    return batched
class CausalModel(nn.Module):
    """
    A neural network model for causal discovery.

    Attributes:
        input_layer (nn.Sequential): Linear(1, d_model) -> ReLU -> Linear(d_model, 2 * d_model), applied to every scalar entry of the input.
        conv_block (nn.Sequential): Eight consecutive stages of Conv2d (3x3, padding 1) -> BatchNorm2d -> ReLU, all with d_model channels.
        final (nn.Sequential): Linear(d_model, d_model) -> ReLU -> Linear(d_model, 1), producing one logit per variable pair.
    """

    def __init__(self, d_model=64):
        """
        Args:
            d_model: The dimension of the model. Default is 64.
        """

        super().__init__()

        self.input_layer = nn.Sequential(
            nn.Linear(1, d_model),
            nn.ReLU(),
            nn.Linear(d_model, 2 * d_model)
        )

        # Eight identical Conv2d/BatchNorm2d/ReLU stages. They are unpacked
        # into a flat Sequential so module indices (0..23), and therefore
        # state_dict keys, match a hand-written list of the same layers.
        conv_layers = []
        for _ in range(8):
            conv_layers.extend([
                nn.Conv2d(
                    in_channels=d_model,
                    out_channels=d_model,
                    kernel_size=3,
                    padding=1
                ),
                nn.BatchNorm2d(d_model),
                nn.ReLU(),
            ])
        self.conv_block = nn.Sequential(*conv_layers)

        self.final = nn.Sequential(
            nn.Linear(d_model, d_model),
            nn.ReLU(),
            nn.Linear(d_model, 1)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Performs a forward pass through the network.

        Args:
            x: The input tensor. Shape: (batch_size, s, n), where axis 1 is
                the axis that gets summed out and axis 2 indexes the
                variables being paired.

        Returns:
            One logit per ordered variable pair. Shape: (batch_size, n, n)
        """

        # The pairwise products below are scaled by 1 / sqrt(s).
        scale = x.shape[1] ** -0.5

        # Embed every scalar entry independently.
        input_processed = self.input_layer(x.unsqueeze(-1))  # (batch_size, s, n, 2*d_model)

        # Split the embedding into "query" and "key" halves.
        q, k = input_processed.chunk(2, dim=-1)  # each: (batch_size, s, n, d_model)

        # Per-channel product of every variable pair, summed over axis s.
        x = torch.einsum('b s i d, b s j d -> b i j d', q, k) * scale  # (batch_size, n, n, d_model)

        # Conv2d expects (batch_size, channels, height, width).
        x = x.permute(0, 3, 1, 2).contiguous()  # (batch_size, d_model, n, n)

        x = self.conv_block(x)  # (batch_size, d_model, n, n)

        # Move channels last again so the final linear layers act on them.
        x = x.permute(0, 2, 3, 1).contiguous()  # (batch_size, n, n, d_model)
        y = self.final(x)  # (batch_size, n, n, 1)
        y = y.squeeze(-1)  # (batch_size, n, n)

        return y
class ModelWrapper(pl.LightningModule):
    """
    A PyTorch Lightning Module wrapper for a causal model.

    Attributes:
        model (CausalModel): The causal model being wrapped.
        train_criterion (nn.BCEWithLogitsLoss): Binary Cross-Entropy on logits with a weight of 5.0 for the positive class; used for the training, validation and test losses.
    """

    def __init__(self, d_model=128):
        """
        Args:
            d_model: The dimension of the model. Default is 128.
        """

        super().__init__()

        self.model = CausalModel(d_model)

        # The loss function is Binary Cross-Entropy with a class weight of 5.0 for the positive class, to account for class imbalance.
        self.train_criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(5.0))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Performs a forward pass through the model.
        """

        return self.model(x)

    def configure_optimizers(self):
        """
        Configures the optimizer and learning rate scheduler for training.
        """

        # We use the Adam optimizer with a learning rate of 1e-3.
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)

        # `verbose` is intentionally not passed: it is deprecated since
        # PyTorch 2.2 and newer releases may reject it.
        scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer,
            step_size=7,  # Number of epochs between each learning rate update
            gamma=0.1,    # Multiplicative factor of learning rate decay
        )

        return [optimizer], [scheduler]

    def _masked_loss(self, batch: dict) -> torch.Tensor:
        """
        Computes the loss on the entries selected by the batch's target mask.

        NOTE(review): assumes `self.model` returns a single logits tensor
        that can be indexed with the boolean mask - confirm against the
        CausalModel definition in use.
        """

        x = batch['X']
        y = batch['y']
        target_mask = batch['target_mask']

        preds = self(x)

        return self.train_criterion(preds[target_mask], y[target_mask])

    def training_step(self, train_batch: dict, batch_idx: int):
        """
        Defines a single training step, including the computation of the loss and logging.
        """

        loss = self._masked_loss(train_batch)

        self.log(
            "train_loss",
            loss,
            on_step=True,
            on_epoch=True,
            prog_bar=True
        )

        return loss

    def validation_step(self, val_batch: dict, batch_idx: int):
        """
        Computes and logs the masked loss for one validation batch.
        """

        val_loss = self._masked_loss(val_batch)

        self.log(
            "val_loss",
            val_loss,
            on_step=False,
            on_epoch=True,
            prog_bar=True
        )

    def test_step(self, test_batch: dict, batch_idx: int):
        """
        Computes and logs the masked loss for one test batch.
        """

        test_loss = self._masked_loss(test_batch)

        self.log(
            "test_loss",
            test_loss,
            on_step=False,
            on_epoch=True,
            prog_bar=True
        )
def transform_proba_to_DAG(
    nodes: typing.List[str],
    pred: np.ndarray
) -> np.ndarray:
    """
    Greedily builds a DAG from a matrix of edge probabilities.

    The graph starts with every node plus the edge X -> Y. Candidate edges
    are visited from the highest to the lowest probability; an edge is
    kept when its probability is above 0.5 and adding it leaves the graph
    acyclic. Self-loops and any edge between 'X' and 'Y' are never added.

    Parameters:
        nodes: A list of node names.
        pred: A 2D numpy array representing the probability matrix.

    Returns:
        A 2D numpy array representing the adjacency matrix of the DAG.
    """

    dag = nx.DiGraph()
    dag.add_nodes_from(nodes)
    dag.add_edge('X', 'Y')

    fixed_pair = {'X', 'Y'}

    # Flat positions sorted by decreasing probability, mapped back to 2D.
    ranked = np.argsort(pred.ravel())[::-1]
    rows, cols = np.unravel_index(ranked, pred.shape)

    for row, col in zip(rows, cols):
        source, target = nodes[row], nodes[col]

        if row == col:
            continue

        # The X/Y relation is fixed up front; skip both directions.
        if {source, target} == fixed_pair:
            continue

        if not pred[row, col] > 0.5:
            continue

        # Tentatively add the edge and roll it back if it closes a cycle.
        dag.add_edge(source, target)
        if not nx.is_directed_acyclic_graph(dag):
            dag.remove_edge(source, target)

    return nx.to_numpy_array(dag)
def graph_nodes_representation(graph, nodelist):
    """
    Create an alternative representation of a graph which is hashable
    and equivalent graphs have the same hash.

    Python cannot PROPERLY use nx.Graph/DiGraph as key for
    dictionaries, because two equivalent graphs with just different
    order of the nodes would result in different keys. This is
    undesirable here.

    So the graph is reduced to its adjacency matrix, taken with the nodes
    in the fixed order given by `nodelist`, and that matrix is flattened
    row by row into a tuple. Two equivalent graphs therefore produce
    identical keys.

    Args:
        graph: the networkx graph to encode.
        nodelist: node order used for the rows/columns of the matrix.

    Returns:
        A flat tuple with the entries of the adjacency matrix.
    """

    # `.toarray()` always yields a plain 2D ndarray. `.todense()` can
    # return an np.matrix (when the adjacency is a sparse *matrix* rather
    # than a sparse *array*), whose `.flatten()` stays 2D and would turn
    # the tuple below into a single unhashable element.
    adjacency_matrix = nx.adjacency_matrix(graph, nodelist=nodelist).toarray()

    # This transforms the numpy array into a hashable object:
    hashable = tuple(adjacency_matrix.flatten())

    return hashable
def create_graph_label():
    """
    Build the lookup tables from three-node graphs (v, X, Y) to role labels.

    Returns:
        A pair (graph_label, adjacency_label): the first is keyed by
        nx.DiGraph objects, the second by the hashable adjacency
        representation of the same graphs with node order ["v", "X", "Y"].
    """

    labelled_graphs = [
        (nx.DiGraph([("X", "Y"), ("v", "X"), ("v", "Y")]), "Confounder"),
        (nx.DiGraph([("X", "Y"), ("X", "v"), ("Y", "v")]), "Collider"),
        (nx.DiGraph([("X", "Y"), ("X", "v"), ("v", "Y")]), "Mediator"),
        (nx.DiGraph([("X", "Y"), ("v", "X")]), "Cause of X"),
        (nx.DiGraph([("X", "Y"), ("v", "Y")]), "Cause of Y"),
        (nx.DiGraph([("X", "Y"), ("X", "v")]), "Consequence of X"),
        (nx.DiGraph([("X", "Y"), ("Y", "v")]), "Consequence of Y"),
        (nx.DiGraph({"X": ["Y"], "v": []}), "Independent"),
    ]
    graph_label = dict(labelled_graphs)

    node_order = ["v", "X", "Y"]

    # Same mapping, keyed by a form in which equivalent graphs collide:
    adjacency_label = {}
    for graph, label in graph_label.items():
        key = graph_nodes_representation(graph, node_order)
        adjacency_label[key] = label

    return graph_label, adjacency_label
def get_labels(adjacency_matrix, adjacency_label):
    """
    Map every variable other than X and Y to its label.

    For each such variable, the 3x3 sub-matrix over (variable, "X", "Y")
    is flattened into a tuple and looked up in `adjacency_label`.
    `adjacency_matrix` is a pd.DataFrame indexed by node names on both
    axes; the result is a dictionary of variable:label.
    """

    def label_of(variable):
        triple = [variable, "X", "Y"]
        block = adjacency_matrix.loc[triple, triple]
        # The flattened tuple is hashable and matches adjacency_label keys.
        return adjacency_label[tuple(block.values.flatten())]

    other_variables = adjacency_matrix.columns.drop(["X", "Y"])
    return {variable: label_of(variable) for variable in other_variables}
def efetua_pred_mlp_train(
    data_dir='DATA/',
    resources_dir='D:/repositorios/CAUSAL_ML/causality-discovery-default/resources/',
):
    """
    Predict variable labels on the validation split of every fold.

    For each fold stored in `folds.pkl`, the checkpoint
    `best-model{fold}.ckpt` is loaded, run on the fold's validation
    datasets, and the sigmoid outputs are converted to a DAG and then to
    one label per variable.

    Args:
        data_dir: prefix (with trailing separator) of the directory
            holding `X_train.pkl`.
        resources_dir: prefix (with trailing separator) of the directory
            holding `folds.pkl` and the per-fold checkpoints.

    Returns:
        The folds dictionary, where each fold maps to a dict with the keys
        'train_keys', 'valid_keys', 'best_model_path', 'dict_pred'
        (`{dataset}_{variable}` -> label), 'scores', 'y_pred' (pd.Series
        of labels) and 'y_true' (always an empty list; nothing in this
        function fills it).
    """
    X_train = joblib.load(data_dir + 'X_train.pkl')

    # NOTE: pickle executes arbitrary code on load; only use trusted files.
    with open(resources_dir + 'folds.pkl', 'rb') as folds_file:
        folds = pickle.load(folds_file)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # The label lookup table is the same for every dataset: build it once.
    _, adjacency_label = create_graph_label()

    for fold, data in folds.items():
        dict_pred = {}

        # Load the best model saved by ModelCheckpoint for this fold
        best_model_path = f'{resources_dir}best-model{fold}.ckpt'
        print(f"Melhor modelo salvo em: {best_model_path}")
        best_model = ModelWrapper.load_from_checkpoint(best_model_path)

        model = best_model.eval()
        model = model.to(device)

        y_pred = []
        y_true = []
        for name in tqdm(data['valid_keys']):
            X = X_train[name]
            x = preprocessing(X).to(device)

            # Model inference
            # NOTE(review): `model(x)[0]` assumes the model returns a single
            # tensor with a leading batch axis - confirm against the
            # CausalModel definition in use.
            with torch.no_grad():
                pred = model(x)[0]
                pred = torch.sigmoid(pred)
                pred = pred.cpu().numpy()

            nodes = list(X.columns)
            pred = transform_proba_to_DAG(nodes, pred).astype(int)
            A = pd.DataFrame(pred, columns=nodes, index=nodes)

            predicted_label = get_labels(A, adjacency_label)

            for key, label in predicted_label.items():
                y_pred.append(label)
                dict_pred[f'{name}_{key}'] = label

        # Replacing the value of an existing key is safe while iterating.
        folds[fold] = {
            'train_keys': data['train_keys'],
            'valid_keys': data['valid_keys'],
            'best_model_path': best_model_path,
            'dict_pred': dict_pred,
            'scores': data['scores'],
            'y_pred': pd.Series(y_pred),
            'y_true': y_true,
        }

    return folds
def efetua_pred_mlp_test(X_test, path_model, d_model=128):
    """
    Predict one label per variable for every dataset in `X_test`.

    NOTE(review): a later definition with the same name appears further
    down in this file and replaces this one when both are executed.

    Args:
        X_test: mapping of dataset name -> DataFrame of observations.
        path_model: path of the checkpoint to load.
        d_model: model dimension passed to `ModelWrapper` on load.

    Returns:
        A dictionary mapping `{dataset}_{variable}` to the predicted label.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the model from the checkpoint and switch it to inference mode
    best_model = ModelWrapper.load_from_checkpoint(path_model, d_model=d_model)
    model = best_model.eval()
    model = model.to(device)

    # The label lookup table is the same for every dataset: build it once.
    _, adjacency_label = create_graph_label()

    dict_pred = {}
    for name in tqdm(X_test.keys()):
        X = X_test[name]
        x = preprocessing(X).to(device)

        # Model inference
        with torch.no_grad():
            pred = model(x)[0]
            pred = torch.sigmoid(pred)
            pred = pred.cpu().numpy()

        nodes = list(X.columns)
        pred = transform_proba_to_DAG(nodes, pred).astype(int)
        A = pd.DataFrame(pred, columns=nodes, index=nodes)

        predicted_label = get_labels(A, adjacency_label)

        for key, label in predicted_label.items():
            dict_pred[f'{name}_{key}'] = label

    return dict_pred
import pandas as pd
import joblib
from tqdm import tqdm
import torch
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
def efetua_pred_mlp_test(X_test, path_model, d_model=256):
    """
    Run the checkpointed model on every dataset and collect per-variable features.

    For each dataset the model output is turned into (a) sigmoid edge
    probabilities, (b) a DAG adjacency matrix, (c) one predicted label per
    variable and (d) the per-variable aggregated activations returned by
    the model. These are merged on ('name', 'key') into one frame per
    dataset and finally concatenated.

    Args:
        X_test: mapping of dataset name -> DataFrame of observations.
        path_model: path of the checkpoint to load.
        d_model: model dimension passed to `ModelWrapper` on load; also
            used in the `psig_`/`padj_` feature prefixes.

    Returns:
        A DataFrame with the merged features of all datasets and a
        constant 'fold' column equal to 0.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    best_model_path = path_model
    print(f"Melhor modelo salvo em: {best_model_path}")

    # Load the model from the checkpoint (once) and switch to inference mode
    best_model = ModelWrapper.load_from_checkpoint(best_model_path, d_model=d_model)
    model = best_model.eval()
    model = model.to(device)

    # The label lookup table is the same for every dataset: build it once.
    _, adjacency_label = create_graph_label()

    data_frames = []
    for name in tqdm(X_test.keys()):
        X = X_test[name]
        x = preprocessing(X).to(device)
        nodes = list(X.columns)

        # Model inference: the model is expected to return the pairwise
        # logits and a per-variable aggregated activation array.
        with torch.no_grad():
            pred, array = model(x)
            pred = torch.sigmoid(pred[0]).cpu().numpy()
            array_np = array[0].cpu().numpy()

        # Keep an untouched copy of the probabilities for the feature step.
        pred_ = pred.copy()
        pred = transform_proba_to_DAG(nodes, pred).astype(int)

        # Aggregated features of the probabilities and of the DAG.
        # NOTE(review): presumably both frames carry 'name' and 'key'
        # columns, since they are merged on them below - verify against
        # get_agg_preds.
        preds_sigmoid_data = get_agg_preds(pred_, f'psig_{d_model}', nodes, name)
        preds_adjacency_data = get_agg_preds(pred, f'padj_{d_model}', nodes, name)

        # Predicted label of every variable other than X and Y
        A = pd.DataFrame(pred, columns=nodes, index=nodes)
        predicted_label = get_labels(A, adjacency_label)

        preds_predicted_label_df = pd.DataFrame.from_dict(
            predicted_label, orient='index', columns=['preds_predicted_label']
        ).reset_index().rename(columns={'index': 'key'})
        preds_predicted_label_df['name'] = name

        # One row of aggregated activations per variable
        feature_columns = [f'feature_{i}' for i in range(array_np.shape[1])]
        array_df = pd.DataFrame(array_np, columns=feature_columns)
        array_df['key'] = nodes
        array_df['name'] = name

        # Merge everything on the (dataset, variable) keys
        merged_df = preds_sigmoid_data.merge(preds_adjacency_data, on=['name', 'key'], how='outer')
        merged_df = merged_df.merge(preds_predicted_label_df, on=['name', 'key'], how='left')
        merged_df = merged_df.merge(array_df, on=['name', 'key'], how='left')

        merged_df['fold'] = 0

        data_frames.append(merged_df)

    # After processing all data, concatenate all DataFrames
    df = pd.concat(data_frames, ignore_index=True)
    print('Dados salvos com sucesso.')
    print(df.head())

    return df
class CausalModel(nn.Module):
    """
    A neural network model for causal discovery that also exposes features.

    Attributes:
        input_layer (nn.Sequential): Linear(1, d_model) -> ReLU -> Linear(d_model, 2 * d_model), applied to every scalar entry of the input.
        conv_block (nn.Sequential): Eight consecutive stages of Conv2d (3x3, padding 1) -> BatchNorm2d -> ReLU, all with d_model channels.
        final (nn.Sequential): Linear(d_model, d_model) -> ReLU -> Linear(d_model, 1), producing one logit per variable pair.
    """

    def __init__(self, d_model=64):
        """
        Args:
            d_model: The dimension of the model. Default is 64.
        """

        super().__init__()

        self.input_layer = nn.Sequential(
            nn.Linear(1, d_model),
            nn.ReLU(),
            nn.Linear(d_model, 2 * d_model)
        )

        # Eight identical Conv2d/BatchNorm2d/ReLU stages. They are unpacked
        # into a flat Sequential so module indices (0..23), and therefore
        # state_dict keys, match a hand-written list of the same layers.
        conv_layers = []
        for _ in range(8):
            conv_layers.extend([
                nn.Conv2d(
                    in_channels=d_model,
                    out_channels=d_model,
                    kernel_size=3,
                    padding=1
                ),
                nn.BatchNorm2d(d_model),
                nn.ReLU(),
            ])
        self.conv_block = nn.Sequential(*conv_layers)

        self.final = nn.Sequential(
            nn.Linear(d_model, d_model),
            nn.ReLU(),
            nn.Linear(d_model, 1)
        )

    def forward(self, x: torch.Tensor) -> typing.Tuple[torch.Tensor, torch.Tensor]:
        """
        Performs a forward pass through the network.

        Args:
            x: The input tensor. Shape: (batch_size, s, n), where axis 1 is
                the axis that gets summed out and axis 2 indexes the
                variables being paired.

        Returns:
            A tuple (y, aggregated_array):
                - y: one logit per ordered variable pair, shape (batch_size, n, n).
                - aggregated_array: the pre-final pairwise features averaged
                  over the second variable axis, shape (batch_size, n, d_model).
        """

        # The pairwise products below are scaled by 1 / sqrt(s).
        scale = x.shape[1] ** -0.5

        # Embed every scalar entry independently.
        input_processed = self.input_layer(x.unsqueeze(-1))  # (batch_size, s, n, 2*d_model)

        # Split the embedding into "query" and "key" halves.
        q, k = input_processed.chunk(2, dim=-1)  # each: (batch_size, s, n, d_model)

        # Per-channel product of every variable pair, summed over axis s.
        x = torch.einsum('b s i d, b s j d -> b i j d', q, k) * scale  # (batch_size, n, n, d_model)

        # Conv2d expects (batch_size, channels, height, width).
        x = x.permute(0, 3, 1, 2).contiguous()  # (batch_size, d_model, n, n)

        x = self.conv_block(x)  # (batch_size, d_model, n, n)

        # Move channels last again so the final linear layers act on them.
        x = x.permute(0, 2, 3, 1).contiguous()  # (batch_size, n, n, d_model)

        # Per-variable summary: mean of the pair features over axis j.
        aggregated_array = x.mean(dim=2)  # (batch_size, n, d_model)

        # Pass through final layers
        y = self.final(x)  # (batch_size, n, n, 1)
        y = y.squeeze(-1)  # (batch_size, n, n)

        return y, aggregated_array
def get_agg_preds(A_hat, name_, nodes, name_df):
    """
    Build one row of graph-derived features per node of an adjacency matrix.

    Args:
        A_hat: Square adjacency matrix (anything ``pd.DataFrame`` accepts)
            whose rows/columns follow the order of ``nodes``. Row ``i`` holds
            the outgoing edge values of node ``i``.
        name_: Prefix used for every generated feature column.
        nodes: Node labels; must contain ``'X'`` and ``'Y'``.
        name_df: Value written to the ``name`` column of every row.

    Returns:
        A DataFrame indexed by node with the prefixed feature columns plus
        ``name`` (set to ``name_df``) and ``key`` (the node label).

    Raises:
        KeyError: If ``'X'`` or ``'Y'`` is missing from ``nodes``.
    """
    A_hat = pd.DataFrame(A_hat, columns=nodes, index=nodes)
    g_hat = nx.from_pandas_adjacency(A_hat, create_using=nx.DiGraph)

    # Whole-graph measures, computed once and looked up per node below.
    betweenness = nx.betweenness_centrality(g_hat)
    closeness = nx.closeness_centrality(g_hat)
    pagerank = nx.pagerank(g_hat)
    in_degree_dict = dict(g_hat.in_degree())
    out_degree_dict = dict(g_hat.out_degree())
    # to_undirected() copies the graph, so build it once rather than per node.
    g_undirected = g_hat.to_undirected()

    def hops_or_none(source, target):
        """Return the directed path length from source to target, or None."""
        try:
            return nx.shortest_path_length(g_hat, source=source, target=target)
        except nx.NetworkXNoPath:
            return None

    name = name_
    predictions = {}

    for i in nodes:
        row = A_hat.loc[i]
        row_without_xy = row.drop(['X', 'Y'])
        hops_to_x = hops_or_none(i, 'X')
        hops_to_y = hops_or_none(i, 'Y')

        # Key order defines the column order of the returned frame.
        predictions[i] = {
            # Row statistics, without and with the X / Y columns
            f'{name}_count_widout_x_y': row_without_xy.count(),
            f'{name}_sum_widout_x_y': row_without_xy.sum(),
            f'{name}_mean_widout_x_y': row_without_xy.mean(),
            f'{name}_std_widout_x_y': row_without_xy.std(),
            f'{name}_count': row.count(),
            f'{name}_sum': row.sum(),
            f'{name}_mean': row.mean(),
            f'{name}_std': row.std(),
            f'{name}_has_x': row['X'],
            f'{name}_has_y': row['Y'],
            # Centrality measures
            f'{name}_betweenness': betweenness[i],
            f'{name}_closeness': closeness[i],
            f'{name}_pagerank': pagerank[i],
            # Degree
            f'{name}_in_degree': in_degree_dict[i],
            f'{name}_out_degree': out_degree_dict[i],
            f'{name}_total_degree': in_degree_dict[i] + out_degree_dict[i],
            # Shortest path to 'X' and 'Y' (NaN when unreachable)
            f'{name}_shortest_path_to_X': np.nan if hops_to_x is None else hops_to_x,
            f'{name}_shortest_path_to_Y': np.nan if hops_to_y is None else hops_to_y,
            # Clustering coefficient on the undirected view of the graph
            f'{name}_clustering_coef': nx.clustering(g_undirected, i),
            # A path i -> X exists exactly when the shortest-path search above
            # succeeded, so the result is reused instead of searching again.
            f'{name}_is_ancestor_of_X': 0 if hops_to_x is None else 1,
            f'{name}_is_ancestor_of_Y': 0 if hops_to_y is None else 1,
            f'{name}_is_descendant_of_X': 1 if nx.has_path(g_hat, 'X', i) else 0,
            f'{name}_is_descendant_of_Y': 1 if nx.has_path(g_hat, 'Y', i) else 0,
        }

    df = pd.DataFrame(predictions).T

    df['name'] = name_df
    df['key'] = df.index

    return df
def load_folds_test_parquet(test_pred, Xtest, d_model):
    """
    Attach the network's per-variable predictions to the feature table.

    The predictions are renamed to the ``dataset`` / ``variable`` key columns,
    the rows for X and Y are removed, the predicted label is integer-encoded,
    every ``feature_*`` column is replaced by 16 principal components, and the
    result is left-joined onto ``Xtest``.

    Args:
        test_pred: Prediction frame with ``name``, ``key``,
            ``preds_predicted_label`` and ``feature_*`` columns.
        Xtest: Feature frame keyed by ``dataset`` and ``variable``.
        d_model: Tag embedded in the generated column names
            (``preds_predicted_label_{d_model}``, ``PCA_{d_model}_{k}``).

    Returns:
        ``Xtest`` left-merged with the processed predictions. Neither input
        frame is modified.
    """
    n_components = 16

    # rename() returns a new frame, so the caller's test_pred stays untouched.
    folds = test_pred.rename(columns={'name': 'dataset', 'key': 'variable'})

    # Remove the rows that describe X and Y themselves.
    # NOTE(review): this is a substring match, so any variable whose name
    # merely contains 'X' or 'Y' is removed as well -- confirm that is intended.
    folds = folds[~folds['variable'].str.contains('Y')]
    # Explicit copy: the frame is modified below and must not be treated as a
    # view of the filtered selection (avoids SettingWithCopyWarning).
    folds = folds[~folds['variable'].str.contains('X')].copy()

    # The class order is fixed by hand so the integer codes follow this list
    # rather than alphabetical order.
    le = LabelEncoder()
    le.classes_ = np.array([
        'Cause of X', 'Consequence of X', 'Confounder', 'Collider',
        'Mediator', 'Independent', 'Cause of Y', 'Consequence of Y',
    ])
    folds[f"preds_predicted_label_{d_model}"] = le.transform(folds["preds_predicted_label"])
    folds = folds.drop(columns=['preds_predicted_label'])

    # If Xtest already carries a 'fold' column, drop the one coming from the
    # predictions so the merge does not produce fold_x / fold_y duplicates.
    # errors='ignore' covers predictions that have no 'fold' column at all.
    if 'fold' in Xtest.columns:
        folds = folds.drop(columns=['fold'], errors='ignore')

    # Compress the feature_* columns into principal components.
    # NOTE(review): the PCA is fitted on the frame passed in, so the components
    # depend on this batch; scikit-learn also requires
    # n_components <= min(n_rows, n_feature_columns). Confirm both are acceptable.
    feature_columns = [col for col in folds.columns if col.startswith('feature_')]
    pca = PCA(n_components=n_components)
    principal_components = pca.fit_transform(folds[feature_columns])

    pca_columns = [f'PCA_{d_model}_{i+1}' for i in range(n_components)]
    pca_df = pd.DataFrame(data=principal_components, columns=pca_columns, index=folds.index)

    folds = pd.concat([folds.drop(columns=feature_columns), pca_df], axis=1)

    # pd.merge builds a new frame, so Xtest does not need a defensive copy.
    return pd.merge(Xtest, folds, on=['dataset', 'variable'], how='left')

from itertools import islice

def take_items_from_to(dictionary, start=0, end=1000):
    """Return a new dict holding the items at iteration positions [start, end)."""
    selected_items = islice(dictionary.items(), start, end)
    return {key: value for key, value in selected_items}

## CrunchDAO Code Interface

[Submitting to the CrunchDAO platform requires 2 functions, `train` and `infer`.](https://docs.crunchdao.com/competitions/code-interface) Any line that is neither inside a function nor an import will be commented out when the notebook is processed.

The content of the functions is the same as in the example, but `train` must save the model so that `infer` can load it. This allows for more predictable behavior if the program is restarted without training.

In [19]:
# Uncomment what you need!
def train(
    X_train: typing.Dict[str, pd.DataFrame],
    y_train: typing.Dict[str, pd.DataFrame],
    # number_of_features: int,
    model_directory_path: str,
    # id_column_name: str,
    # prediction_column_name: str,
    # has_gpu: bool,
) -> None:
    """
    Training entry point required by the platform; intentionally a no-op.

    Nothing is fitted or written here. `infer` loads `model_treino.pkl` and
    `colunas_treino.pkl` from `model_directory_path`, so those files must
    already exist there before inference runs.
    """
    return None

In [20]:
def create_submission(X_y_pred_test):
    """
    Turn per-variable predicted labels into the flat submission mapping.

    For each dataset, an adjacency matrix over ``["X", "Y"] + variables`` is
    built (rows are parents, columns are children). The edge X -> Y is always
    set, and each variable adds the edges implied by its predicted label.

    Args:
        X_y_pred_test: DataFrame with the columns ``dataset``, ``variable``
            and ``label_predicted``.

    Returns:
        A dict mapping ``"{dataset}_{parent}_{child}"`` to 0 or 1 for every
        ordered pair of nodes of every dataset.
    """
    # Edges implied by each label as (parent, child) pairs; None stands for
    # the labelled variable itself. "Independent" adds no edge. Labels that
    # are not listed here are ignored, i.e. they add no edge either.
    edges_by_label = {
        "Cause of X": [(None, "X")],
        "Cause of Y": [(None, "Y")],
        "Consequence of X": [("X", None)],
        "Consequence of Y": [("Y", None)],
        "Confounder": [(None, "X"), (None, "Y")],
        "Collider": [("X", None), ("Y", None)],
        "Mediator": [("X", None), (None, "Y")],
        "Independent": [],
    }

    submission = {}
    for name, prediction in tqdm(X_y_pred_test.groupby("dataset"), delay=10):
        variables = prediction["variable"].tolist()
        labels = prediction["label_predicted"].tolist()
        variables_all = ["X", "Y"] + variables

        # Integer matrix initialised to "no edge".
        adjacency_matrix = pd.DataFrame(0, index=variables_all, columns=variables_all)
        adjacency_matrix.index.name = "parent"
        adjacency_matrix.loc["X", "Y"] = 1

        for variable, label in zip(variables, labels):
            for parent, child in edges_by_label.get(label, ()):
                source = variable if parent is None else parent
                target = variable if child is None else child
                adjacency_matrix.loc[source, target] = 1

        # Read the values positionally from the underlying array instead of
        # doing one .loc lookup per cell.
        values = adjacency_matrix.to_numpy()
        for row_pos, parent in enumerate(variables_all):
            for col_pos, child in enumerate(variables_all):
                submission[f'{name}_{parent}_{child}'] = int(values[row_pos, col_pos])

    return submission

In [23]:
def infer(
    X_test: typing.Dict[str, pd.DataFrame],
    # number_of_features: int,
    model_directory_path: str,
    id_column_name: str,
    prediction_column_name: str,
    # has_gpu: bool,
    # has_trained: bool,
) -> pd.DataFrame:
    """
    Predict a label for every variable of every test dataset and format the
    result as a submission.

    Steps: load the saved classifier and its feature list, run the
    checkpointed network over the datasets, compute the hand-crafted feature
    groups, merge both, predict the labels and expand them into adjacency
    entries with ``create_submission``.

    Args:
        X_test: Test datasets keyed by dataset name.
        model_directory_path: Directory holding ``model_treino.pkl`` and
            ``colunas_treino.pkl``.
        id_column_name: Name of the identifier column of the returned frame.
        prediction_column_name: Name of the prediction column of the returned
            frame.

    Returns:
        A two-column DataFrame with one row per ``{dataset}_{parent}_{child}``
        entry of the submission.
    """
    # For a quick smoke test, restrict the input first, e.g.
    # X_test = take_items_from_to(X_test, 0, 10)

    # NOTE: joblib.load unpickles arbitrary objects; only load artifacts that
    # were produced by a trusted training run.
    # The saved object is indexable and the estimator sits at position 1.
    model = joblib.load(os.path.join(model_directory_path, "model_treino.pkl"))[1]
    top_features = joblib.load(os.path.join(model_directory_path, "colunas_treino.pkl"))

    display(top_features)
    display(model)

    # NOTE(review): the checkpoint path is hard-coded relative to the working
    # directory instead of being derived from model_directory_path -- confirm.
    path_model = 'resources/256/best-model1_256.ckpt'
    df_pred = efetua_pred_mlp_test(X_test, path_model, d_model=256)

    # Map every feature-generating callable to the datasets it runs on.
    functions_dict = {}
    for variant in ['original', 'stable', 'parallel']:
        for ci_test in ['fisherz']:
            for alpha in [0.01, 0.05, 0.1]:
                function = functools.partial(
                    generate_pc_parallel, variant_=variant, ci_test_=ci_test, alpha_=alpha
                )
                # partial objects have no __name__ of their own; set one.
                function.__name__ = f'generate_pc_{variant}_{ci_test}_{alpha}'
                functions_dict[function] = X_test

    functions_dict[advanced_nonlinear_feature_extraction] = X_test
    functions_dict[ttest] = X_test

    X_group_test = create_all_columns(
        functions_dict,
        n_jobs=-1,
    )

    # Normalise the column names: ':', ',', '{' and '}' all become '_'.
    X_group_test.columns = [
        col.replace(':', '_').replace(',', '_').replace('{', '_').replace('}', '_')
        for col in X_group_test.columns
    ]

    X_group_test = load_folds_test_parquet(df_pred, X_group_test, 256)

    # Show every column; the context manager restores the previous display
    # setting even if one of the display calls raises.
    with pd.option_context('display.max_columns', None):
        display(X_group_test.columns)
        display(X_group_test.head())
        display(X_group_test[top_features])

    y_predicted = model.predict(X_group_test[top_features].copy())
    X_y_pred_test = X_group_test
    X_y_pred_test["y_predicted"] = y_predicted

    print(X_y_pred_test)

    # The class order is fixed by hand so that integer code k decodes to the
    # k-th label of this list.
    le = LabelEncoder()
    le.classes_ = np.array([
        'Cause of X', 'Consequence of X', 'Confounder', 'Collider',
        'Mediator', 'Independent', 'Cause of Y', 'Consequence of Y',
    ])

    X_y_pred_test["label_predicted"] = le.inverse_transform(y_predicted)

    submission = create_submission(X_y_pred_test)

    print(submission)

    return pd.DataFrame(
        submission.items(),
        columns=[
            id_column_name,
            prediction_column_name
        ]
    )

### Local Testing

Before submitting your notebook, please make sure that it works locally so that you do not waste compute time. `crunch.test()` will execute your code with the same logic as the cloud environment.

In [26]:
# Run the notebook's train/infer functions locally through the crunch runner,
# passing no_determinism_check=True.
crunch.test(no_determinism_check=True)

# Tell the user where to upload the notebook afterwards.
print(
    "Download this notebook and submit it to the platform: "
    "https://hub.crunchdao.com/competitions/causality-discovery/submit/via/notebook"
)

[32m22:42:09[0m [33mno forbidden library found[0m
[32m22:42:09[0m [33m[0m
[32m22:42:09[0m started
[32m22:42:09[0m running local test
[32m22:42:09[0m [33minternet access isn't restricted, no check will be done[0m
[32m22:42:09[0m 
[32m22:42:20[0m starting dag process...


download data\X_train.pickle from https:crunchdao--competition--production.s3.eu-west-1.amazonaws.com/data-releases/48/X_train.pickle (1523944532 bytes)
already exists: file length match
download data\y_train.pickle from https:crunchdao--competition--production.s3.eu-west-1.amazonaws.com/data-releases/48/y_train.pickle (7017735 bytes)
already exists: file length match
download data\X_test.pickle from https:crunchdao--competition--production.s3.eu-west-1.amazonaws.com/data-releases/48/X_test_reduced.pickle (122341879 bytes)
already exists: file length match
download data\y_test.pickle from https:crunchdao--competition--production.s3.eu-west-1.amazonaws.com/data-releases/48/y_test_reduced.pickle (562930 bytes)
already exists: file length match
download data\example_prediction.parquet from https:crunchdao--competition--production.s3.eu-west-1.amazonaws.com/data-releases/48/example_prediction_reduced.parquet (668981 bytes)
already exists: file length match


[32m22:42:42[0m [33mcall: train[0m
[32m22:42:42[0m [33mcall: infer[0m
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


['pc_original_fisherz_0.01_count_widout_x_y',
 'pc_original_fisherz_0.01_sum_widout_x_y',
 'pc_original_fisherz_0.01_mean_widout_x_y',
 'pc_original_fisherz_0.01_std_widout_x_y',
 'pc_original_fisherz_0.01_count',
 'pc_original_fisherz_0.01_sum',
 'pc_original_fisherz_0.01_mean',
 'pc_original_fisherz_0.01_std',
 'pc_original_fisherz_0.01_has_x',
 'pc_original_fisherz_0.01_has_y',
 'pc_original_fisherz_0.01_betweenness',
 'pc_original_fisherz_0.01_closeness',
 'pc_original_fisherz_0.01_pagerank',
 'pc_original_fisherz_0.01_in_degree',
 'pc_original_fisherz_0.01_out_degree',
 'pc_original_fisherz_0.01_total_degree',
 'pc_original_fisherz_0.01_shortest_path_to_X',
 'pc_original_fisherz_0.01_shortest_path_to_Y',
 'pc_original_fisherz_0.01_clustering_coef',
 'pc_original_fisherz_0.01_is_ancestor_of_X',
 'pc_original_fisherz_0.01_is_ancestor_of_Y',
 'pc_original_fisherz_0.01_is_descendant_of_X',
 'pc_original_fisherz_0.01_is_descendant_of_Y',
 'pc_original_fisherz_0.05_count_widout_x_y',
 '

Melhor modelo salvo em: resources/256/best-model1_256.ckpt


100%|██████████| 10/10 [00:01<00:00,  6.59it/s]


Dados salvos com sucesso.
   psig_256_count_widout_x_y  psig_256_sum_widout_x_y  \
0                        1.0                 0.000003   
1                        1.0                 0.720957   
2                        1.0                 0.117053   
3                        2.0                 0.789590   
4                        2.0                 0.136575   

   psig_256_mean_widout_x_y  psig_256_std_widout_x_y  psig_256_count  \
0                  0.000003                      NaN             3.0   
1                  0.720957                      NaN             3.0   
2                  0.117053                      NaN             3.0   
3                  0.394795                 0.558325             4.0   
4                  0.068288                 0.096573             4.0   

   psig_256_sum  psig_256_mean  psig_256_std  psig_256_has_x  psig_256_has_y  \
0      1.769196       0.589732      0.510737    8.887748e-01    8.804184e-01   
1      1.631600       0.543867      0.

100%|██████████| 10/10 [00:00<00:00, 241.49it/s]


set: generate_pc_original_fisherz_0.05


100%|██████████| 10/10 [00:00<00:00, 3181.12it/s]


set: generate_pc_original_fisherz_0.1


100%|██████████| 10/10 [00:00<00:00, 3260.50it/s]


set: generate_pc_stable_fisherz_0.01


100%|██████████| 10/10 [00:00<00:00, 3130.31it/s]


set: generate_pc_stable_fisherz_0.05


100%|██████████| 10/10 [00:00<00:00, 3094.74it/s]


set: generate_pc_stable_fisherz_0.1


100%|██████████| 10/10 [00:00<00:00, 2131.04it/s]


set: generate_pc_parallel_fisherz_0.01


100%|██████████| 10/10 [00:00<00:00, 3895.16it/s]


set: generate_pc_parallel_fisherz_0.05


100%|██████████| 10/10 [00:00<00:00, 3221.93it/s]


set: generate_pc_parallel_fisherz_0.1


100%|██████████| 10/10 [00:00<00:00, 4880.50it/s]


set: advanced_nonlinear_feature_extraction


100%|██████████| 10/10 [00:00<00:00, 3244.61it/s]


set: ttest


100%|██████████| 10/10 [00:00<00:00, 4866.35it/s]


Index(['dataset', 'variable', 'pc_original_fisherz_0.01_count_widout_x_y',
       'pc_original_fisherz_0.01_sum_widout_x_y',
       'pc_original_fisherz_0.01_mean_widout_x_y',
       'pc_original_fisherz_0.01_std_widout_x_y',
       'pc_original_fisherz_0.01_count', 'pc_original_fisherz_0.01_sum',
       'pc_original_fisherz_0.01_mean', 'pc_original_fisherz_0.01_std',
       ...
       'PCA_256_7', 'PCA_256_8', 'PCA_256_9', 'PCA_256_10', 'PCA_256_11',
       'PCA_256_12', 'PCA_256_13', 'PCA_256_14', 'PCA_256_15', 'PCA_256_16'],
      dtype='object', length=348)

Unnamed: 0,dataset,variable,pc_original_fisherz_0.01_count_widout_x_y,pc_original_fisherz_0.01_sum_widout_x_y,pc_original_fisherz_0.01_mean_widout_x_y,pc_original_fisherz_0.01_std_widout_x_y,pc_original_fisherz_0.01_count,pc_original_fisherz_0.01_sum,pc_original_fisherz_0.01_mean,pc_original_fisherz_0.01_std,pc_original_fisherz_0.01_has_x,pc_original_fisherz_0.01_has_y,pc_original_fisherz_0.01_betweenness,pc_original_fisherz_0.01_closeness,pc_original_fisherz_0.01_pagerank,pc_original_fisherz_0.01_in_degree,pc_original_fisherz_0.01_out_degree,pc_original_fisherz_0.01_total_degree,pc_original_fisherz_0.01_shortest_path_to_X,pc_original_fisherz_0.01_shortest_path_to_Y,pc_original_fisherz_0.01_clustering_coef,pc_original_fisherz_0.01_is_ancestor_of_X,pc_original_fisherz_0.01_is_ancestor_of_Y,pc_original_fisherz_0.01_is_descendant_of_X,pc_original_fisherz_0.01_is_descendant_of_Y,pc_original_fisherz_0.05_count_widout_x_y,pc_original_fisherz_0.05_sum_widout_x_y,pc_original_fisherz_0.05_mean_widout_x_y,pc_original_fisherz_0.05_std_widout_x_y,pc_original_fisherz_0.05_count,pc_original_fisherz_0.05_sum,pc_original_fisherz_0.05_mean,pc_original_fisherz_0.05_std,pc_original_fisherz_0.05_has_x,pc_original_fisherz_0.05_has_y,pc_original_fisherz_0.05_betweenness,pc_original_fisherz_0.05_closeness,pc_original_fisherz_0.05_pagerank,pc_original_fisherz_0.05_in_degree,pc_original_fisherz_0.05_out_degree,pc_original_fisherz_0.05_total_degree,pc_original_fisherz_0.05_shortest_path_to_X,pc_original_fisherz_0.05_shortest_path_to_Y,pc_original_fisherz_0.05_clustering_coef,pc_original_fisherz_0.05_is_ancestor_of_X,pc_original_fisherz_0.05_is_ancestor_of_Y,pc_original_fisherz_0.05_is_descendant_of_X,pc_original_fisherz_0.05_is_descendant_of_Y,pc_original_fisherz_0.1_count_widout_x_y,pc_original_fisherz_0.1_sum_widout_x_y,pc_original_fisherz_0.1_mean_widout_x_y,pc_original_fisherz_0.1_std_widout_x_y,pc_original_fisherz_0.1_count,pc_original_fisherz_0.1_sum,pc_original_fisherz_0.1_mean,pc_ori
ginal_fisherz_0.1_std,pc_original_fisherz_0.1_has_x,pc_original_fisherz_0.1_has_y,pc_original_fisherz_0.1_betweenness,pc_original_fisherz_0.1_closeness,pc_original_fisherz_0.1_pagerank,pc_original_fisherz_0.1_in_degree,pc_original_fisherz_0.1_out_degree,pc_original_fisherz_0.1_total_degree,pc_original_fisherz_0.1_shortest_path_to_X,pc_original_fisherz_0.1_shortest_path_to_Y,pc_original_fisherz_0.1_clustering_coef,pc_original_fisherz_0.1_is_ancestor_of_X,pc_original_fisherz_0.1_is_ancestor_of_Y,pc_original_fisherz_0.1_is_descendant_of_X,pc_original_fisherz_0.1_is_descendant_of_Y,pc_stable_fisherz_0.01_count_widout_x_y,pc_stable_fisherz_0.01_sum_widout_x_y,pc_stable_fisherz_0.01_mean_widout_x_y,pc_stable_fisherz_0.01_std_widout_x_y,pc_stable_fisherz_0.01_count,pc_stable_fisherz_0.01_sum,pc_stable_fisherz_0.01_mean,pc_stable_fisherz_0.01_std,pc_stable_fisherz_0.01_has_x,pc_stable_fisherz_0.01_has_y,pc_stable_fisherz_0.01_betweenness,pc_stable_fisherz_0.01_closeness,pc_stable_fisherz_0.01_pagerank,pc_stable_fisherz_0.01_in_degree,pc_stable_fisherz_0.01_out_degree,pc_stable_fisherz_0.01_total_degree,pc_stable_fisherz_0.01_shortest_path_to_X,pc_stable_fisherz_0.01_shortest_path_to_Y,pc_stable_fisherz_0.01_clustering_coef,pc_stable_fisherz_0.01_is_ancestor_of_X,pc_stable_fisherz_0.01_is_ancestor_of_Y,pc_stable_fisherz_0.01_is_descendant_of_X,pc_stable_fisherz_0.01_is_descendant_of_Y,pc_stable_fisherz_0.05_count_widout_x_y,pc_stable_fisherz_0.05_sum_widout_x_y,pc_stable_fisherz_0.05_mean_widout_x_y,pc_stable_fisherz_0.05_std_widout_x_y,pc_stable_fisherz_0.05_count,pc_stable_fisherz_0.05_sum,pc_stable_fisherz_0.05_mean,pc_stable_fisherz_0.05_std,pc_stable_fisherz_0.05_has_x,pc_stable_fisherz_0.05_has_y,pc_stable_fisherz_0.05_betweenness,pc_stable_fisherz_0.05_closeness,pc_stable_fisherz_0.05_pagerank,pc_stable_fisherz_0.05_in_degree,pc_stable_fisherz_0.05_out_degree,pc_stable_fisherz_0.05_total_degree,pc_stable_fisherz_0.05_shortest_path_to_X,pc_stable_fisherz_0.05_shortest_
path_to_Y,pc_stable_fisherz_0.05_clustering_coef,pc_stable_fisherz_0.05_is_ancestor_of_X,pc_stable_fisherz_0.05_is_ancestor_of_Y,pc_stable_fisherz_0.05_is_descendant_of_X,pc_stable_fisherz_0.05_is_descendant_of_Y,pc_stable_fisherz_0.1_count_widout_x_y,pc_stable_fisherz_0.1_sum_widout_x_y,pc_stable_fisherz_0.1_mean_widout_x_y,pc_stable_fisherz_0.1_std_widout_x_y,pc_stable_fisherz_0.1_count,pc_stable_fisherz_0.1_sum,pc_stable_fisherz_0.1_mean,pc_stable_fisherz_0.1_std,pc_stable_fisherz_0.1_has_x,pc_stable_fisherz_0.1_has_y,pc_stable_fisherz_0.1_betweenness,pc_stable_fisherz_0.1_closeness,pc_stable_fisherz_0.1_pagerank,pc_stable_fisherz_0.1_in_degree,pc_stable_fisherz_0.1_out_degree,pc_stable_fisherz_0.1_total_degree,pc_stable_fisherz_0.1_shortest_path_to_X,pc_stable_fisherz_0.1_shortest_path_to_Y,pc_stable_fisherz_0.1_clustering_coef,pc_stable_fisherz_0.1_is_ancestor_of_X,pc_stable_fisherz_0.1_is_ancestor_of_Y,pc_stable_fisherz_0.1_is_descendant_of_X,pc_stable_fisherz_0.1_is_descendant_of_Y,pc_parallel_fisherz_0.01_count_widout_x_y,pc_parallel_fisherz_0.01_sum_widout_x_y,pc_parallel_fisherz_0.01_mean_widout_x_y,pc_parallel_fisherz_0.01_std_widout_x_y,pc_parallel_fisherz_0.01_count,pc_parallel_fisherz_0.01_sum,pc_parallel_fisherz_0.01_mean,pc_parallel_fisherz_0.01_std,pc_parallel_fisherz_0.01_has_x,pc_parallel_fisherz_0.01_has_y,pc_parallel_fisherz_0.01_betweenness,pc_parallel_fisherz_0.01_closeness,pc_parallel_fisherz_0.01_pagerank,pc_parallel_fisherz_0.01_in_degree,pc_parallel_fisherz_0.01_out_degree,pc_parallel_fisherz_0.01_total_degree,pc_parallel_fisherz_0.01_shortest_path_to_X,pc_parallel_fisherz_0.01_shortest_path_to_Y,pc_parallel_fisherz_0.01_clustering_coef,pc_parallel_fisherz_0.01_is_ancestor_of_X,pc_parallel_fisherz_0.01_is_ancestor_of_Y,pc_parallel_fisherz_0.01_is_descendant_of_X,pc_parallel_fisherz_0.01_is_descendant_of_Y,pc_parallel_fisherz_0.05_count_widout_x_y,pc_parallel_fisherz_0.05_sum_widout_x_y,pc_parallel_fisherz_0.05_mean_widout_x_y,pc_parallel_f
isherz_0.05_std_widout_x_y,pc_parallel_fisherz_0.05_count,pc_parallel_fisherz_0.05_sum,pc_parallel_fisherz_0.05_mean,pc_parallel_fisherz_0.05_std,pc_parallel_fisherz_0.05_has_x,pc_parallel_fisherz_0.05_has_y,pc_parallel_fisherz_0.05_betweenness,pc_parallel_fisherz_0.05_closeness,pc_parallel_fisherz_0.05_pagerank,pc_parallel_fisherz_0.05_in_degree,pc_parallel_fisherz_0.05_out_degree,pc_parallel_fisherz_0.05_total_degree,pc_parallel_fisherz_0.05_shortest_path_to_X,pc_parallel_fisherz_0.05_shortest_path_to_Y,pc_parallel_fisherz_0.05_clustering_coef,pc_parallel_fisherz_0.05_is_ancestor_of_X,pc_parallel_fisherz_0.05_is_ancestor_of_Y,pc_parallel_fisherz_0.05_is_descendant_of_X,pc_parallel_fisherz_0.05_is_descendant_of_Y,pc_parallel_fisherz_0.1_count_widout_x_y,pc_parallel_fisherz_0.1_sum_widout_x_y,pc_parallel_fisherz_0.1_mean_widout_x_y,pc_parallel_fisherz_0.1_std_widout_x_y,pc_parallel_fisherz_0.1_count,pc_parallel_fisherz_0.1_sum,pc_parallel_fisherz_0.1_mean,pc_parallel_fisherz_0.1_std,pc_parallel_fisherz_0.1_has_x,pc_parallel_fisherz_0.1_has_y,pc_parallel_fisherz_0.1_betweenness,pc_parallel_fisherz_0.1_closeness,pc_parallel_fisherz_0.1_pagerank,pc_parallel_fisherz_0.1_in_degree,pc_parallel_fisherz_0.1_out_degree,pc_parallel_fisherz_0.1_total_degree,pc_parallel_fisherz_0.1_shortest_path_to_X,pc_parallel_fisherz_0.1_shortest_path_to_Y,pc_parallel_fisherz_0.1_clustering_coef,pc_parallel_fisherz_0.1_is_ancestor_of_X,pc_parallel_fisherz_0.1_is_ancestor_of_Y,pc_parallel_fisherz_0.1_is_descendant_of_X,pc_parallel_fisherz_0.1_is_descendant_of_Y,granger_v_X,granger_v_Y,corr(v_X),corr(v_Y),spearman(v_X),spearman(v_Y),kendall(v_X),kendall(v_Y),mutual_info(v_X),mutual_info(v_Y),r2_lin_v_X,r2_lin_v_Y,r2_quad_v_X,r2_quad_v_Y,r2_exp_v_X,r2_exp_v_Y,shapiro_p_X,shapiro_p_Y,entropy(v),max(corr(v_ others)),min(corr(v_ others)),mean(corr(v_ others)),std(corr(v_ others)),max(cov(v_ others)),min(cov(v_ others)),mean(cov(v_ others)),std(cov(v_ others)),max(r2_lin_v_ others),min(r2_lin_v_ 
others),mean(r2_lin_v_ others),std(r2_lin_v_ others),max(r2_quad_v_ others),min(r2_quad_v_ others),mean(r2_quad_v_ others),std(r2_quad_v_ others),max(r2_exp_v_ others),min(r2_exp_v_ others),mean(r2_exp_v_ others),std(r2_exp_v_ others),max(mi(v_ others)),min(mi(v_ others)),mean(mi(v_ others)),std(mi(v_ others)),v_mean,v_median,v_std,v_skewness,v_kurtosis,v_min,v_max,v_range,v_iqr,v_vif,corr(X_Y),spearman(X_Y),kendall(X_Y),mutual_info(X_Y),r2_lin_X_Y,shapiro_p_XY,num_min_vals,ttest(v_X),pvalue(ttest(v_X))<=0.05,ttest(v_Y),pvalue(ttest(v_Y))<=0.05,cohen_d(v_X),cohen_d(v_Y),ci_low(v_X),ci_high(v_X),ci_low(v_Y),ci_high(v_Y),ttest(X_Y),pvalue(ttest(X_Y))<=0.05,cohen_d(X_Y),ci_lowXY,ci_highXY,psig_256_count_widout_x_y,psig_256_sum_widout_x_y,psig_256_mean_widout_x_y,psig_256_std_widout_x_y,psig_256_count,psig_256_sum,psig_256_mean,psig_256_std,psig_256_has_x,psig_256_has_y,psig_256_betweenness,psig_256_closeness,psig_256_pagerank,psig_256_in_degree,psig_256_out_degree,psig_256_total_degree,psig_256_shortest_path_to_X,psig_256_shortest_path_to_Y,psig_256_clustering_coef,psig_256_is_ancestor_of_X,psig_256_is_ancestor_of_Y,psig_256_is_descendant_of_X,psig_256_is_descendant_of_Y,padj_256_count_widout_x_y,padj_256_sum_widout_x_y,padj_256_mean_widout_x_y,padj_256_std_widout_x_y,padj_256_count,padj_256_sum,padj_256_mean,padj_256_std,padj_256_has_x,padj_256_has_y,padj_256_betweenness,padj_256_closeness,padj_256_pagerank,padj_256_in_degree,padj_256_out_degree,padj_256_total_degree,padj_256_shortest_path_to_X,padj_256_shortest_path_to_Y,padj_256_clustering_coef,padj_256_is_ancestor_of_X,padj_256_is_ancestor_of_Y,padj_256_is_descendant_of_X,padj_256_is_descendant_of_Y,fold,preds_predicted_label_256,PCA_256_1,PCA_256_2,PCA_256_3,PCA_256_4,PCA_256_5,PCA_256_6,PCA_256_7,PCA_256_8,PCA_256_9,PCA_256_10,PCA_256_11,PCA_256_12,PCA_256_13,PCA_256_14,PCA_256_15,PCA_256_16
0,12,0,1.0,0.0,0.0,,3.0,1.0,0.333333,0.57735,0.0,1.0,0.0,0.0,0.212766,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,3.0,1.0,0.333333,0.57735,0.0,1.0,0.0,0.0,0.212766,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,3.0,1.0,0.333333,0.57735,0.0,1.0,0.0,0.0,0.212766,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,3.0,1.0,0.333333,0.57735,0.0,1.0,0.0,0.0,0.212766,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,3.0,1.0,0.333333,0.57735,0.0,1.0,0.0,0.666667,0.474412,1.0,0.0,1.0,,,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,,3.0,1.0,0.333333,0.57735,0.0,1.0,0.0,0.0,0.212766,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,3.0,1.0,0.333333,0.57735,0.0,1.0,0.0,0.0,0.212766,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,3.0,1.0,0.333333,0.57735,0.0,1.0,0.0,0.0,0.212766,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,3.0,1.0,0.333333,0.57735,0.0,1.0,0.0,0.0,0.212766,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,0.640603,1.0,0.221591,1.0,0.221591,1.0,0.030903,1.0,4.617702,5.471054,0.049103,1.0,0.843873,1.0,0.0,0.0,9.341969e-22,7.981483e-20,2.302585,1.0,0.221591,0.610796,0.550418,0.333667,0.073938,0.203802,0.183656,1.0,0.049103,0.524551,0.475449,1.0,0.843873,0.921937,0.078063,0.0,0.0,0.0,0.0,5.471054,4.617702,5.044378,0.426676,0.0,0.0,0.577639,-9.230235e-18,-1.200002,-0.999,0.999,1.998,0.999,inf,0.221591,0.221591,0.030903,4.615828,0.049103,2.026217e-24,True,7.793903e-17,0.0,0.0,0.0,2.464648e-18,0.0,-0.044725,0.044725,0.0,0.0,-7.793903e-17,0.0,-2.464648e-18,-0.044725,0.044725,1.0,3e-06,3e-06,,3.0,1.769196,0.589732,0.510737,0.888775,0.880418,0.0,1.0,0.358596,3.0,3.0,6.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,,3.0,2.0,0.666667,0.57735,1.0,1.0,0.0,0.0,0.19758,0.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0,2,-4.319,0.789941,1.602393,-0.437881,0.599438,-1.235067,-0.175769,0.532925,0.742901,0.891537,-0.617011,-0.455917,-0.127729,0.340504,-0.096599,-0.33752
1,27,0,2.0,0.0,0.0,0.0,4.0,2.0,0.5,0.57735,1.0,1.0,0.0,0.0,0.164983,0.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.504431,3.0,0.0,3.0,,,0.333333,0.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.504431,3.0,0.0,3.0,,,0.333333,0.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,4.0,2.0,0.5,0.57735,1.0,1.0,0.0,0.333333,0.2351,1.0,1.0,2.0,,1.0,1.0,0.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.504431,3.0,0.0,3.0,,,0.333333,0.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.504431,3.0,0.0,3.0,,,0.333333,0.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,4.0,2.0,0.5,0.57735,1.0,1.0,0.0,0.0,0.164983,0.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.504431,3.0,0.0,3.0,,,0.333333,0.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.504431,3.0,0.0,3.0,,,0.333333,0.0,0.0,1.0,1.0,0.290059,0.862917,-0.86916,0.958116,-0.86916,0.958116,-0.676561,0.814282,0.69455,1.205846,0.75544,0.917986,0.75849,0.918239,0.0,0.0,0.08563723,0.198339,2.302585,0.958116,0.063621,0.630299,0.492769,0.319691,0.021228,0.21031,0.16442,0.917986,0.004048,0.559158,0.398092,0.918239,0.156782,0.61117,0.327853,0.0,0.0,0.0,0.0,1.205846,0.109277,0.669891,0.448012,0.0,0.0,0.577639,0.0,-1.200002,-0.999,0.999,1.998,0.999,1.004064,-0.941717,-0.941717,-0.782695,1.011826,0.886831,0.05656027,False,-2.01185e-16,0.0,0.0,0.0,-6.3620270000000004e-18,0.0,-0.069306,0.069306,-0.010375,0.010375,-1.973903e-16,0.0,-6.24203e-18,-0.070638,0.070638,2.0,0.78959,0.394795,0.558325,4.0,2.013961,0.50349,0.408081,0.347038,0.877333,0.0,1.0,0.235604,4.0,4.0,8.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,0.5,0.707107,4.0,2.0,0.5,0.57735,0.0,1.0,0.0,0.333333,0.171219,1.0,2.0,3.0,,1.0,1.0,0.0,1.0,1.0,0.0,0,4,-3.045126,0.004477,1.094154,0.277071,0.495455,-1.126766,-0.142502,0.233509,-0.076412,0.655457,-0.417397,-0.419298,0.249865,-0.236533,0.005777,0.292739
2,27,1,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.164983,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.5,0.707107,4.0,1.0,0.25,0.5,0.0,0.0,0.0,0.0,0.144692,0.0,1.0,1.0,,,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.5,0.707107,4.0,1.0,0.25,0.5,0.0,0.0,0.0,0.0,0.144692,0.0,1.0,1.0,,,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.164983,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.5,0.707107,4.0,1.0,0.25,0.5,0.0,0.0,0.0,0.0,0.144692,0.0,1.0,1.0,,,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.5,0.707107,4.0,1.0,0.25,0.5,0.0,0.0,0.0,0.0,0.144692,0.0,1.0,1.0,,,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.164983,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.5,0.707107,4.0,1.0,0.25,0.5,0.0,0.0,0.0,0.0,0.144692,0.0,1.0,1.0,,,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.5,0.707107,4.0,1.0,0.25,0.5,0.0,0.0,0.0,0.0,0.144692,0.0,1.0,1.0,,,0.0,0.0,0.0,0.0,0.0,0.467789,0.28234,0.027779,0.022494,0.027779,0.022494,0.023331,0.008949,0.032277,0.08398,0.000772,0.000506,0.013774,0.014264,0.0,0.0,8.020208000000001e-17,4.503985e-17,2.302585,0.063621,0.022494,0.037965,0.022376,0.021228,0.007506,0.012668,0.007466,0.004048,0.000506,0.001775,0.001611,0.017123,0.013774,0.015054,0.001477,0.0,0.0,0.0,0.0,0.110029,0.032277,0.075429,0.032313,2.6645350000000002e-18,0.0,0.577639,0.0,-1.200002,-0.999,0.999,1.998,0.999,1.004064,-0.941717,-0.941717,-0.782695,1.011826,0.886831,0.05656027,False,6.973908e-17,0.0,0.0,0.0,2.205343e-18,0.0,-0.049984,0.049984,-0.050119,0.050119,-1.973903e-16,0.0,-6.24203e-18,-0.070638,0.070638,2.0,0.136575,0.068288,0.096573,4.0,0.428534,0.107134,0.075486,0.116553,0.175407,0.0,1.0,0.261403,4.0,4.0,8.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.451376,3.0,0.0,3.0,,,1.0,0.0,0.0,1.0,1.0,0,3,-0.878975,0.960027,0.533893,2.09483,-0.354779,-1.327552,-0.744178,-0.15504,0.043973,-0.551189,-0.137026,0.328373,0.781106,-0.071281,0.154365,0.499256
3,40,0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.053571,0.28125,0.102724,2.0,1.0,3.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.053571,0.375,0.136401,3.0,1.0,4.0,,1.0,0.333333,0.0,1.0,1.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.044643,0.4,0.123562,3.0,1.0,4.0,,1.0,0.333333,0.0,1.0,1.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.089286,0.347222,0.139724,2.0,1.0,3.0,,1.0,0.0,0.0,1.0,1.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.089286,0.390625,0.146518,3.0,1.0,4.0,,1.0,0.333333,0.0,1.0,1.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.044643,0.4,0.123562,3.0,1.0,4.0,,1.0,0.333333,0.0,1.0,1.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.071429,0.285714,0.1091,2.0,1.0,3.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.053571,0.375,0.136401,3.0,1.0,4.0,,1.0,0.333333,0.0,1.0,1.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.044643,0.4,0.123562,3.0,1.0,4.0,,1.0,0.333333,0.0,1.0,1.0,0.0,0.746966,0.166829,0.227636,-0.125736,0.227636,-0.125736,0.150939,-0.082707,0.061777,0.029,0.051818,0.01581,0.059747,0.018127,0.0,0.0,1.073186e-13,3.585458e-15,2.302585,0.54861,0.019782,0.227245,0.159676,0.183053,0.006601,0.075824,0.053278,0.300973,0.000391,0.073949,0.091001,0.301046,0.011576,0.079444,0.090119,0.0,0.0,0.0,0.0,0.223579,0.0,0.067829,0.064634,-1.776357e-18,0.0,0.577639,4.615117e-18,-1.200002,-0.999,0.999,1.998,0.999,1.718279,0.194756,0.194756,0.130486,0.011618,0.03793,1.035489e-15,False,0.0,0.0,-2.592391e-16,0.0,0.0,-8.19786e-18,-0.044551,0.044551,-0.053785,0.053785,0.0,0.0,0.0,-0.045489,0.045489,7.0,0.800112,0.114302,0.159644,9.0,1.078414,0.119824,0.1531,0.009458,0.268844,0.0,1.0,0.261168,9.0,9.0,18.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.888889,0.373049,7.0,0.0,7.0,,,0.380952,0.0,0.0,1.0,1.0,0,3,1.314014,-2.369212,2.092896,-0.406,0.185828,-0.576415,0.349928,-0.14599,-0.327669,-0.060189,-0.228895,0
.21258,0.025961,-0.287152,0.071416,0.250036
4,40,3,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.0,0.0,0.050588,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.0,0.0,0.051327,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.0,0.0,0.048536,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.0,0.0,0.042282,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.0,0.0,0.042255,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.0,0.0,0.048536,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.0,0.0,0.043323,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.0,0.0,0.051327,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.0,0.0,0.048536,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,0.414438,0.847672,0.077123,0.296521,0.077123,0.296521,0.050943,0.201866,0.020916,0.033393,0.005948,0.087925,0.006298,0.08959,0.0,0.0,1.230852e-16,1.370661e-10,2.302585,0.296521,0.009723,0.072169,0.093498,0.098939,0.003244,0.02408,0.031197,0.087925,9.5e-05,0.012858,0.028431,0.08959,0.000129,0.01823,0.02794,0.0,0.0,0.0,0.0,0.091913,0.0,0.027535,0.026986,-5.329071e-18,0.0,0.577639,9.230235e-18,-1.200002,-0.999,0.999,1.998,0.999,1.019261,0.194756,0.194756,0.130486,0.011618,0.03793,1.035489e-15,False,2.863168e-16,0.0,8.198478e-17,0.0,9.054132e-18,2.592586e-18,-0.048699,0.048699,-0.042518,0.042518,0.0,0.0,0.0,-0.045489,0.045489,7.0,2.010095,0.287156,0.43583,9.0,2.83645,0.315161,0.429743,0.017553,0.808802,0.0,1.0,0.11432,9.0,9.0,18.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,2.0,0.285714,0.48795,9.0,3.0,0.333333,0.5,0.0,1.0,0.0,0.0,0.051899,0.0,3.0,3.0,,1.0,1.0,0.0,1.0,0.0,0.0,0,6,-0.780377,0.256968,-0.722442,-0.873048,-0.83592,-0.257325,-0.236799,-0.854089,-0.543529,-0.460812,-0.434608,-0.054258,-0.711702,-0.053859,0.011372,0.277471


Unnamed: 0,pc_original_fisherz_0.01_count_widout_x_y,pc_original_fisherz_0.01_sum_widout_x_y,pc_original_fisherz_0.01_mean_widout_x_y,pc_original_fisherz_0.01_std_widout_x_y,pc_original_fisherz_0.01_count,pc_original_fisherz_0.01_sum,pc_original_fisherz_0.01_mean,pc_original_fisherz_0.01_std,pc_original_fisherz_0.01_has_x,pc_original_fisherz_0.01_has_y,pc_original_fisherz_0.01_betweenness,pc_original_fisherz_0.01_closeness,pc_original_fisherz_0.01_pagerank,pc_original_fisherz_0.01_in_degree,pc_original_fisherz_0.01_out_degree,pc_original_fisherz_0.01_total_degree,pc_original_fisherz_0.01_shortest_path_to_X,pc_original_fisherz_0.01_shortest_path_to_Y,pc_original_fisherz_0.01_clustering_coef,pc_original_fisherz_0.01_is_ancestor_of_X,pc_original_fisherz_0.01_is_ancestor_of_Y,pc_original_fisherz_0.01_is_descendant_of_X,pc_original_fisherz_0.01_is_descendant_of_Y,pc_original_fisherz_0.05_count_widout_x_y,pc_original_fisherz_0.05_sum_widout_x_y,pc_original_fisherz_0.05_mean_widout_x_y,pc_original_fisherz_0.05_std_widout_x_y,pc_original_fisherz_0.05_count,pc_original_fisherz_0.05_sum,pc_original_fisherz_0.05_mean,pc_original_fisherz_0.05_std,pc_original_fisherz_0.05_has_x,pc_original_fisherz_0.05_has_y,pc_original_fisherz_0.05_betweenness,pc_original_fisherz_0.05_closeness,pc_original_fisherz_0.05_pagerank,pc_original_fisherz_0.05_in_degree,pc_original_fisherz_0.05_out_degree,pc_original_fisherz_0.05_total_degree,pc_original_fisherz_0.05_shortest_path_to_X,pc_original_fisherz_0.05_shortest_path_to_Y,pc_original_fisherz_0.05_clustering_coef,pc_original_fisherz_0.05_is_ancestor_of_X,pc_original_fisherz_0.05_is_ancestor_of_Y,pc_original_fisherz_0.05_is_descendant_of_X,pc_original_fisherz_0.05_is_descendant_of_Y,pc_original_fisherz_0.1_count_widout_x_y,pc_original_fisherz_0.1_sum_widout_x_y,pc_original_fisherz_0.1_mean_widout_x_y,pc_original_fisherz_0.1_std_widout_x_y,pc_original_fisherz_0.1_count,pc_original_fisherz_0.1_sum,pc_original_fisherz_0.1_mean,pc_original_fisherz_0.1
_std,pc_original_fisherz_0.1_has_x,pc_original_fisherz_0.1_has_y,pc_original_fisherz_0.1_betweenness,pc_original_fisherz_0.1_closeness,pc_original_fisherz_0.1_pagerank,pc_original_fisherz_0.1_in_degree,pc_original_fisherz_0.1_out_degree,pc_original_fisherz_0.1_total_degree,pc_original_fisherz_0.1_shortest_path_to_X,pc_original_fisherz_0.1_shortest_path_to_Y,pc_original_fisherz_0.1_clustering_coef,pc_original_fisherz_0.1_is_ancestor_of_X,pc_original_fisherz_0.1_is_ancestor_of_Y,pc_original_fisherz_0.1_is_descendant_of_X,pc_original_fisherz_0.1_is_descendant_of_Y,pc_stable_fisherz_0.01_count_widout_x_y,pc_stable_fisherz_0.01_sum_widout_x_y,pc_stable_fisherz_0.01_mean_widout_x_y,pc_stable_fisherz_0.01_std_widout_x_y,pc_stable_fisherz_0.01_count,pc_stable_fisherz_0.01_sum,pc_stable_fisherz_0.01_mean,pc_stable_fisherz_0.01_std,pc_stable_fisherz_0.01_has_x,pc_stable_fisherz_0.01_has_y,pc_stable_fisherz_0.01_betweenness,pc_stable_fisherz_0.01_closeness,pc_stable_fisherz_0.01_pagerank,pc_stable_fisherz_0.01_in_degree,pc_stable_fisherz_0.01_out_degree,pc_stable_fisherz_0.01_total_degree,pc_stable_fisherz_0.01_shortest_path_to_X,pc_stable_fisherz_0.01_shortest_path_to_Y,pc_stable_fisherz_0.01_clustering_coef,pc_stable_fisherz_0.01_is_ancestor_of_X,pc_stable_fisherz_0.01_is_ancestor_of_Y,pc_stable_fisherz_0.01_is_descendant_of_X,pc_stable_fisherz_0.01_is_descendant_of_Y,pc_stable_fisherz_0.05_count_widout_x_y,pc_stable_fisherz_0.05_sum_widout_x_y,pc_stable_fisherz_0.05_mean_widout_x_y,pc_stable_fisherz_0.05_std_widout_x_y,pc_stable_fisherz_0.05_count,pc_stable_fisherz_0.05_sum,pc_stable_fisherz_0.05_mean,pc_stable_fisherz_0.05_std,pc_stable_fisherz_0.05_has_x,pc_stable_fisherz_0.05_has_y,pc_stable_fisherz_0.05_betweenness,pc_stable_fisherz_0.05_closeness,pc_stable_fisherz_0.05_pagerank,pc_stable_fisherz_0.05_in_degree,pc_stable_fisherz_0.05_out_degree,pc_stable_fisherz_0.05_total_degree,pc_stable_fisherz_0.05_shortest_path_to_X,pc_stable_fisherz_0.05_shortest_path_to_Y,pc_stab
le_fisherz_0.05_clustering_coef,pc_stable_fisherz_0.05_is_ancestor_of_X,pc_stable_fisherz_0.05_is_ancestor_of_Y,pc_stable_fisherz_0.05_is_descendant_of_X,pc_stable_fisherz_0.05_is_descendant_of_Y,pc_stable_fisherz_0.1_count_widout_x_y,pc_stable_fisherz_0.1_sum_widout_x_y,pc_stable_fisherz_0.1_mean_widout_x_y,pc_stable_fisherz_0.1_std_widout_x_y,pc_stable_fisherz_0.1_count,pc_stable_fisherz_0.1_sum,pc_stable_fisherz_0.1_mean,pc_stable_fisherz_0.1_std,pc_stable_fisherz_0.1_has_x,pc_stable_fisherz_0.1_has_y,pc_stable_fisherz_0.1_betweenness,pc_stable_fisherz_0.1_closeness,pc_stable_fisherz_0.1_pagerank,pc_stable_fisherz_0.1_in_degree,pc_stable_fisherz_0.1_out_degree,pc_stable_fisherz_0.1_total_degree,pc_stable_fisherz_0.1_shortest_path_to_X,pc_stable_fisherz_0.1_shortest_path_to_Y,pc_stable_fisherz_0.1_clustering_coef,pc_stable_fisherz_0.1_is_ancestor_of_X,pc_stable_fisherz_0.1_is_ancestor_of_Y,pc_stable_fisherz_0.1_is_descendant_of_X,pc_stable_fisherz_0.1_is_descendant_of_Y,pc_parallel_fisherz_0.01_count_widout_x_y,pc_parallel_fisherz_0.01_sum_widout_x_y,pc_parallel_fisherz_0.01_mean_widout_x_y,pc_parallel_fisherz_0.01_std_widout_x_y,pc_parallel_fisherz_0.01_count,pc_parallel_fisherz_0.01_sum,pc_parallel_fisherz_0.01_mean,pc_parallel_fisherz_0.01_std,pc_parallel_fisherz_0.01_has_x,pc_parallel_fisherz_0.01_has_y,pc_parallel_fisherz_0.01_betweenness,pc_parallel_fisherz_0.01_closeness,pc_parallel_fisherz_0.01_pagerank,pc_parallel_fisherz_0.01_in_degree,pc_parallel_fisherz_0.01_out_degree,pc_parallel_fisherz_0.01_total_degree,pc_parallel_fisherz_0.01_shortest_path_to_X,pc_parallel_fisherz_0.01_shortest_path_to_Y,pc_parallel_fisherz_0.01_clustering_coef,pc_parallel_fisherz_0.01_is_ancestor_of_X,pc_parallel_fisherz_0.01_is_ancestor_of_Y,pc_parallel_fisherz_0.01_is_descendant_of_X,pc_parallel_fisherz_0.01_is_descendant_of_Y,pc_parallel_fisherz_0.05_count_widout_x_y,pc_parallel_fisherz_0.05_sum_widout_x_y,pc_parallel_fisherz_0.05_mean_widout_x_y,pc_parallel_fisherz_0.05_std_w
idout_x_y,pc_parallel_fisherz_0.05_count,pc_parallel_fisherz_0.05_sum,pc_parallel_fisherz_0.05_mean,pc_parallel_fisherz_0.05_std,pc_parallel_fisherz_0.05_has_x,pc_parallel_fisherz_0.05_has_y,pc_parallel_fisherz_0.05_betweenness,pc_parallel_fisherz_0.05_closeness,pc_parallel_fisherz_0.05_pagerank,pc_parallel_fisherz_0.05_in_degree,pc_parallel_fisherz_0.05_out_degree,pc_parallel_fisherz_0.05_total_degree,pc_parallel_fisherz_0.05_shortest_path_to_X,pc_parallel_fisherz_0.05_shortest_path_to_Y,pc_parallel_fisherz_0.05_clustering_coef,pc_parallel_fisherz_0.05_is_ancestor_of_X,pc_parallel_fisherz_0.05_is_ancestor_of_Y,pc_parallel_fisherz_0.05_is_descendant_of_X,pc_parallel_fisherz_0.05_is_descendant_of_Y,pc_parallel_fisherz_0.1_count_widout_x_y,pc_parallel_fisherz_0.1_sum_widout_x_y,pc_parallel_fisherz_0.1_mean_widout_x_y,pc_parallel_fisherz_0.1_std_widout_x_y,pc_parallel_fisherz_0.1_count,pc_parallel_fisherz_0.1_sum,pc_parallel_fisherz_0.1_mean,pc_parallel_fisherz_0.1_std,pc_parallel_fisherz_0.1_has_x,pc_parallel_fisherz_0.1_has_y,pc_parallel_fisherz_0.1_betweenness,pc_parallel_fisherz_0.1_closeness,pc_parallel_fisherz_0.1_pagerank,pc_parallel_fisherz_0.1_in_degree,pc_parallel_fisherz_0.1_out_degree,pc_parallel_fisherz_0.1_total_degree,pc_parallel_fisherz_0.1_shortest_path_to_X,pc_parallel_fisherz_0.1_shortest_path_to_Y,pc_parallel_fisherz_0.1_clustering_coef,pc_parallel_fisherz_0.1_is_ancestor_of_X,pc_parallel_fisherz_0.1_is_ancestor_of_Y,pc_parallel_fisherz_0.1_is_descendant_of_X,pc_parallel_fisherz_0.1_is_descendant_of_Y,granger_v_X,granger_v_Y,corr(v_X),corr(v_Y),spearman(v_X),spearman(v_Y),kendall(v_X),kendall(v_Y),mutual_info(v_X),mutual_info(v_Y),r2_lin_v_X,r2_lin_v_Y,r2_quad_v_X,r2_quad_v_Y,r2_exp_v_X,r2_exp_v_Y,shapiro_p_X,shapiro_p_Y,entropy(v),max(corr(v_ others)),min(corr(v_ others)),mean(corr(v_ others)),std(corr(v_ others)),max(cov(v_ others)),min(cov(v_ others)),mean(cov(v_ others)),std(cov(v_ others)),max(r2_lin_v_ others),min(r2_lin_v_ 
others),mean(r2_lin_v_ others),std(r2_lin_v_ others),max(r2_quad_v_ others),min(r2_quad_v_ others),mean(r2_quad_v_ others),std(r2_quad_v_ others),max(r2_exp_v_ others),min(r2_exp_v_ others),mean(r2_exp_v_ others),std(r2_exp_v_ others),max(mi(v_ others)),min(mi(v_ others)),mean(mi(v_ others)),std(mi(v_ others)),v_mean,v_median,v_std,v_skewness,v_kurtosis,v_min,v_max,v_range,v_iqr,v_vif,corr(X_Y),spearman(X_Y),kendall(X_Y),mutual_info(X_Y),r2_lin_X_Y,shapiro_p_XY,num_min_vals,ttest(v_X),pvalue(ttest(v_X))<=0.05,ttest(v_Y),pvalue(ttest(v_Y))<=0.05,cohen_d(v_X),cohen_d(v_Y),ci_low(v_X),ci_high(v_X),ci_low(v_Y),ci_high(v_Y),ttest(X_Y),pvalue(ttest(X_Y))<=0.05,cohen_d(X_Y),ci_lowXY,ci_highXY,psig_256_count_widout_x_y,psig_256_sum_widout_x_y,psig_256_mean_widout_x_y,psig_256_std_widout_x_y,psig_256_count,psig_256_sum,psig_256_mean,psig_256_std,psig_256_has_x,psig_256_has_y,psig_256_betweenness,psig_256_closeness,psig_256_pagerank,psig_256_in_degree,psig_256_out_degree,psig_256_total_degree,psig_256_shortest_path_to_X,psig_256_shortest_path_to_Y,psig_256_clustering_coef,psig_256_is_ancestor_of_X,psig_256_is_ancestor_of_Y,psig_256_is_descendant_of_X,psig_256_is_descendant_of_Y,padj_256_count_widout_x_y,padj_256_sum_widout_x_y,padj_256_mean_widout_x_y,padj_256_std_widout_x_y,padj_256_count,padj_256_sum,padj_256_mean,padj_256_std,padj_256_has_x,padj_256_has_y,padj_256_betweenness,padj_256_closeness,padj_256_pagerank,padj_256_in_degree,padj_256_out_degree,padj_256_total_degree,padj_256_shortest_path_to_X,padj_256_shortest_path_to_Y,padj_256_clustering_coef,padj_256_is_ancestor_of_X,padj_256_is_ancestor_of_Y,padj_256_is_descendant_of_X,padj_256_is_descendant_of_Y,fold,preds_predicted_label_256,PCA_256_1,PCA_256_2,PCA_256_3,PCA_256_4,PCA_256_5,PCA_256_6,PCA_256_7,PCA_256_8,PCA_256_9,PCA_256_10,PCA_256_11,PCA_256_12,PCA_256_13,PCA_256_14,PCA_256_15,PCA_256_16
0,1.0,0.0,0.0,,3.0,1.0,0.333333,0.57735,0.0,1.0,0.0,0.0,0.212766,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,3.0,1.0,0.333333,0.57735,0.0,1.0,0.0,0.0,0.212766,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,3.0,1.0,0.333333,0.57735,0.0,1.0,0.0,0.0,0.212766,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,3.0,1.0,0.333333,0.57735,0.0,1.0,0.0,0.0,0.212766,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,3.0,1.0,0.333333,0.57735,0.0,1.0,0.0,0.666667,0.474412,1.0,0.0,1.0,,,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,,3.0,1.0,0.333333,0.57735,0.0,1.0,0.0,0.0,0.212766,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,3.0,1.0,0.333333,0.57735,0.0,1.0,0.0,0.0,0.212766,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,3.0,1.0,0.333333,0.57735,0.0,1.0,0.0,0.0,0.212766,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,,3.0,1.0,0.333333,0.57735,0.0,1.0,0.0,0.0,0.212766,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,0.640603,1.0,0.221591,1.0,0.221591,1.0,0.030903,1.0,4.617702,5.471054,0.04910272,1.0,0.843873,1.0,0.0,0.0,9.341969e-22,7.981483e-20,2.302585,1.0,0.221591,0.610796,0.550418,0.333667,0.073938,0.203802,0.183656,1.0,0.04910272,0.524551,0.475449,1.0,0.843873,0.921937,0.078063,0.0,0.0,0.0,0.0,5.471054,4.617702,5.044378,0.426676,0.0,0.0,0.577639,-9.230235e-18,-1.200002,-0.999,0.999,1.998,0.999,inf,0.221591,0.221591,0.030903,4.615828,0.049103,2.026217e-24,True,7.793903e-17,0.0,0.0,0.0,2.464648e-18,0.0,-0.044725,0.044725,0.0,0.0,-7.793903e-17,0.0,-2.464648e-18,-0.044725,0.044725,1.0,3.012299e-06,3.012299e-06,,3.0,1.769196,0.589732,0.510737,0.888775,0.880418,0.0,1.0,0.358596,3.0,3.0,6.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,,3.0,2.0,0.666667,0.57735,1.0,1.0,0.0,0.0,0.19758,0.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0,2,-4.319,0.789941,1.602393,-0.437881,0.599438,-1.235067,-0.175769,0.532925,0.742901,0.891537,-0.617011,-0.455917,-0.127729,0.340504,-0.096599,-0.33752
1,2.0,0.0,0.0,0.0,4.0,2.0,0.5,0.57735,1.0,1.0,0.0,0.0,0.164983,0.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.504431,3.0,0.0,3.0,,,0.333333,0.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.504431,3.0,0.0,3.0,,,0.333333,0.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,4.0,2.0,0.5,0.57735,1.0,1.0,0.0,0.333333,0.2351,1.0,1.0,2.0,,1.0,1.0,0.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.504431,3.0,0.0,3.0,,,0.333333,0.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.504431,3.0,0.0,3.0,,,0.333333,0.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,4.0,2.0,0.5,0.57735,1.0,1.0,0.0,0.0,0.164983,0.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.504431,3.0,0.0,3.0,,,0.333333,0.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.504431,3.0,0.0,3.0,,,0.333333,0.0,0.0,1.0,1.0,0.290059,0.862917,-0.86916,0.958116,-0.86916,0.958116,-0.676561,0.814282,0.69455,1.205846,0.7554395,0.917986,0.75849,0.918239,0.0,0.0,0.08563723,0.198339,2.302585,0.958116,0.063621,0.630299,0.492769,0.319691,0.021228,0.21031,0.16442,0.917986,0.00404769,0.559158,0.398092,0.918239,0.156782,0.61117,0.327853,0.0,0.0,0.0,0.0,1.205846,0.109277,0.669891,0.448012,0.0,0.0,0.577639,0.0,-1.200002,-0.999,0.999,1.998,0.999,1.004064,-0.941717,-0.941717,-0.782695,1.011826,0.886831,0.05656027,False,-2.01185e-16,0.0,0.0,0.0,-6.3620270000000004e-18,0.0,-0.069306,0.069306,-0.010375,0.010375,-1.973903e-16,0.0,-6.24203e-18,-0.070638,0.070638,2.0,0.7895903,0.3947951,0.558325,4.0,2.013961,0.50349,0.408081,0.347038,0.877333,0.0,1.0,0.235604,4.0,4.0,8.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,0.5,0.707107,4.0,2.0,0.5,0.57735,0.0,1.0,0.0,0.333333,0.171219,1.0,2.0,3.0,,1.0,1.0,0.0,1.0,1.0,0.0,0,4,-3.045126,0.004477,1.094154,0.277071,0.495455,-1.126766,-0.142502,0.233509,-0.076412,0.655457,-0.417397,-0.419298,0.249865,-0.236533,0.005777,0.292739
2,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.164983,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.5,0.707107,4.0,1.0,0.25,0.5,0.0,0.0,0.0,0.0,0.144692,0.0,1.0,1.0,,,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.5,0.707107,4.0,1.0,0.25,0.5,0.0,0.0,0.0,0.0,0.144692,0.0,1.0,1.0,,,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.164983,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.5,0.707107,4.0,1.0,0.25,0.5,0.0,0.0,0.0,0.0,0.144692,0.0,1.0,1.0,,,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.5,0.707107,4.0,1.0,0.25,0.5,0.0,0.0,0.0,0.0,0.144692,0.0,1.0,1.0,,,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.164983,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.5,0.707107,4.0,1.0,0.25,0.5,0.0,0.0,0.0,0.0,0.144692,0.0,1.0,1.0,,,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.5,0.707107,4.0,1.0,0.25,0.5,0.0,0.0,0.0,0.0,0.144692,0.0,1.0,1.0,,,0.0,0.0,0.0,0.0,0.0,0.467789,0.28234,0.027779,0.022494,0.027779,0.022494,0.023331,0.008949,0.032277,0.08398,0.0007716453,0.000506,0.013774,0.014264,0.0,0.0,8.020208000000001e-17,4.503985e-17,2.302585,0.063621,0.022494,0.037965,0.022376,0.021228,0.007506,0.012668,0.007466,0.004048,0.0005059832,0.001775,0.001611,0.017123,0.013774,0.015054,0.001477,0.0,0.0,0.0,0.0,0.110029,0.032277,0.075429,0.032313,2.6645350000000002e-18,0.0,0.577639,0.0,-1.200002,-0.999,0.999,1.998,0.999,1.004064,-0.941717,-0.941717,-0.782695,1.011826,0.886831,0.05656027,False,6.973908e-17,0.0,0.0,0.0,2.205343e-18,0.0,-0.049984,0.049984,-0.050119,0.050119,-1.973903e-16,0.0,-6.24203e-18,-0.070638,0.070638,2.0,0.1365753,0.06828767,0.096573,4.0,0.428534,0.107134,0.075486,0.116553,0.175407,0.0,1.0,0.261403,4.0,4.0,8.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.451376,3.0,0.0,3.0,,,1.0,0.0,0.0,1.0,1.0,0,3,-0.878975,0.960027,0.533893,2.09483,-0.354779,-1.327552,-0.744178,-0.15504,0.043973,-0.551189,-0.137026,0.328373,0.781106,-0.071281,0.154365,0.499256
3,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.053571,0.28125,0.102724,2.0,1.0,3.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.053571,0.375,0.136401,3.0,1.0,4.0,,1.0,0.333333,0.0,1.0,1.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.044643,0.4,0.123562,3.0,1.0,4.0,,1.0,0.333333,0.0,1.0,1.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.089286,0.347222,0.139724,2.0,1.0,3.0,,1.0,0.0,0.0,1.0,1.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.089286,0.390625,0.146518,3.0,1.0,4.0,,1.0,0.333333,0.0,1.0,1.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.044643,0.4,0.123562,3.0,1.0,4.0,,1.0,0.333333,0.0,1.0,1.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.071429,0.285714,0.1091,2.0,1.0,3.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.053571,0.375,0.136401,3.0,1.0,4.0,,1.0,0.333333,0.0,1.0,1.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.044643,0.4,0.123562,3.0,1.0,4.0,,1.0,0.333333,0.0,1.0,1.0,0.0,0.746966,0.166829,0.227636,-0.125736,0.227636,-0.125736,0.150939,-0.082707,0.061777,0.029,0.05181832,0.01581,0.059747,0.018127,0.0,0.0,1.073186e-13,3.585458e-15,2.302585,0.54861,0.019782,0.227245,0.159676,0.183053,0.006601,0.075824,0.053278,0.300973,0.0003913307,0.073949,0.091001,0.301046,0.011576,0.079444,0.090119,0.0,0.0,0.0,0.0,0.223579,0.0,0.067829,0.064634,-1.776357e-18,0.0,0.577639,4.615117e-18,-1.200002,-0.999,0.999,1.998,0.999,1.718279,0.194756,0.194756,0.130486,0.011618,0.03793,1.035489e-15,False,0.0,0.0,-2.592391e-16,0.0,0.0,-8.19786e-18,-0.044551,0.044551,-0.053785,0.053785,0.0,0.0,0.0,-0.045489,0.045489,7.0,0.8001123,0.1143018,0.159644,9.0,1.078414,0.119824,0.1531,0.009458,0.268844,0.0,1.0,0.261168,9.0,9.0,18.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.888889,0.373049,7.0,0.0,7.0,,,0.380952,0.0,0.0,1.0,1.0,0,3,1.314014,-2.369212,2.092896,-0.406,0.185828,-0.576415,0.349928,-0.14599,-0.327669,-0.060189,-0.22889
5,0.21258,0.025961,-0.287152,0.071416,0.250036
4,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.0,0.0,0.050588,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.0,0.0,0.051327,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.0,0.0,0.048536,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.0,0.0,0.042282,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.0,0.0,0.042255,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.0,0.0,0.048536,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.0,0.0,0.043323,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.0,0.0,0.051327,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,1.0,0.111111,0.333333,0.0,1.0,0.0,0.0,0.048536,0.0,1.0,1.0,,1.0,0.0,0.0,1.0,0.0,0.0,0.414438,0.847672,0.077123,0.296521,0.077123,0.296521,0.050943,0.201866,0.020916,0.033393,0.005947907,0.087925,0.006298,0.08959,0.0,0.0,1.230852e-16,1.370661e-10,2.302585,0.296521,0.009723,0.072169,0.093498,0.098939,0.003244,0.02408,0.031197,0.087925,9.452782e-05,0.012858,0.028431,0.08959,0.000129,0.01823,0.02794,0.0,0.0,0.0,0.0,0.091913,0.0,0.027535,0.026986,-5.329071e-18,0.0,0.577639,9.230235e-18,-1.200002,-0.999,0.999,1.998,0.999,1.019261,0.194756,0.194756,0.130486,0.011618,0.03793,1.035489e-15,False,2.863168e-16,0.0,8.198478e-17,0.0,9.054132e-18,2.592586e-18,-0.048699,0.048699,-0.042518,0.042518,0.0,0.0,0.0,-0.045489,0.045489,7.0,2.010095,0.2871565,0.43583,9.0,2.83645,0.315161,0.429743,0.017553,0.808802,0.0,1.0,0.11432,9.0,9.0,18.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,2.0,0.285714,0.48795,9.0,3.0,0.333333,0.5,0.0,1.0,0.0,0.0,0.051899,0.0,3.0,3.0,,1.0,1.0,0.0,1.0,0.0,0.0,0,6,-0.780377,0.256968,-0.722442,-0.873048,-0.83592,-0.257325,-0.236799,-0.854089,-0.543529,-0.460812,-0.434608,-0.054258,-0.711702,-0.053859,0.011372,0.277471
5,7.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.471154,0.287074,2.0,0.0,2.0,,,0.0,0.0,0.0,1.0,1.0,7.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.409091,0.257489,2.0,0.0,2.0,,,0.0,0.0,0.0,1.0,1.0,7.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4375,0.16236,2.0,0.0,2.0,,,0.0,0.0,0.0,1.0,1.0,7.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.271226,2.0,0.0,2.0,,,0.0,0.0,0.0,1.0,1.0,7.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.270934,2.0,0.0,2.0,,,0.0,0.0,0.0,1.0,1.0,7.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4375,0.16236,2.0,0.0,2.0,,,0.0,0.0,0.0,1.0,1.0,7.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.282244,2.0,0.0,2.0,,,0.0,0.0,0.0,1.0,1.0,7.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.409091,0.257489,2.0,0.0,2.0,,,0.0,0.0,0.0,1.0,1.0,7.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4375,0.16236,2.0,0.0,2.0,,,0.0,0.0,0.0,1.0,1.0,0.646974,0.806225,-0.00055,0.237358,-0.00055,0.237358,-0.000252,0.158318,0.0,0.0,3.029452e-07,0.056339,2e-06,0.060941,0.0,0.0,5.3913630000000003e-17,1.251058e-13,2.302585,0.237358,0.00055,0.103213,0.100351,0.079199,0.000184,0.034439,0.033484,0.056339,3.029452e-07,0.019464,0.021903,0.060941,2e-06,0.020716,0.022975,0.0,0.0,0.0,0.0,0.023738,0.0,0.012967,0.010385,7.105427e-18,0.0,0.577639,4.615117e-18,-1.200002,-0.999,0.999,1.998,0.999,1.063711,0.194756,0.194756,0.130486,0.011618,0.03793,1.035489e-15,False,-1.031171e-16,0.0,-3.149622e-16,0.0,-3.260848e-18,-9.959981e-18,-0.050707,0.050707,-0.04427,0.04427,0.0,0.0,0.0,-0.045489,0.045489,7.0,1.46946,0.2099229,0.251223,9.0,2.21162,0.245736,0.277559,0.057047,0.685114,0.0,1.0,0.092138,9.0,9.0,18.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,1.0,0.142857,0.377964,9.0,1.0,0.111111,0.333333,0.0,0.0,0.0,0.333333,0.084924,2.0,1.0,3.0,,,0.666667,0.0,0.0,1.0,1.0,0,7,1.033472,-0.407705,-1.664108,0.097237,-0.373753,0.084753,-0.331649,0.316929,-0.453272,0.484567,0.074208,-0.253788,-0.447003,-0.174385,0.574801,0.251668
6,7.0,1.0,0.142857,0.377964,9.0,2.0,0.222222,0.440959,1.0,0.0,0.0,0.0,0.050588,0.0,2.0,2.0,1.0,2.0,0.0,1.0,1.0,0.0,0.0,7.0,1.0,0.142857,0.377964,9.0,2.0,0.222222,0.440959,1.0,0.0,0.0,0.125,0.065869,1.0,1.0,2.0,,2.0,1.0,0.0,1.0,1.0,0.0,7.0,1.0,0.142857,0.377964,9.0,2.0,0.222222,0.440959,1.0,0.0,0.0,0.0,0.048536,0.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,0.0,0.0,7.0,1.0,0.142857,0.377964,9.0,2.0,0.222222,0.440959,1.0,0.0,0.017857,0.125,0.060252,1.0,1.0,2.0,,2.0,0.0,0.0,1.0,1.0,0.0,7.0,1.0,0.142857,0.377964,9.0,2.0,0.222222,0.440959,1.0,0.0,0.0,0.125,0.054227,1.0,1.0,2.0,,2.0,1.0,0.0,1.0,1.0,0.0,7.0,1.0,0.142857,0.377964,9.0,2.0,0.222222,0.440959,1.0,0.0,0.0,0.0,0.048536,0.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,0.0,0.0,7.0,1.0,0.142857,0.377964,9.0,2.0,0.222222,0.440959,1.0,0.0,0.0,0.0,0.043323,0.0,2.0,2.0,1.0,2.0,0.0,1.0,1.0,0.0,0.0,7.0,1.0,0.142857,0.377964,9.0,2.0,0.222222,0.440959,1.0,0.0,0.0,0.125,0.065869,1.0,1.0,2.0,,2.0,1.0,0.0,1.0,1.0,0.0,7.0,1.0,0.142857,0.377964,9.0,2.0,0.222222,0.440959,1.0,0.0,0.0,0.0,0.048536,0.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,0.0,0.0,0.654327,0.037733,0.528004,0.099995,0.528004,0.099995,0.365502,0.066975,0.230115,0.0,0.2787884,0.009999,0.284651,0.011169,0.0,0.0,5.48941e-05,3.507402e-16,2.302585,0.528004,0.013163,0.139706,0.183396,0.176177,0.004392,0.046615,0.061193,0.278788,0.0001732691,0.048948,0.092013,0.284651,0.001097,0.05021,0.093589,0.0,0.0,0.0,0.0,0.230115,0.0,0.040285,0.073541,0.0,0.0,0.577639,9.230235e-18,-1.200002,-0.999,0.999,1.998,0.999,1.193319,0.194756,0.194756,0.130486,0.011618,0.03793,1.035489e-15,False,0.0,0.0,0.0,0.0,0.0,0.0,-0.034827,0.034827,-0.048092,0.048092,0.0,0.0,0.0,-0.045489,0.045489,7.0,0.979311,0.1399016,0.285258,9.0,1.191295,0.132366,0.253047,0.000531,0.211453,0.0,1.0,0.054864,9.0,9.0,18.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,1.0,0.142857,0.377964,9.0,1.0,0.111111,0.333333,0.0,0.0,0.0,0.125,0.062928,1.0,1.0,2.0,,,1.0,0.0,0.0,1.0,0.0,0,1,2.28072,-0.437261,-0.800687,0.024239,-0.28928,-0.01829,-0.02692,0.75293,-0.012809,-0.365879,-0.15003
,-0.041924,0.354999,0.375847,0.339399,0.292182
7,7.0,2.0,0.285714,0.48795,9.0,2.0,0.222222,0.440959,0.0,0.0,0.053571,0.125,0.072087,1.0,2.0,3.0,,2.0,0.0,0.0,1.0,0.0,0.0,7.0,3.0,0.428571,0.534522,9.0,3.0,0.333333,0.5,0.0,0.0,0.0,0.0,0.051327,0.0,3.0,3.0,,2.0,0.0,0.0,1.0,0.0,0.0,7.0,3.0,0.428571,0.534522,9.0,3.0,0.333333,0.5,0.0,0.0,0.0,0.0,0.048536,0.0,3.0,3.0,2.0,2.0,0.0,1.0,1.0,0.0,0.0,7.0,2.0,0.285714,0.48795,9.0,2.0,0.222222,0.440959,0.0,0.0,0.107143,0.166667,0.108771,1.0,2.0,3.0,,2.0,0.0,0.0,1.0,0.0,0.0,7.0,3.0,0.428571,0.534522,9.0,3.0,0.333333,0.5,0.0,0.0,0.107143,0.166667,0.1087,1.0,2.0,3.0,,2.0,0.0,0.0,1.0,0.0,0.0,7.0,3.0,0.428571,0.534522,9.0,3.0,0.333333,0.5,0.0,0.0,0.0,0.0,0.048536,0.0,3.0,3.0,2.0,2.0,0.0,1.0,1.0,0.0,0.0,7.0,2.0,0.285714,0.48795,9.0,2.0,0.222222,0.440959,0.0,0.0,0.107143,0.166667,0.111448,1.0,2.0,3.0,,2.0,0.0,0.0,1.0,0.0,0.0,7.0,3.0,0.428571,0.534522,9.0,3.0,0.333333,0.5,0.0,0.0,0.0,0.0,0.051327,0.0,3.0,3.0,,2.0,0.0,0.0,1.0,0.0,0.0,7.0,3.0,0.428571,0.534522,9.0,3.0,0.333333,0.5,0.0,0.0,0.0,0.0,0.048536,0.0,3.0,3.0,2.0,2.0,0.0,1.0,1.0,0.0,0.0,0.210252,0.449221,0.027478,-0.025364,0.027478,-0.025364,0.018006,-0.020885,0.033983,0.047635,0.0007550563,0.000643,0.001106,0.053458,0.0,0.0,6.446131e-17,6.686790000000001e-17,2.302585,0.54861,0.025364,0.193127,0.217473,0.183053,0.008463,0.06444,0.072563,0.300973,0.0006433559,0.078681,0.117439,0.3025,0.001106,0.100266,0.111139,0.0,0.0,0.0,0.0,0.265771,0.015011,0.096728,0.088854,1.776357e-18,0.0,0.577639,0.0,-1.200002,-0.999,0.999,1.998,0.999,1.91823,0.194756,0.194756,0.130486,0.011618,0.03793,1.035489e-15,False,6.972831000000001e-17,0.0,0.0,0.0,2.205003e-18,0.0,-0.049991,0.049991,-0.051332,0.051332,0.0,0.0,0.0,-0.045489,0.045489,7.0,1.517457,0.2167795,0.34408,9.0,1.696874,0.188542,0.306247,0.003587,0.17583,0.0,1.0,0.222917,9.0,9.0,18.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,1.0,0.142857,0.377964,9.0,1.0,0.111111,0.333333,0.0,0.0,0.017857,0.625,0.182842,5.0,1.0,6.0,,,0.533333,0.0,0.0,1.0,1.0,0,3,0.714711,-0.376992,-0.340032,-0.291963,-1.214832,-0.571314,1
.476198,0.666395,0.636855,-0.715618,0.37746,0.00343,0.230121,0.045088,-0.224369,0.163491
8,7.0,2.0,0.285714,0.48795,9.0,2.0,0.222222,0.440959,0.0,0.0,0.0,0.0,0.050588,0.0,2.0,2.0,,3.0,0.0,0.0,1.0,0.0,0.0,7.0,2.0,0.285714,0.48795,9.0,2.0,0.222222,0.440959,0.0,0.0,0.0,0.25,0.109497,2.0,0.0,2.0,,,0.0,0.0,0.0,0.0,0.0,7.0,1.0,0.142857,0.377964,9.0,2.0,0.222222,0.440959,1.0,0.0,0.035714,0.125,0.062288,1.0,2.0,3.0,1.0,2.0,0.0,1.0,1.0,0.0,0.0,7.0,2.0,0.285714,0.48795,9.0,2.0,0.222222,0.440959,0.0,0.0,0.071429,0.125,0.078222,1.0,1.0,2.0,,3.0,0.0,0.0,1.0,0.0,0.0,7.0,2.0,0.285714,0.48795,9.0,2.0,0.222222,0.440959,0.0,0.0,0.071429,0.125,0.078172,1.0,1.0,2.0,,3.0,0.0,0.0,1.0,0.0,0.0,7.0,1.0,0.142857,0.377964,9.0,2.0,0.222222,0.440959,1.0,0.0,0.035714,0.125,0.062288,1.0,2.0,3.0,1.0,2.0,0.0,1.0,1.0,0.0,0.0,7.0,2.0,0.285714,0.48795,9.0,2.0,0.222222,0.440959,0.0,0.0,0.071429,0.125,0.080147,1.0,1.0,2.0,,3.0,0.0,0.0,1.0,0.0,0.0,7.0,2.0,0.285714,0.48795,9.0,2.0,0.222222,0.440959,0.0,0.0,0.0,0.25,0.109497,2.0,0.0,2.0,,,0.0,0.0,0.0,0.0,0.0,7.0,1.0,0.142857,0.377964,9.0,2.0,0.222222,0.440959,1.0,0.0,0.035714,0.125,0.062288,1.0,2.0,3.0,1.0,2.0,0.0,1.0,1.0,0.0,0.0,0.604624,0.117227,0.068304,0.008845,0.068304,0.008845,0.045846,0.006098,0.030719,0.0,0.004665436,7.8e-05,0.005156,0.000125,0.0,0.0,1.575305e-16,5.781503e-17,2.302585,0.508264,0.008845,0.152115,0.170988,0.169591,0.002951,0.050756,0.057053,0.258332,7.823687e-05,0.048721,0.082965,0.334692,0.000125,0.063884,0.108219,0.0,0.0,0.0,0.0,0.266676,0.0,0.053869,0.085073,4.440891999999999e-19,0.0,0.577639,-9.230235e-18,-1.200002,-0.999,0.999,1.998,0.999,1.379064,0.194756,0.194756,0.130486,0.011618,0.03793,1.035489e-15,False,0.0,0.0,0.0,0.0,0.0,0.0,-0.048931,0.048931,-0.050468,0.050468,0.0,0.0,0.0,-0.045489,0.045489,7.0,3.640282,0.5200403,0.477698,9.0,3.733861,0.414873,0.463578,0.075709,0.01787,0.0,1.0,0.03651,9.0,9.0,18.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,4.0,0.571429,0.534522,9.0,4.0,0.444444,0.527046,0.0,0.0,0.0,0.0,0.051899,0.0,4.0,4.0,,,0.5,0.0,0.0,0.0,0.0,0,5,-1.968005,0.490396,-0.723247,-1.705197,-0.268092,-0.692979,-0.035649,
-0.103272,0.117098,0.047243,0.375444,0.056684,0.200936,-0.113684,-0.117899,-0.21135
9,7.0,1.0,0.142857,0.377964,9.0,1.0,0.111111,0.333333,0.0,0.0,0.0,0.125,0.072087,1.0,0.0,1.0,,,0.0,0.0,0.0,0.0,0.0,7.0,1.0,0.142857,0.377964,9.0,1.0,0.111111,0.333333,0.0,0.0,0.0,0.0,0.051327,0.0,1.0,1.0,,,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.471154,0.175081,2.0,0.0,2.0,,,0.0,0.0,0.0,1.0,1.0,7.0,1.0,0.142857,0.377964,9.0,1.0,0.111111,0.333333,0.0,0.0,0.0,0.0,0.042282,0.0,1.0,1.0,,4.0,0.0,0.0,1.0,0.0,0.0,7.0,1.0,0.142857,0.377964,9.0,1.0,0.111111,0.333333,0.0,0.0,0.0,0.0,0.042255,0.0,1.0,1.0,,4.0,0.0,0.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.471154,0.175081,2.0,0.0,2.0,,,0.0,0.0,0.0,1.0,1.0,7.0,1.0,0.142857,0.377964,9.0,1.0,0.111111,0.333333,0.0,0.0,0.0,0.0,0.043323,0.0,1.0,1.0,,4.0,0.0,0.0,1.0,0.0,0.0,7.0,1.0,0.142857,0.377964,9.0,1.0,0.111111,0.333333,0.0,0.0,0.0,0.0,0.051327,0.0,1.0,1.0,,,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.471154,0.175081,2.0,0.0,2.0,,,0.0,0.0,0.0,1.0,1.0,0.354719,0.671886,-0.042432,-0.065725,-0.042432,-0.065725,-0.02818,-0.043712,0.008407,0.014784,0.001800509,0.00432,0.001832,0.009037,0.0,0.0,8.911566000000001e-17,1.190983e-16,2.302585,0.185631,0.00141,0.075949,0.064825,0.061939,0.000471,0.025341,0.02163,0.034459,1.989187e-06,0.009445,0.011394,0.03446,4e-06,0.01059,0.010921,0.0,0.0,0.0,0.0,0.076608,0.0,0.031106,0.026713,-8.881783999999999e-19,0.0,0.577639,8.076455e-18,-1.200002,-0.999,0.999,1.998,0.999,1.044312,0.194756,0.194756,0.130486,0.011618,0.03793,1.035489e-15,False,-1.346991e-16,0.0,-3.330475e-16,0.0,-4.259561e-18,-1.053189e-17,-0.051757,0.051757,-0.052332,0.052332,0.0,0.0,0.0,-0.045489,0.045489,7.0,1.203032,0.1718617,0.282637,9.0,1.254093,0.139344,0.253415,0.001619,0.049442,0.0,1.0,0.036281,9.0,9.0,18.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,1.0,0.142857,0.377964,9.0,1.0,0.111111,0.333333,0.0,0.0,0.0,0.125,0.062928,1.0,1.0,2.0,,,1.0,0.0,0.0,0.0,0.0,0,5,1.877795,0.515907,-1.177585,-0.455735,1.798875,-0.541452,0.11173,-1.046893,0.204935,-0.398325,-0.601781,0.14114,-0.
079525,0.107423,0.293352,-0.364222


   dataset variable  pc_original_fisherz_0.01_count_widout_x_y  \
0    00012        0                                        1.0   
1    00027        0                                        2.0   
2    00027        1                                        2.0   
3    00040        0                                        7.0   
4    00040        3                                        7.0   
5    00040        4                                        7.0   
6    00040        5                                        7.0   
7    00040        6                                        7.0   
8    00040        7                                        7.0   
9    00040        8                                        7.0   
10   00055        1                                        8.0   
11   00055        2                                        8.0   
12   00055        3                                        8.0   
13   00055        4                                        8.0   
14   00055

[32m22:43:21[0m [33msave prediction - path=data\prediction.csv[0m
[32m22:43:24[0m check prediction - call=columns_name({})
[32m22:43:24[0m check prediction - call=nans({})
[32m22:43:24[0m check prediction - call=ids({})
[32m22:43:24[0m [31mcheck failed - message=`Different ID(s)`[0m
[32m22:43:24[0m ended
[32m22:43:24[0m [33mduration - time=00:01:14[0m
[32m22:43:24[0m [33mmemory - before="5.72 GB" after="1.69 GB" consumed="-4,331,032,576.00 B"[0m


Download this notebook and submit it to the platform: https://hub.crunchdao.com/competitions/causality-discovery/submit/via/notebook
