In [27]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

df_results_list = []
#list files in directory:
results_dir = 'results-main'

In [28]:
for file in os.listdir(results_dir):
    df_loc = pd.read_csv(os.path.join(results_dir, file))
    df_results_list.append(df_loc)
big_perf_df = pd.concat(df_results_list)

n_samples_of_plot = 10000
big_perf_df = big_perf_df[big_perf_df['n_samples']==n_samples_of_plot]

big_perf_df['algo'].replace({'ICP+DT': 'ICP'}, inplace=True)
algos_to_keep = ['SCM', 'DT', 'ICP', 'ICSCM']
big_perf_df = big_perf_df[big_perf_df['algo'].isin(algos_to_keep)]

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  big_perf_df['algo'].replace({'ICP+DT': 'ICP'}, inplace=True)


In [None]:
perf_df_causalscore = big_perf_df[big_perf_df['type'] == 'causal']
perf_df_causalscore = perf_df_causalscore[perf_df_causalscore['metric'] == '01 loss']

min_n_splits = 100
for a in set(perf_df_causalscore['algo']):
    perf_df_causalscore_a = perf_df_causalscore[perf_df_causalscore['algo'] == a]
    for n in set(perf_df_causalscore['n_var']):
        perf_df_causalscore_a_n = perf_df_causalscore_a[perf_df_causalscore_a['n_var'] == n]
        splits = set(perf_df_causalscore_a_n['split'])
        print(a, n, len(splits))
perf_df_causalscore = perf_df_causalscore[perf_df_causalscore['split'].isin(list(range(min_n_splits)))]

plt.figure(figsize=(5, 3))
perf_df_causalscore['cardinality of Xb'] = perf_df_causalscore['n_var']
del perf_df_causalscore['n_var']
perf_df_causalscore_small = perf_df_causalscore[['algo', 'score', 'split', 'cardinality of Xb']]
heatmap_df = perf_df_causalscore_small.groupby(['algo', 'cardinality of Xb']).mean().reset_index().pivot(index='algo', columns='cardinality of Xb', values='score')
heatmap_df = heatmap_df.reindex(algos_to_keep)
print(heatmap_df.index)

sns.heatmap(heatmap_df, annot=True, cmap='Greens')
plt.title('proportion of identification of causal parents over {} splits'.format(min_n_splits), y=1.00, fontsize=10)
plt.show()

In [31]:
# latex table
table = "\\begin{tabular}{lrrrrrrr}\n"
table += "\\toprule\n"
table += "\\textbf{Model} & 1 & 2 & 3 & 4 & 5 & 6 & 7 \\\\\n"
table += "\\midrule\n"
for algo in algos_to_keep:
    table += f"{algo} & "
    for n in range(1, 8):
        score = heatmap_df.loc[algo, n]
        if score < 0.5:
            table += f"\\red{{{score:.2f}}} & "
        else:
            table += f"{score:.2f} & "
    table = table[:-2] + " \\\\\n"
table += "\\bottomrule\n"
table += "\\end{tabular}"

print(table)
#save table in a .tex file
with open('figures/identification-table.tex', 'w') as f:
    f.write(table)

\begin{tabular}{lrrrrrrr}
\toprule
\textbf{Model} & 1 & 2 & 3 & 4 & 5 & 6 & 7 \\
\midrule
SCM & \red{0.00} & \red{0.00} & \red{0.00} & \red{0.00} & \red{0.00} & \red{0.00} & \red{0.00}  \\
DT & \red{0.00} & \red{0.00} & \red{0.00} & \red{0.00} & \red{0.00} & \red{0.00} & \red{0.00}  \\
ICP & 0.96 & 0.98 & 0.99 & 0.99 & 0.97 & \red{0.00} & \red{0.00}  \\
ICSCM & 0.96 & 0.97 & 0.99 & 0.96 & 0.97 & 0.96 & 0.97  \\
\bottomrule
\end{tabular}
