# Analysis code of paper results

Code associated with Elmasri, M. (2022), "Parallel sampling of decomposable graphs using Markov chain on junction tree". See the paper on [arXiv](https://arxiv.org/abs/2209.02008).



To run the code, follow these steps:

1. Install Benchpress from [here](https://benchpressdocs.readthedocs.io/en/latest/).

2. Pull the code from GitHub using the following commands:
    ```
    git clone https://github.com/felixleopoldo/benchpress.git
    cd benchpress
    git checkout paralleldg
    git pull origin paralleldg
    git checkout ba83a10d
    ```

3. Run the code with the bash command:
    ```
    snakemake --cores all --use-singularity --configfile config/elmasri_parallel.json
    ```

4. All results should exist in the folder `benchpress/results`.

5. To analyze the results, run an external Jupyter notebook as follows:
    ```
    cd ../
    git clone git@github.com:melmasri/parallelDG.git
    jupyter notebook
    ```

6. In the browser (normally at localhost:8889), run the notebook `examples/benchpress_analysis.ipynb`.


7. Please change the variable BENCHPRESS_LOC to the location of benchpress results, normally in `benchpress/results/`


## Parameter setup

In [1]:
%load_ext autoreload
%autoreload 2
import os
# Imported explicitly because sys.path is extended below; previously this only
# worked if `from utils import *` happened to re-export `sys`.
import sys
print(f"current wd: {os.getcwd()}")
from utils import *
import networkx as nx
import matplotlib
import json
from networkx.drawing.nx_pydot import graphviz_layout
sys.path.append('..')
# NOTE(review): machine-specific absolute path; point it at the local
# parallelDG checkout before running on another machine.
sys.path.append('/Users/m.elmasri/src/parallelDG/parallelDG/')
sns.set_style("whitegrid")
roc_columns = ['adjmat', 'parameters', 'data']

## all simulations
parameters = ['intra-class', 'gwi']
#parameters = ['intra-class']
data = ['n=50/', 'n=100/', 'n=200/', 'n=500/']
adjmat = ['circle', 'lattice', 'random', 'bandmat']

# adjmat renaming: short display names for the graph types
# (the variable keeps its original spelling so existing references still work)
adjamt_rename = {'bandmat': 'AR', 'random': 'random', 'lattice': 'lattice', 'circle': 'circle'}
# long display names for the graph types
adjmat_fullname = {
    'cta': 'Christmas Tree Algorithm',
    'bandmat': 'Autoregressive',
    'random' : 'Random',
    'circle' : 'Circle',
    'lattice': 'Lattice'
}

# legend / title labels, one per algorithm
gt13_label = 'GT13-s'
dualgl_label = 'Dual Graph'
mb_label = 'MB09'
bdgraph_label = 'MHG23-BDMCMC'
glass_label = 'G-Lasso'
psi_label = 'psi-Learner'

current wd: /Users/m.elmasri/src/dualgl


# ROC curves

In [4]:
# Order in which the per-model curves are extracted from the ROC table.
roc_model_keys = ('dual', 'gts', 'mb', 'bdgraph', 'glass', 'psi')
# Order in which the curves are drawn: (model key, legend label, extra plot kwargs).
roc_draw_order = [
    ('dual', dualgl_label, {}),
    ('mb', mb_label, {'linestyle': ':'}),
    ('glass', glass_label, {'linestyle': ':'}),
    ('psi', psi_label, {'linestyle': '--'}),
    ('gts', gt13_label, {'linestyle': '-.'}),
    ('bdgraph', bdgraph_label, {'linestyle': '-.'}),
]

# One ROC figure per (parameter setting, graph type, sample size).
for par in parameters:
    for adj in adjmat:
        for d in data:
            patterns = {
                'adjmat': 'graph=' + adj,
                'data': d,
                'parameters': par,
            }
            benchmark_pattern = '*' + par + '*'
            roc_filename = get_ROCfilenaem(benchmark_pattern)
            roc_file = file_roc_data(roc_filename, patterns)
            print(patterns)
            rates = {
                key: get_tpr_fpr_threshold(get_model(roc_file, key))
                for key in roc_model_keys
            }

            # Plotting the ROC curve.
            # Element 0 of each result goes on the x axis and element 1 on the
            # y axis. NOTE(review): confirm this matches the return order of
            # get_tpr_fpr_threshold and the axis labels below.
            plt.figure()
            for key, label, style in roc_draw_order:
                curve = rates[key]
                plt.plot(curve[0], curve[1], lw=1, label=label, **style)
            plt.xlabel('Average False Positive Rate')
            plt.ylabel('Average True Positive Rate')
            # plt.title('Receiver Operating Characteristic')
            plt.legend(loc="lower right", title=get_title(patterns))
            save_location('roc_graph' + get_title(patterns))
            plt.close()



{'adjmat': 'graph=circle', 'data': 'n=50/', 'parameters': 'intra-class'}
save to: img/roc_graphn=50, graph=graph=circle, param=intra-class.jpg
{'adjmat': 'graph=circle', 'data': 'n=100/', 'parameters': 'intra-class'}
save to: img/roc_graphn=100, graph=graph=circle, param=intra-class.jpg
{'adjmat': 'graph=circle', 'data': 'n=200/', 'parameters': 'intra-class'}
save to: img/roc_graphn=200, graph=graph=circle, param=intra-class.jpg
{'adjmat': 'graph=circle', 'data': 'n=500/', 'parameters': 'intra-class'}
save to: img/roc_graphn=500, graph=graph=circle, param=intra-class.jpg
{'adjmat': 'graph=lattice', 'data': 'n=50/', 'parameters': 'intra-class'}
save to: img/roc_graphn=50, graph=graph=lattice, param=intra-class.jpg
{'adjmat': 'graph=lattice', 'data': 'n=100/', 'parameters': 'intra-class'}
save to: img/roc_graphn=100, graph=graph=lattice, param=intra-class.jpg
{'adjmat': 'graph=lattice', 'data': 'n=200/', 'parameters': 'intra-class'}
save to: img/roc_graphn=200, graph=graph=lattice, param

# Trace plots of likelihood

In [5]:
# Plot the score trajectory of every GT13 output file, then title and save.
# `gt13_files` and `patterns` must already exist; they are assigned in the
# "Run all" section of this notebook.
for traj_file in gt13_files:
    score_traj_plot(traj_file)
# plt.ylim((0, 2400))
plt.title(f"{gt13_label}, {get_title(patterns)}")
save_location(f"gt_traj_{get_title(patterns)}")
# plt.show()

NameError: name 'gt13_files' is not defined

In [None]:
# Plot the score trajectory of every BDgraph output file, then title and save.
# `bdgraph_files` and `patterns` must already exist; they are assigned in the
# "Run all" section of this notebook.
for traj_file in bdgraph_files:
    score_traj_plot(traj_file)
# plt.ylim((0, 2400))
plt.title(f"{bdgraph_label}, {get_title(patterns)}")
save_location(f"bdgraph_traj_{get_title(patterns)}")
# plt.show()


  columns=df2.columns).fillna(method="ffill")
  columns=df2.columns).fillna(method="ffill")
  columns=df2.columns).fillna(method="ffill")
  columns=df2.columns).fillna(method="ffill")
  columns=df2.columns).fillna(method="ffill")


save to: img/bdgraph_traj_n=100, graph=circle, param=intra-class.jpg


# Trace plot of number of edges

In [None]:
# Plot the size trajectory of every GT13 output file, then title and save.
for traj_file in gt13_files:
    size_traj_plot(traj_file)
plt.title(f"{gt13_label}, {get_title(patterns)}")
save_location(f"gt_size_{get_title(patterns)}")

  columns=df2.columns).fillna(method="ffill")
  columns=df2.columns).fillna(method="ffill")
  columns=df2.columns).fillna(method="ffill")
  columns=df2.columns).fillna(method="ffill")
  columns=df2.columns).fillna(method="ffill")


save to: img/gt_size_n=100, graph=circle, param=intra-class.jpg


In [None]:
# Plot the size trajectory of every BDgraph output file, then title and save.
for traj_file in bdgraph_files:
    size_traj_plot(traj_file)
plt.title(f"{bdgraph_label}, {get_title(patterns)}")
save_location(f"bdgraph_size_{get_title(patterns)}")

  columns=df2.columns).fillna(method="ffill")
  columns=df2.columns).fillna(method="ffill")
  columns=df2.columns).fillna(method="ffill")


save to: img/bdgraph_size_n=100, graph=circle, param=intra-class.jpg


  columns=df2.columns).fillna(method="ffill")
  columns=df2.columns).fillna(method="ffill")


: 

# Heatmap

In [None]:
# One heatmap per GT13 output file. Saved files are numbered from 0,
# figure titles from 1.
for k, f in enumerate(gt13_files):
    plot_heatmap(f, cbar=False)
    # ', ' keeps the label apart from the description; without it the title
    # read "GT13-sn=100, ...". Same format as the trajectory plot titles.
    plt.title(gt13_label + ', ' + get_title(patterns) + f" Graph {k+1}")
    save_location('gt_heatmap_traj_' + get_title(patterns) + str(k))

save to: img/gt_heatmap_traj_n=100, graph=circle, param=intra-class0.jpg
save to: img/gt_heatmap_traj_n=100, graph=circle, param=intra-class1.jpg
save to: img/gt_heatmap_traj_n=100, graph=circle, param=intra-class2.jpg
save to: img/gt_heatmap_traj_n=100, graph=circle, param=intra-class3.jpg
save to: img/gt_heatmap_traj_n=100, graph=circle, param=intra-class4.jpg


: 

In [None]:
# One heatmap per BDgraph output file. Saved files are numbered from 0,
# figure titles from 1.
for k, f in enumerate(bdgraph_files):
    plot_heatmap(f, cbar=False)
    # ', ' keeps the label apart from the description, matching the
    # trajectory plot titles.
    plt.title(bdgraph_label + ', ' + get_title(patterns) + f" Graph {k+1}")
    save_location('bdgraph_heatmap_traj_' + get_title(patterns) + str(k))

save to: img/bdgraph_heatmap_traj_n=100, graph=circle, param=intra-class0.jpg
save to: img/bdgraph_heatmap_traj_n=100, graph=circle, param=intra-class1.jpg
save to: img/bdgraph_heatmap_traj_n=100, graph=circle, param=intra-class2.jpg
save to: img/bdgraph_heatmap_traj_n=100, graph=circle, param=intra-class3.jpg
save to: img/bdgraph_heatmap_traj_n=100, graph=circle, param=intra-class4.jpg


In [None]:
# Draw each true graph with plot_graph. Saved files are numbered from 0,
# figure titles from 1.
for k, f in enumerate(true_graph_files):
    plot_graph(read_csv_from_tar_gz(f), cbar=False)
    plt.title(f"Graph {k+1}" + ' - ' + patterns['adjmat'])
    save_location('true_graph_traj_' + patterns['adjmat'] + str(k))
    # clear the current figure before drawing the next graph
    plt.clf()

save to: img/true_graph_traj_graph=circle0.jpg
save to: img/true_graph_traj_graph=circle1.jpg
save to: img/true_graph_traj_graph=circle2.jpg
save to: img/true_graph_traj_graph=circle3.jpg
save to: img/true_graph_traj_graph=circle4.jpg


In [None]:
# Draw each true graph with plot_nx_graph. Saved files are numbered from 0,
# figure titles from 1.
for k, f in enumerate(true_graph_files):
    plot_nx_graph(read_csv_from_tar_gz(f))
    plt.title(f"Graph {k+1}" + ' - ' + patterns['adjmat'])
    save_location('true_graph_' + patterns['adjmat'] + str(k))
    # clear the current figure before drawing the next graph
    plt.clf()

save to: img/true_graph_graph=circle0.jpg
save to: img/true_graph_graph=circle1.jpg
save to: img/true_graph_graph=circle2.jpg
save to: img/true_graph_graph=circle3.jpg
save to: img/true_graph_graph=circle4.jpg


# Plotting Max Score graph

In [None]:
# Max-score graph for each GT13 output file. Saved files are numbered from 0,
# figure titles from 1.
for k, f in enumerate(gt13_files):
    plot_max_score_graph(read_csv_from_tar_gz(f))
    # ', ' keeps the label apart from the description, matching the
    # trajectory plot titles.
    plt.title(gt13_label + ', ' + get_title(patterns) + f" Graph {k+1}")
    save_location('gt_max_score_' + get_title(patterns) + str(k))

save to: img/gt_max_score_n=100, graph=circle, param=intra-class0.jpg
save to: img/gt_max_score_n=100, graph=circle, param=intra-class1.jpg
save to: img/gt_max_score_n=100, graph=circle, param=intra-class2.jpg
save to: img/gt_max_score_n=100, graph=circle, param=intra-class3.jpg
save to: img/gt_max_score_n=100, graph=circle, param=intra-class4.jpg


In [None]:
# Max-score graph for each BDgraph output file. Saved files are numbered
# from 0, figure titles from 1.
for k, f in enumerate(bdgraph_files):
    plot_max_score_graph(read_csv_from_tar_gz(f))
    # ', ' keeps the label apart from the description, matching the
    # trajectory plot titles.
    plt.title(bdgraph_label + ', ' + get_title(patterns) + f" Graph {k+1}")
    save_location('bdgraph_max_score_' + get_title(patterns) + str(k))

save to: img/bdgraph_max_score_n=100, graph=circle, param=intra-class0.jpg
save to: img/bdgraph_max_score_n=100, graph=circle, param=intra-class1.jpg
save to: img/bdgraph_max_score_n=100, graph=circle, param=intra-class2.jpg
save to: img/bdgraph_max_score_n=100, graph=circle, param=intra-class3.jpg
save to: img/bdgraph_max_score_n=100, graph=circle, param=intra-class4.jpg


# Time calculation

In [None]:
def read_time(filename):
    """Return the single number stored in ``filename`` as a float.

    The whole file is read and surrounding whitespace (for example a trailing
    newline) is stripped before conversion.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the content is not a valid number.
    """
    with open(filename) as handle:
        raw = handle.read()
    return float(raw.strip())

: 

In [None]:
## Collect the per-run timings of every sampler for both graph models.
# Each time.txt value is divided by a sampler-specific constant.
# NOTE(review): the divisors presumably rescale each run to "seconds per 1000
# updates" (the label used on the boxplot of these values) — confirm against
# the benchpress configuration.
frames = []
for graph_model in ['cta', 'bandmat']:
    ## Extract location of datafile
    paralleldg_files = algo_files(BENCHPRESS_LOC, 'parallel', ['parallel=True', graph_model, 'time.txt'])
    paralleldg_files = [f for f in paralleldg_files if 'subindex' not in f]
    # not used below; kept so the notebook namespace is unchanged
    paralleldg_files_sub = algo_files(BENCHPRESS_LOC, 'parallel', ['subindex.csv', 'parallel=True', graph_model])
    gt13_files = algo_files(BENCHPRESS_LOC, 'athomas', ['sampler=2', graph_model, 'time.txt', 'full_output=False'])
    gt13_files = [f for f in gt13_files if 'subindex' not in f]

    paralleldg_files_single_move = algo_files(BENCHPRESS_LOC, 'parallel', ['parallel=False', graph_model, 'time.txt'])
    gt13_single_move_files = algo_files(BENCHPRESS_LOC, 'athomas', ['sampler=1', graph_model, 'time.txt', 'full_output=False'])
    gt13_single_move_files = [f for f in gt13_single_move_files if 'subindex' not in f]

    gg99_files = algo_files(BENCHPRESS_LOC, 'athomas', ['sampler=0', graph_model, 'time.txt', 'full_output=False'])
    gg99_files = [f for f in gg99_files if 'subindex' not in f]

    # not used below; kept so the notebook namespace is unchanged
    true_graph_files = algo_files(BENCHPRESS_LOC, 'adjmat/', [graph_model, '.csv'])

    trilearn_files = algo_files(BENCHPRESS_LOC, 'pgibbs', [graph_model, 'time.txt', 'M=1000'])

    # One column per sampler; pandas requires all columns to have equal length.
    time_dict = {
        'Parallel': [read_time(f) / 3000.0 for f in paralleldg_files],
        'Single-move': [read_time(f) / 2000.0 for f in paralleldg_files_single_move],
        'GT13-m': [read_time(f) / 1000.0 for f in gt13_files],
        'GT13-s': [read_time(f) / 1000.0 for f in gt13_single_move_files],
        'O19': [read_time(f) / 10.0 for f in trilearn_files],
        'GG99': [read_time(f) / 1000.0 for f in gg99_files],
    }
    df1 = pd.DataFrame(time_dict)
    # NOTE(review): `model_fullname` is not defined in the visible cells of
    # this notebook (the setup cell defines `adjmat_fullname` with 'cta' and
    # 'bandmat' keys) — confirm it is provided by `utils`.
    df1['graph'] = model_fullname[graph_model]
    frames.append(df1)

# Concatenate once instead of growing from an empty DataFrame, which newer
# pandas versions warn about.
df = pd.concat(frames)

In [None]:
# Boxplot of the per-sampler timings, grouped by graph model.
# Reshape to long format: one row per (graph, sampler column, value).
df_melted = df.melt(id_vars='graph', var_name='measurement')
# O19 is left out of the plot — presumably because its timings are orders of
# magnitude larger than the others (see the describe() table); TODO confirm.
df1 = df_melted[df_melted.measurement != 'O19']
# figure styles
#sns.set_style('white')
sns.set_context('paper', font_scale=1.2)
plt.figure(figsize=(8, 5))
sns.set_style('ticks', {'axes.edgecolor': 'black',  
                        'xtick.color': 'black',
                        'ytick.color': 'black'})
# Plotting
# two fill colours, one per hue level (graph model)
palette = ['white', '#C1CDCD']
#plt.figure(figsize=(6, 4))
ax = sns.boxplot(x='measurement', y='value', 
                 hue='graph', 
                 data=df1,
                linewidth=0.8, 
                palette=palette)
ax.set_ylabel('Average time (sec) for 1000 updates.')
ax.set_xlabel('')
save_location('time_boxplot')
plt.show()


save to: /home/mo/src/decomposable_paper/benchpress/img/time_boxplot_bandmat.jpg


<Figure size 576x360 with 0 Axes>

In [None]:
# Summary statistics of the timing columns as a LaTeX table, two decimals.
print(df.describe().to_latex(index=True, float_format="%.2f"))

\begin{tabular}{lrrrrrr}
\toprule
{} &  Parallel &  Single-move &  GT13-m &  GT13-s &     O19 &  GG99 \\
\midrule
count &     20.00 &        20.00 &   20.00 &   20.00 &   20.00 & 20.00 \\
mean  &      0.20 &         0.26 &    0.04 &    0.19 & 3810.16 &  0.06 \\
std   &      0.04 &         0.03 &    0.01 &    0.04 &  122.58 &  0.00 \\
min   &      0.14 &         0.20 &    0.03 &    0.12 & 3568.87 &  0.06 \\
25\%   &      0.17 &         0.23 &    0.03 &    0.16 & 3707.67 &  0.06 \\
50\%   &      0.20 &         0.26 &    0.04 &    0.19 & 3811.52 &  0.06 \\
75\%   &      0.22 &         0.28 &    0.05 &    0.24 & 3917.07 &  0.07 \\
max   &      0.30 &         0.32 &    0.05 &    0.25 & 3986.64 &  0.07 \\
\bottomrule
\end{tabular}



# Run all

In [None]:
# Resolve the ROC data and the per-algorithm output files for one fixed
# configuration, then report how many files each algorithm produced.
patterns = {'adjmat': 'graph=circle', 'data': 'n=50/', 'parameters': 'intra-class'}
# Built from `patterns` itself; the previous version read `par`, a loop
# variable left over from whichever cell ran last.
benchmark_pattern = '*' + patterns['parameters'] + '*'
roc_filename = get_ROCfilenaem(benchmark_pattern)
# (filename, patterns) is the argument order used by every other call to
# file_roc_data in this notebook; the two were swapped here.
roc_file = file_roc_data(roc_filename, patterns)

filter_list = ['adjmat_estimate/',patterns['data'], patterns['parameters'], patterns['adjmat']]
gt13_files = algo_files(BENCHPRESS_LOC, 'algorithm=/athomas_jtsampler', 
                [patterns['data'], 
                patterns['parameters'], 
                patterns['adjmat'],
                'adjvecs.tar.gz',
                ])
bdgraph_files = algo_files(BENCHPRESS_LOC, 'algorithm=/bdgraph', 
            [ 
                patterns['data'], 
                patterns['parameters'], 
                patterns['adjmat'],
                '.tar.gz',
            ])
psilearner_files = algo_files(BENCHPRESS_LOC, 'algorithm=/equsa_psilearner', filter_list)
huge_glasso_files = algo_files(BENCHPRESS_LOC, 'algorithm=/huge_glasso', filter_list)
huge_mb_files = algo_files(BENCHPRESS_LOC, 'algorithm=/huge_mb', filter_list)
dualgl_files = algo_files(BENCHPRESS_LOC, 'algorithm=/dualgl', filter_list)
true_graph_files = algo_files(BENCHPRESS_LOC, 'adjmat/',[patterns['adjmat'], '.csv'])
print(f"Number of files (should be equal)  \n \
                GT13 \t\t\t\t{len(gt13_files)} \n \
                dualGL \t\t\t{len(dualgl_files)} \n \
                equsa_psilearner \t\t{len(psilearner_files)} \n \
                huge_glasso \t\t{len(huge_glasso_files)} \n \
                huge_mb \t\t{len(huge_mb_files)} \n \
                True graph files \t \t \t{len(true_graph_files)}")

Number of files (should be equal)  
                 GT13 				5 
                 dualGL 			65 
                 equsa_psilearner 		65 
                 huge_glasso 		45 
                 huge_mb 		45 
                 True graph files 	 	 	5


In [None]:
# Display the current filter patterns and the resolved ROC file name.
patterns, roc_filename

({'adjmat': 'graph=circle', 'data': 0.92, 'parameters': 'intra-class'},
 'results/output/benchmarks/dualGraph_p200_sparse_intra-class/ROC_data.csv')

In [None]:
# Configurations to process. Edit these lists to widen the run; the full sets
# are parameters = ['intra-class', 'gwi'] and
# adjmat = ['circle', 'random', 'lattice', 'bandmat'].
parameters = ['gwi']
data = ['n=50/', 'n=100/', 'n=200/', 'n=500/']
adjmat = ['bandmat']
for par in parameters:
    for adj in adjmat:
        for d in data:
            patterns = {
                'adjmat': adj,
                'data': d,
                'parameters': par,
            }
            benchmark_pattern = '*' + par + '*'
            roc_filename = get_ROCfilenaem(benchmark_pattern)
            roc_file = file_roc_data(roc_filename, patterns)
            print(patterns)

            # --- locate the output files of every algorithm ---
            filter_list = ['adjmat_estimate/', patterns['data'], patterns['parameters'], patterns['adjmat']]
            # random graphs are additionally restricted to the prob=0.01 runs
            is_random = 'random' in patterns['adjmat']
            if is_random:
                filter_list += ['prob=0.01']
            # the sampler archives are matched without the 'adjmat_estimate/' filter
            gt13_files = algo_files(BENCHPRESS_LOC, 'algorithm=/athomas_jtsampler',
                                    filter_list[1:] + ['adjvecs.tar.gz'])
            bdgraph_files = algo_files(BENCHPRESS_LOC, 'algorithm=/bdgraph',
                                       filter_list[1:] + ['.tar.gz'])

            psilearner_files = algo_files(BENCHPRESS_LOC, 'algorithm=/equsa_psilearner', filter_list)
            huge_glasso_files = algo_files(BENCHPRESS_LOC, 'algorithm=/huge_glasso', filter_list)
            huge_mb_files = algo_files(BENCHPRESS_LOC, 'algorithm=/huge_mb', filter_list)
            dualgl_files = algo_files(BENCHPRESS_LOC, 'algorithm=/dualgl', filter_list)
            if is_random:
                true_graph_files = algo_files(BENCHPRESS_LOC, 'adjmat/', [patterns['adjmat'], 'prob=0.01', '.csv'])
            else:
                true_graph_files = algo_files(BENCHPRESS_LOC, 'adjmat/', [patterns['adjmat'], '.csv'])

            print(f"Number of files (should be equal)  \n \
                GT13 \t\t\t\t{len(gt13_files)} \n \
                dualGL \t\t\t{len(dualgl_files)} \n \
                equsa_psilearner \t\t{len(psilearner_files)} \n \
                bdgraph \t\t{len(bdgraph_files)} \n \
                huge_glasso \t\t{len(huge_glasso_files)} \n \
                huge_mb \t\t{len(huge_mb_files)} \n \
                True graph files \t \t \t{len(true_graph_files)}")

            # --- score trajectory plots ---
            for f in gt13_files:
                score_traj_plot(f)
                #plt.ylim((0, 2400))
            plt.title(gt13_label + ', ' + get_title(patterns))
            save_location('gt_traj_' + get_title(patterns))

            for f in bdgraph_files:
                score_traj_plot(f)
            plt.title(bdgraph_label + ', ' + get_title(patterns))
            save_location('bdgraph_traj_' + get_title(patterns))

            # --- size trajectory plots ---
            for f in gt13_files:
                size_traj_plot(f)
            plt.title(gt13_label + ', ' + get_title(patterns))
            save_location('gt_size_' + get_title(patterns))

            for f in bdgraph_files:
                size_traj_plot(f)
            plt.title(bdgraph_label + ', ' + get_title(patterns))
            save_location('bdgraph_size_' + get_title(patterns))

            # --- heatmaps (files numbered from 0, titles from 1) ---
            # ', ' keeps the label apart from the description; without it the
            # title read e.g. "GT13-sn=50, ...".
            for k, f in enumerate(gt13_files):
                plot_heatmap(f, cbar=False)
                plt.title(gt13_label + ', ' + get_title(patterns) + f" Graph {k+1}")
                save_location('gt_heatmap_traj_' + get_title(patterns) + str(k))

            for k, f in enumerate(bdgraph_files):
                plot_heatmap(f, cbar=False)
                plt.title(bdgraph_label + ', ' + get_title(patterns) + f" Graph {k+1}")
                save_location('bdgraph_heatmap_traj_' + get_title(patterns) + str(k))

            # --- true graphs, drawn with plot_graph and with plot_nx_graph ---
            for k, f in enumerate(true_graph_files):
                plot_graph(read_csv_from_tar_gz(f), cbar=False)
                plt.title(f"Graph {k+1}" + ' - ' + patterns['adjmat'])
                save_location('true_graph_traj_' + patterns['adjmat'] + str(k))
                plt.clf()

            for k, f in enumerate(true_graph_files):
                plot_nx_graph(read_csv_from_tar_gz(f))
                plt.title(f"Graph {k+1}" + ' - ' + patterns['adjmat'])
                save_location('true_graph_' + patterns['adjmat'] + str(k))
                plt.clf()

            # --- max score graphs ---
            for k, f in enumerate(gt13_files):
                plot_max_score_graph(read_csv_from_tar_gz(f))
                plt.title(gt13_label + ', ' + get_title(patterns) + f" Graph {k+1}")
                save_location('gt_max_score_' + get_title(patterns) + str(k))

            for k, f in enumerate(bdgraph_files):
                plot_max_score_graph(read_csv_from_tar_gz(f))
                plt.title(bdgraph_label + ', ' + get_title(patterns) + f" Graph {k+1}")
                save_location('bdgraph_max_score_' + get_title(patterns) + str(k))

            # --- decomposable cover ---
            # GT13 trajectories are paired with true graphs by position; zip
            # stops at the shorter list, so the counts printed above should match.
            combined_list = []
            for est_traj, true_graph in zip(gt13_files, true_graph_files):
                print(est_traj)
                print(true_graph)
                true_adj = read_csv_from_tar_gz(true_graph)
                t_g = nx.from_numpy_array(true_adj.to_numpy())
                est_adj = read_csv_from_tar_gz(est_traj)
                # NOTE(review): the meaning of the 0.5 argument is defined by
                # get_decomposable_cover — confirm there.
                res = get_decomposable_cover(est_adj, t_g, 0.5)
                combined_list.append(res)
            patterns['percnt_cover'] = combined_list

            # Save the patterns together with the cover values to a JSON file
            filename = get_title(patterns)
            with open(SAVE_LOC+'decomposable_cover'+filename+'.json', 'w') as f:
                json.dump(patterns, f, indent=4)
    

{'adjmat': 'bandmat', 'data': 'n=50/', 'parameters': 'gwi'}
Number of files (should be equal)  
                 GT13 				5 
                 dualGL 			65 
                 equsa_psilearner 		65 
                 bdgraph 		5 
                 huge_glasso 		45 
                 huge_mb 		45 
                 True graph files 	 	 	5
save to: img/gt_traj_n=50, graph=bandmat, param=gwi.jpg
save to: img/bdgraph_traj_n=50, graph=bandmat, param=gwi.jpg
save to: img/gt_size_n=50, graph=bandmat, param=gwi.jpg
save to: img/bdgraph_size_n=50, graph=bandmat, param=gwi.jpg
save to: img/gt_heatmap_traj_n=50, graph=bandmat, param=gwi0.jpg
save to: img/gt_heatmap_traj_n=50, graph=bandmat, param=gwi1.jpg
save to: img/gt_heatmap_traj_n=50, graph=bandmat, param=gwi2.jpg
save to: img/gt_heatmap_traj_n=50, graph=bandmat, param=gwi3.jpg
save to: img/gt_heatmap_traj_n=50, graph=bandmat, param=gwi4.jpg
save to: img/bdgraph_heatmap_traj_n=50, graph=bandmat, param=gwi0.jpg
save to: img/bdgraph_heatmap_traj_n=50

In [2]:
import json
def read_json_and_extract_list(file_path):
  """
  Read a JSON file and return its parsed content.

  Despite the function name, nothing is extracted here: the whole decoded
  document is returned and the caller selects the entry it needs
  (e.g. ``read_json_and_extract_list(path)['percnt_cover']``).

  Args:
    file_path: The path to the JSON file.

  Returns:
    The decoded JSON document.

  Raises:
    OSError: If the file cannot be opened.
    json.JSONDecodeError: If the file does not contain valid JSON.
  """
  # Decode as UTF-8 explicitly so the result does not depend on the
  # platform's default locale encoding.
  with open(file_path, 'r', encoding='utf-8') as f:
    return json.load(f)

# plotting decomposable coverage percentage

In [4]:
import json
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Assuming these functions are defined elsewhere
# from your_module import get_title, read_json_and_extract_list, SAVE_LOC

parameters = ['gwi']
data = ['n=50/', 'n=100/', 'n=200/', 'n=500/']
adjmat = ['random', 'lattice', 'circle','bandmat']
# numeric sample sizes, position-aligned with `data`
data_labels = [50, 100, 200, 500]

# per-graph line styles and colours (not used by the boxplot in this cell)
line_type = {
    'circle': '-', 
    'random': '--', 
    'lattice': ':', 
    'bandmat': '-.'
}

line_color = {
    'circle': 'black', 
    'random': 'red', 
    'lattice': 'blue', 
    'bandmat': 'green'
}

# box fill colour per graph type, passed to seaborn as the hue palette
fill_colors = {
    'circle': 'darkgrey',
    'random': 'white',
    'lattice': 'lightgrey',
    'bandmat': 'black'
}

# One DataFrame per (graph type, sample size, parameter) file. Building the
# frames column-wise avoids creating one Python dict per value; each file
# holds about five million values.
frames = []

for adj in adjmat:
    for d, n in zip(data, data_labels):
        for par in parameters:
            patterns = {
                'adjmat': adj,
                'data': d,
                'parameters': par,
            }
            filename = get_title(patterns)
            file_path = SAVE_LOC + 'decomposable_cover' + filename + '.json'
            data_list = read_json_and_extract_list(file_path)['percnt_cover']
            # Only the first stored series is used; the alternative of
            # averaging across series is kept below for reference.
            data_list = data_list[0]

            # Calculate the mean of the data
            # data_list = [np.mean(i) for i in zip(*data_list)]

            print(f"adj {adj}: num samples {len(data_list)}")
            frames.append(pd.DataFrame({
                'Graph Type': adj,
                'Number of samples': n,
                'Percent of decomposable cover': data_list,
            }))

# Single long-format table for seaborn
df = pd.concat(frames, ignore_index=True)

# Set up the matplotlib figure
plt.figure(figsize=(7, 4))

# Create the boxplot using seaborn
sns.boxplot(x='Number of samples', y='Percent of decomposable cover', hue='Graph Type', 
            data=df, palette=fill_colors, linewidth=1,
            boxprops=dict(edgecolor='black'),
            fliersize=0)

# Customize the plot
plt.xlabel('Number of samples (n)')
plt.ylabel('Average true positive rate')
plt.legend(title='Graph Type', loc='lower right')
plt.ylim(0, 1)
save_location('decomposable_cover')
# Show the plot
#plt.show()

adj random: num sampels 4999989
adj random: num sampels 5000005
adj random: num sampels 5000001
adj random: num sampels 4999986
adj lattice: num sampels 5000000
adj lattice: num sampels 5000000
adj lattice: num sampels 5000001
adj lattice: num sampels 5000007
adj circle: num sampels 5000002
adj circle: num sampels 4999999
adj circle: num sampels 5000015
adj circle: num sampels 5000001
adj bandmat: num sampels 4999992
adj bandmat: num sampels 4999999
adj bandmat: num sampels 4999993
adj bandmat: num sampels 5000029
save to: img/decomposable_cover.jpg


: 