## Counting simplex-containing DAG2.

In this notebook, we will try to count all simplex-containing DAG2, that is DAG2 robust motifs on n nodes that contain an n-1 simplex.

### Imports 

Core imports

In [1]:
import multiprocessing as mp
from robust_motifs.custom_mp import prepare_shared_memory
from robust_motifs.counting import get_dag2_signature, get_element_targets
from robust_motifs.data import import_connectivity_matrix, save_er_graph, load_sparse_matrix_from_pkl

File tools

In [2]:
from pathlib import Path
import pickle
import h5py

Drawing tools

In [3]:
from robust_motifs.plot import plot_matrices
from robust_motifs.utilities import get_pos

Other tools

In [4]:
import scipy.sparse as sp
import numpy as np
from itertools import product
from time import time
import os
from tqdm import tqdm

In [15]:
pool = mp.Pool()

## Comparison with Erdos-Reyni graph, random simplices

### Creating ER graphs

In [5]:
import_connectivity_matrix(dataframe = False, type = 'csr')

100%|██████████| 55/55 [00:11<00:00,  4.65it/s]


<31346x31346 sparse matrix of type '<class 'numpy.bool_'>'
	with 7648079 stored elements in Compressed Sparse Row format>

In [12]:
n_nodes = 31346
density = 7648079/31346/31346

In [13]:
density

0.007783736164455195

In [14]:
path = Path("data/extended_simplices/full/ER_" + str(n_nodes) + ".flag")
path.parent.mkdir(parents=True, exist_ok = True)
save_er_graph(path, n_nodes, density)
os.system("flagser-count data/extended_simplices/full/ER_" +str(n_nodes)+".flag --out data/extended_simplices/full/ER_"+str(n_nodes)+"-count.h5")

100%|██████████| 31346/31346 [00:00<00:00, 3456857.29it/s]
7648630it [00:11, 673239.00it/s]


0

In [16]:
for _ in range(10):
    result_dictionary = {}
    aux_dictionary = {}
    file_path = Path("data/extended_simplices/full/ER_" + str(n_nodes) + ".flag")
    matrix_path = Path("data/extended_simplices/full/ER_" + str(n_nodes) + ".pkl")
    complex_path = Path("data/extended_simplices/full/ER_" + str(n_nodes) + "-count.h5")
    complex_file = h5py.File(complex_path, 'r')
    matrix = load_sparse_matrix_from_pkl(matrix_path)
    arrays, links = prepare_shared_memory(matrix, str(n_nodes))    
    for dimension in tqdm(range(1, 7)):
        try:
            random_selection = np.random.choice(complex_file["Cells_" + str(dimension)].shape[0],
                                                min(10000,complex_file["Cells_" + str(dimension)].shape[0]),
                                                replace = False)
            random_selection.sort()
            simplex_iterator = iter(complex_file["Cells_" + str(dimension)][random_selection])
            simplex_dictionary = {}
            for simplex in simplex_iterator:
                simplex_dictionary[simplex[-1]] = simplex_dictionary.get(simplex[-1], []) + [simplex]
            mp_iterator = product(simplex_dictionary.keys(), [arrays])
            results = pool.imap(get_element_targets, mp_iterator)
            for elem, key in zip(results, simplex_dictionary.keys()):
                result_dictionary[dimension] = result_dictionary.get(dimension, 0) + len(elem)*len(simplex_dictionary[key])
        except KeyError:
                result_dictionary[dimension] = 0
        try:
            aux_dictionary[dimension] = len(complex_file["Cells_" + str(dimension)])
        except KeyError:
            aux_dictionary[dimension] = 0           
    print(result_dictionary)
    print(aux_dictionary)
    for link in links:
        link.unlink()

100%|██████████| 6/6 [01:23<00:00, 13.97s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

{1: 18924, 2: 18831, 3: 18873, 4: 29, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}


100%|██████████| 6/6 [01:23<00:00, 13.99s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

{1: 18963, 2: 19017, 3: 18902, 4: 29, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}


100%|██████████| 6/6 [01:24<00:00, 14.01s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

{1: 18843, 2: 18844, 3: 19126, 4: 29, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}


100%|██████████| 6/6 [01:21<00:00, 13.63s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

{1: 18728, 2: 19039, 3: 19103, 4: 29, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}


100%|██████████| 6/6 [01:24<00:00, 14.15s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

{1: 18790, 2: 19029, 3: 18817, 4: 29, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}


100%|██████████| 6/6 [01:22<00:00, 13.80s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

{1: 18970, 2: 18711, 3: 18947, 4: 29, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}


100%|██████████| 6/6 [01:22<00:00, 13.68s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

{1: 18928, 2: 18668, 3: 19024, 4: 29, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}


100%|██████████| 6/6 [01:25<00:00, 14.32s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

{1: 18801, 2: 18636, 3: 19087, 4: 29, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}


100%|██████████| 6/6 [01:23<00:00, 13.95s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

{1: 18728, 2: 19000, 3: 18733, 4: 29, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}


100%|██████████| 6/6 [01:27<00:00, 14.63s/it]

{1: 19151, 2: 19059, 3: 19138, 4: 29, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}





## Comparison with ER graph: random simplices, random instances. 

In [17]:
for _ in range(10):
    path = Path("data/extended_simplices/full/ER_" + str(n_nodes) + ".flag")
    path.parent.mkdir(parents=True, exist_ok = True)
    save_er_graph(path, n_nodes, density)
    os.system("flagser-count data/extended_simplices/full/ER_" +str(n_nodes)+".flag --out data/extended_simplices/full/ER_"+str(n_nodes)+"-count.h5")
    result_dictionary = {}
    aux_dictionary = {}
    file_path = Path("data/extended_simplices/full/ER_" + str(n_nodes) + ".flag")
    matrix_path = Path("data/extended_simplices/full/ER_" + str(n_nodes) + ".pkl")
    complex_path = Path("data/extended_simplices/full/ER_" + str(n_nodes) + "-count.h5")
    complex_file = h5py.File(complex_path, 'r')
    matrix = load_sparse_matrix_from_pkl(matrix_path)
    arrays, links = prepare_shared_memory(matrix, str(n_nodes))    
    for dimension in tqdm(range(1, 7)):
        try:
            random_selection = np.random.choice(complex_file["Cells_" + str(dimension)].shape[0],
                                                min(10000,complex_file["Cells_" + str(dimension)].shape[0]),
                                                replace = False)
            random_selection.sort()
            simplex_iterator = iter(complex_file["Cells_" + str(dimension)][random_selection])
            simplex_dictionary = {}
            for simplex in simplex_iterator:
                simplex_dictionary[simplex[-1]] = simplex_dictionary.get(simplex[-1], []) + [simplex]
            mp_iterator = product(simplex_dictionary.keys(), [arrays])
            results = pool.imap(get_element_targets, mp_iterator)
            for elem, key in zip(results, simplex_dictionary.keys()):
                result_dictionary[dimension] = result_dictionary.get(dimension, 0) + len(elem)*len(simplex_dictionary[key])
        except KeyError:
                result_dictionary[dimension] = 0
        try:
            aux_dictionary[dimension] = len(complex_file["Cells_" + str(dimension)])
        except KeyError:
            aux_dictionary[dimension] = 0           
    print(result_dictionary)
    print(aux_dictionary)
    for link in links:
        link.unlink()

100%|██████████| 31346/31346 [00:00<00:00, 3386776.23it/s]
7644508it [00:11, 672087.37it/s]
100%|██████████| 6/6 [01:22<00:00, 13.76s/it]


{1: 19263, 2: 18962, 3: 19092, 4: 35, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}


100%|██████████| 31346/31346 [00:00<00:00, 3440303.88it/s]
7649650it [00:11, 659876.94it/s]
100%|██████████| 6/6 [01:22<00:00, 13.72s/it]


{1: 18926, 2: 18830, 3: 18641, 4: 39, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}


100%|██████████| 31346/31346 [00:00<00:00, 3332184.03it/s]
7646783it [00:11, 653319.08it/s]
100%|██████████| 6/6 [01:21<00:00, 13.66s/it]


{1: 18952, 2: 18851, 3: 19113, 4: 45, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}


100%|██████████| 31346/31346 [00:00<00:00, 3465970.35it/s]
7645591it [00:11, 667678.59it/s]
100%|██████████| 6/6 [01:22<00:00, 13.73s/it]


{1: 18755, 2: 18668, 3: 18756, 4: 30, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}


100%|██████████| 31346/31346 [00:00<00:00, 3460678.93it/s]
7647675it [00:11, 655573.01it/s]
100%|██████████| 6/6 [01:21<00:00, 13.64s/it]


{1: 18806, 2: 18592, 3: 18986, 4: 37, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}


100%|██████████| 31346/31346 [00:00<00:00, 3440934.16it/s]
7650312it [00:11, 667089.71it/s]
100%|██████████| 6/6 [01:20<00:00, 13.48s/it]


{1: 19058, 2: 18884, 3: 18967, 4: 37, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}


100%|██████████| 31346/31346 [00:00<00:00, 3458676.06it/s]
7647188it [00:11, 647960.50it/s]
100%|██████████| 6/6 [01:22<00:00, 13.73s/it]


{1: 19079, 2: 18744, 3: 19009, 4: 30, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}


100%|██████████| 31346/31346 [00:00<00:00, 3452772.03it/s]
7648804it [00:11, 676976.14it/s]
100%|██████████| 6/6 [01:21<00:00, 13.53s/it]


{1: 18986, 2: 19085, 3: 18883, 4: 34, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}


100%|██████████| 31346/31346 [00:00<00:00, 3426496.04it/s]
7645986it [00:11, 663511.45it/s]
100%|██████████| 6/6 [01:23<00:00, 13.91s/it]


{1: 19058, 2: 18754, 3: 18905, 4: 36, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}


100%|██████████| 31346/31346 [00:00<00:00, 3270513.76it/s]
7644935it [00:11, 666659.36it/s]
100%|██████████| 6/6 [01:21<00:00, 13.53s/it]

{1: 18702, 2: 18931, 3: 18829, 4: 34, 5: 0, 6: 0}
{1: 7648630, 2: 14529554, 3: 214516, 4: 18, 5: 0, 6: 0}





## Comparison with ER graph: random simplices in column

In [19]:
complex_path = Path("data/tesi/test_instance/column-count.h5")
complex_file = h5py.File(complex_path, 'r')
matrix = import_connectivity_matrix(dataframe = False, type = 'csr')
arrays, links = prepare_shared_memory(matrix, "full")    
for _ in range(10):
    result_dictionary = {}
    aux_dictionary = {}
    for dimension in tqdm(range(1, 7)):
        try:
            random_selection = np.random.choice(complex_file["Cells_" + str(dimension)].shape[0],
                                                min(10000,complex_file["Cells_" + str(dimension)].shape[0]),
                                                replace = False)
            random_selection.sort()
            simplex_iterator = iter(complex_file["Cells_" + str(dimension)][random_selection])
            simplex_dictionary = {}
            for simplex in simplex_iterator:
                simplex_dictionary[simplex[-1]] = simplex_dictionary.get(simplex[-1], []) + [simplex]
            mp_iterator = product(simplex_dictionary.keys(), [arrays])
            results = pool.imap(get_element_targets, mp_iterator)
            for elem, key in zip(results, simplex_dictionary.keys()):
                result_dictionary[dimension] = result_dictionary.get(dimension, 0) + len(elem)*len(simplex_dictionary[key])
        except KeyError:
                result_dictionary[dimension] = 0
        try:
            aux_dictionary[dimension] = len(complex_file["Cells_" + str(dimension)])
        except KeyError:
            aux_dictionary[dimension] = 0           
    print(result_dictionary)
    print(aux_dictionary)
for link in links:
    link.unlink()

100%|██████████| 55/55 [00:12<00:00,  4.58it/s]
100%|██████████| 6/6 [02:00<00:00, 20.10s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

{1: 82839, 2: 100792, 3: 114807, 4: 123421, 5: 133198, 6: 7383}
{1: 7648079, 2: 73036616, 3: 59945205, 4: 6599529, 5: 133115, 6: 529}


100%|██████████| 6/6 [01:58<00:00, 19.83s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

{1: 83113, 2: 99944, 3: 113144, 4: 126236, 5: 134194, 6: 7383}
{1: 7648079, 2: 73036616, 3: 59945205, 4: 6599529, 5: 133115, 6: 529}


100%|██████████| 6/6 [01:59<00:00, 19.87s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

{1: 83456, 2: 100101, 3: 112581, 4: 123676, 5: 133362, 6: 7383}
{1: 7648079, 2: 73036616, 3: 59945205, 4: 6599529, 5: 133115, 6: 529}


100%|██████████| 6/6 [01:59<00:00, 19.97s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

{1: 82661, 2: 101273, 3: 113128, 4: 123073, 5: 133636, 6: 7383}
{1: 7648079, 2: 73036616, 3: 59945205, 4: 6599529, 5: 133115, 6: 529}


100%|██████████| 6/6 [02:00<00:00, 20.01s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

{1: 82499, 2: 100166, 3: 114104, 4: 125626, 5: 133600, 6: 7383}
{1: 7648079, 2: 73036616, 3: 59945205, 4: 6599529, 5: 133115, 6: 529}


100%|██████████| 6/6 [02:00<00:00, 20.03s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

{1: 82831, 2: 100613, 3: 113691, 4: 124738, 5: 133746, 6: 7383}
{1: 7648079, 2: 73036616, 3: 59945205, 4: 6599529, 5: 133115, 6: 529}


100%|██████████| 6/6 [02:04<00:00, 20.70s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

{1: 82593, 2: 100348, 3: 112732, 4: 124446, 5: 133229, 6: 7383}
{1: 7648079, 2: 73036616, 3: 59945205, 4: 6599529, 5: 133115, 6: 529}


100%|██████████| 6/6 [02:01<00:00, 20.25s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

{1: 83379, 2: 100029, 3: 113366, 4: 123863, 5: 133171, 6: 7383}
{1: 7648079, 2: 73036616, 3: 59945205, 4: 6599529, 5: 133115, 6: 529}


100%|██████████| 6/6 [02:00<00:00, 20.10s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

{1: 82228, 2: 100877, 3: 112881, 4: 124610, 5: 133965, 6: 7383}
{1: 7648079, 2: 73036616, 3: 59945205, 4: 6599529, 5: 133115, 6: 529}


100%|██████████| 6/6 [02:00<00:00, 20.08s/it]

{1: 83436, 2: 101611, 3: 114114, 4: 123197, 5: 133391, 6: 7383}
{1: 7648079, 2: 73036616, 3: 59945205, 4: 6599529, 5: 133115, 6: 529}



