# Create synthetic list of graphs for testing

In [91]:
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [112]:
%autoreload 0

In [32]:
import networkx as nx
import pandas as pd

In [2]:
import tqdm

In [3]:
Graphs = []

In [5]:
import random
import numpy as np

def synthetic_data_sbm(N=1000, seed=None):
    if seed:
        random.seed(seed)
        np.random.seed(seed)
        
    graphs = []
    graph_labels = [] 
    prob_in = 0.9
    prob_out = 0.25
    
    for i in range(int(N/2)):
        G = nx.stochastic_block_model(
            [random.randint(2,3),random.randint(2,3),random.randint(2,3)],
            [[prob_in,prob_out,prob_out],[prob_out,prob_in,prob_out],[prob_out,prob_out,prob_in]])
        
        for u,v in G.edges: 
            if len(G[u][v]) == 0:
                G[u][v]['weight'] = 1.
        graphs.append(G)
        graph_labels.append(1)
    
    for i in range(int(N/2)):
        G = nx.stochastic_block_model(
            [random.randint(2,3),random.randint(2,3)],
            [[prob_in, prob_out],[prob_out, prob_in]])
        for u,v in G.edges: 
            if len(G[u][v]) == 0:
                G[u][v]['weight'] = 1.
        graphs.append(G)
        graph_labels.append(2)
    
    return graphs, np.asarray(graph_labels)

In [47]:
rgraphs, rlabels = synthetic_data_sbm(N=4, seed=42)

In [42]:
nx.is_connected(rgraphs[3])


True

In [22]:
import hcga.graphs as hcga_graphs

In [48]:
import importlib

importlib.reload(hcga_graphs)
importlib.reload(hcga_graphs.Operations)

TypeError: reload() argument must be a module

In [49]:
g = hcga_graphs.Graphs(graphs=rgraphs, graph_class=rlabels)
g.n_processes = 4

In [50]:
g.calculate_features(calc_speed='veryfast', parallel = True)



100%|██████████| 4/4 [00:00<00:00, 74.37it/s]

Computation time for feature: Components is 0.0 seconds.
Computation time for feature: NodeLabels is 0.0 seconds.
Computation time for feature: NodeFeaturesConv is 0.0 seconds.
Computation time for feature: DominatingSets is 0.0 seconds.
Computation time for feature: IndependentSets is 0.0 seconds.
Computation time for feature: ScaleFree is 0.0 seconds.
Computation time for feature: Cycles is 0.0 seconds.
Computation time for feature: Cliques is 0.0 seconds.
Computation time for feature: ChemicalTheory is 0.0 seconds.
Computation time for feature: MaximalMatching is 0.001 seconds.
Computation time for feature: Diameter is 0.001 seconds.
Computation time for feature: SpectrumModularity is 0.002 seconds.
Computation time for feature: ShortestPaths is 0.002 seconds.
Computation time for feature: SpectrumAdjacency is 0.002 seconds.
Computation time for feature: Assortativity is 0.002 seconds.
Computation time for feature: MinimumCuts is 0.004 seconds.
Computation time for feature: EdgeFeat




In [51]:
g.graph_feature_matrix

Unnamed: 0,CComp_is_connected,SF_s_metric,SP_path_length_mean,SP_path_length_mean_max,SP_path_length_max,CY_num_cycles,CY_mean_cycle_length,CY_longest_cycle,Cl_number_of_cliques,DI_diameter,...,SM_M_eigvals_ratio_5_2_E,SM_M_eigvals_ratio_5_3_E,SM_M_eigvals_ratio_5_4_E,SM_M_eigvals_ratio_6_0_E,SM_M_eigvals_ratio_6_1_E,SM_M_eigvals_ratio_6_2_E,SM_M_eigvals_ratio_6_3_E,SM_M_eigvals_ratio_6_4_E,SM_M_eigvals_ratio_6_5_E,SM_M_eigvals_min_E
0,1.0,54.0,2.714286,4.285714,5.0,2.0,3.0,3.0,4.0,4.0,...,0.0,0.069611,0.086098,-0.0516862,-0.09708023,0.0,0.04173864,0.05162365,0.07494939,-0.22446
1,0.0,72.0,2.469388,3.571429,4.0,3.0,3.333333,4.0,5.0,3.0,...,0.028069,-0.084565,0.050265,1.527573e-17,-1.057537e-17,-1.5330660000000002e-17,4.618694e-17,-2.745327e-17,-6.068584e-17,-0.241702
2,1.0,41.0,2.5,3.666667,4.0,2.0,3.0,3.0,3.0,3.0,...,0.0,0.333333,0.142857,0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.247436
3,1.0,42.0,2.2,3.4,4.0,2.0,3.0,3.0,3.0,3.0,...,0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.0,0.0,-0.296336


In [29]:
hash(g.graph_feature_matrix)

TypeError: 'DataFrame' objects are mutable, thus they cannot be hashed

In [30]:
import hashlib


In [31]:
h = hashlib.blake2b()
h.update(g.graph_feature_matrix)
h.hexdigest()


TypeError: object supporting the buffer API required

In [37]:
hashvalue = pd.util.hash_pandas_object(g.graph_feature_matrix).sum()

In [40]:
pd.util.hash_pandas_object(g.graph_feature_matrix).sum()

-8886006431336352712

In [52]:
pd.util.hash_pandas_object(g.graph_feature_matrix).sum()

-505139337749376602