# Storing results in a table

In [1]:
cd ~/thesis

[Errno 2] No such file or directory: '/home/home/thesis'
/home/home/Nextcloud/University/Thesis/code_source


Import libraries

In [5]:
import itertools
import os
import pickle

import numpy as np
import pandas as pd
import networkx as nx

# Modify eliorc's implementation
from eliorc_mod.node2vec import Node2Vec
from gensim.models import Word2Vec

from main_utils import *
from plot_utils import *

# Stays below the star imports, as before, so they cannot rebind `datetime`.
from datetime import datetime

## Erdős–Rényi

In [11]:
# Fixed seed so the same G(n, p) sample is drawn on every run; without it the
# graph behind the stored results cannot be regenerated.
# (The embedding training done by the helpers may still be stochastic.)
GRAPH_SEED = 42
initial_graph = nx.erdos_renyi_graph(256, 0.15, seed=GRAPH_SEED)
initial_graph.name = 'erdos_renyi'

# node2vec settings, forwarded unchanged to the helpers below
# (presumably dimensions, walks per node, walk length, p, q — confirm in main_utils).
params = [64, 8, 8, 0.25, 4]

added_nodes_num = 16

(X_global, y_global, X_local, y_local,
 total_global_time, total_local_time, num_starting_nodes) = dynamic_extend_compare(
    initial_graph, added_nodes_num, params, quiet_bool=True)

num_different_nodes = added_nodes_num

# Nine test-set fractions, 0.1 through 0.9.
test_sizes = np.arange(0.1, 1, 0.1)
mod_type = 'extend'

local_vars = [X_local, y_local]
global_vars = [X_global, y_global]
training_times = total_global_time, total_local_time

results_df = results_store_func(
    initial_graph, mod_type, local_vars, global_vars, test_sizes,
    num_different_nodes, num_starting_nodes, params, training_times)
# results_df

(240, 4314) (256, 4939)


In [None]:
# Fixed seed so the same G(n, p) sample is drawn on every run; without it the
# graph behind the stored results cannot be regenerated.
# (The embedding training done by the helpers may still be stochastic.)
GRAPH_SEED = 42
initial_graph = nx.erdos_renyi_graph(256, 0.15, seed=GRAPH_SEED)
initial_graph.name = 'erdos_renyi'

# node2vec settings, forwarded unchanged to the helpers below
# (presumably dimensions, walks per node, walk length, p, q — confirm in main_utils).
params = [64, 8, 8, 0.25, 4]

removed_nodes_num = 8

(X_global, y_global, X_local, y_local,
 total_global_time, total_local_time, num_starting_nodes) = dynamic_prune_compare(
    initial_graph, removed_nodes_num, params, quiet_bool=True)

num_different_nodes = removed_nodes_num

# Nine test-set fractions, 0.1 through 0.9.
test_sizes = np.arange(0.1, 1, 0.1)
mod_type = 'prune'

local_vars = [X_local, y_local]
global_vars = [X_global, y_global]
training_times = total_global_time, total_local_time

results_df = results_store_func(
    initial_graph, mod_type, local_vars, global_vars, test_sizes,
    num_different_nodes, num_starting_nodes, params, training_times)
# results_df

## Cora

In [3]:
# Load the Cora citation dataset from a folder under the user's home directory.
data_dir = os.path.expanduser("~/thesis/datasets/cora")

# cora.cites: tab-separated edge list without a header row.
edgelist = pd.read_csv(os.path.join(data_dir, "cora.cites"), sep='\t', header=None, names=["target", "source"])
edgelist["label"] = "cites"

# Build the graph from the edge list, carrying "label" along as an edge attribute.
initial_graph = nx.from_pandas_edgelist(edgelist, edge_attr="label")
initial_graph.name = 'cora'

# Give every node the attribute label="paper".
nx.set_node_attributes(initial_graph, "paper", "label")

# cora.content: read with 1433 feature column names followed by "subject".
# NOTE(review): the node_num values displayed below look like paper ids, so the
# file's leading id column presumably ends up as the DataFrame index — confirm.
feature_names = ["w_{}".format(ii) for ii in range(1433)]
column_names =  feature_names + ["subject"]
node_data = pd.read_csv(os.path.join(data_dir, "cora.content"), sep='\t', header=None, names=column_names)

# Two-column table: the former index becomes 'node_num', the subject becomes 'group'.
group_df = node_data['subject'].reset_index()
group_df.columns = ['node_num', 'group']

# Replace subject strings by integer codes, then shift the codes by 4.
group_df['group'], _ = pd.factorize(group_df['group'])
group_df['group'] += 4 # original note: "number of groups for new graph" — TODO clarify why the offset is 4
# groups_assign is a project helper defined outside this notebook; here it
# receives the full graph twice plus the group table.
groups_dict = groups_assign(initial_graph, initial_graph, group_df)

group_df.head()

Unnamed: 0,node_num,group
0,31336,4
1,1061127,5
2,1106406,6
3,13195,6
4,37879,7


In [4]:
advanced_info(initial_graph)

Graph Information:
Number of nodes: 2708
Number of edges: 5278
Density: 0.0014399999126942077
Is connected: False
Average clustering coefficient: 0.2406732985019372
Directed: False


### Centrality

In [5]:
# Recomputed here so this section can be run without re-running the tail of the
# loading cell (which already assigns groups_dict the same way).
groups_dict = groups_assign(initial_graph, initial_graph, group_df)

# Settings shared by the cells below: how many nodes are modified, and which
# removal process the dynamic graph sequence was generated with.
mod_nodes_num = 512
removal_process = 'betweenness_centrality'
# Generation is disabled; the pre-computed sequence is loaded from disk in the next cell.
# graphs_list = dynamic_graph_gen(initial_graph, mod_nodes_num, removal_process=removal_process)

In [7]:
# File name pattern: <graph name>_<removal process>_<node count>.pkl
# (despite its name, this variable holds a single file name, not a list).
graphs_filenames_list = f'{initial_graph.name}_{removal_process}_{mod_nodes_num}.pkl'

# Load the list of graphs from the file
# NOTE: unpickling can execute arbitrary code — only load files you generated yourself.
with open(f'./graphs/{graphs_filenames_list}', 'rb') as f:
    graphs_list = pickle.load(f)

In [8]:
# node2vec settings for this run
# (presumably dimensions, walks per node, walk length, p, q — confirm against results_output_func).
params = (128, 40, 80, 0.25, 1)

# Run both modification types on the same graph sequence.
# results_df is overwritten each iteration, so it ends up holding the 'prune' result.
for mod_type in ['extend', 'prune']:
    results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list, removal_process=removal_process)

** Modification type: extend for 512 nodes **
Graphs:
(2196, 2754) (2708, 5278)
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/8 [00:00<?, ?it/s]

Fitting model...
** Modification type: prune for 512 nodes **
Graphs:
(2196, 2754) (2708, 5278)
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...


### Grid search 

In [None]:
# node2vec_configs is defined in a later cell of this notebook (BlogCatalog -> Grid search);
# on a fresh kernel that cell has to be run before this one.
parameter_combinations = node2vec_configs()

# One run per parameter combination and modification type.
# NOTE(review): removal_process is not forwarded here, unlike the Centrality cell —
# confirm the helper's default matches how graphs_list was generated.
for params in tqdm(parameter_combinations):
    for mod_type in ['extend', 'prune']:
        results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list)

  0%|          | 0/2 [00:00<?, ?it/s]

** Modification type: extend for 128 nodes **
Graphs:
(2580, 4994) (2708, 5278)
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/2 [00:15<?, ?it/s]


## BlogCatalog

In [None]:
# Expand '~' explicitly: nx.read_edgelist passes a string path to open() as-is,
# so a literal '~' would raise FileNotFoundError.
blog_dir = os.path.expanduser('~/thesis/datasets/blog_catalog/data/')

# NOTE(review): read_edgelist keeps node ids as strings unless nodetype is given,
# while read_csv below parses them as numbers — confirm groups_assign copes with that.
initial_graph = nx.read_edgelist(os.path.join(blog_dir, 'edges.csv'), delimiter=',')
initial_graph.name = 'blog_catalog'

# Groups mapping
# NOTE(review): the first row of the file is consumed as a header and then renamed;
# if group-edges.csv has no header row, pass header=None to avoid losing that row.
group_df = pd.read_csv(os.path.join(blog_dir, 'group-edges.csv'), delimiter=',')
group_df.columns = ['node_num', 'group']

In [None]:
groups_dict = groups_assign(initial_graph, initial_graph, group_df)

In [6]:
mod_nodes_num = 512
graphs_list = dynamic_graph_gen(initial_graph, mod_nodes_num)

Generating list of dynamic graphs:


  8%|▊         | 39/512 [01:58<23:52,  3.03s/it]


KeyboardInterrupt: 

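**IMPORTANT:** Save the graph list. Generating it is slow, so call `dynamic_graph_gen(..., save_bool=True)` and reload the pickle instead of regenerating it.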

In [None]:
mod_type = 'extend'
params = (128, 40, 80, 0.25, 1)

results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list)
results_df.head(8)

** Modification type: extend for 512 nodes **
Graphs:
(9800, 292854) (10312, 333983)
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


In [None]:
mod_type = 'prune'
params = (128, 40, 80, 0.25, 1)

results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list)
results_df.head(8)

### Grid search

In [3]:
def node2vec_configs(d_values=(128,), r_values=(80,), l_values=(80,), p_values=(0.25,), q_values=(1,)):
    '''
        Generates the list of node2vec parameter configurations (grid search).

        Each argument is an iterable of candidate values for one position of the
        parameter tuple. The meaning of the positions is decided by the code that
        consumes the tuples (presumably dimensions, walks per node, walk length,
        p and q — confirm against results_output_func).

        Called without arguments it returns the single configuration
        [(128, 80, 80, 0.25, 1)], exactly as before. Wider grids are requested by
        passing values instead of editing this function, e.g. the 24-combination grid

            node2vec_configs(r_values=[40, 80],
                             p_values=[0.25, 1, 2, 4],
                             q_values=[1, 2, 4])

        Returns:
            list of 5-tuples (d, r, l, p, q): the Cartesian product of the inputs,
            with the last argument varying fastest. Empty if any input is empty.
    '''
    parameter_values = [d_values, r_values, l_values, p_values, q_values]

    # Cartesian product of all candidate values
    return list(itertools.product(*parameter_values))

In [None]:
parameter_combinations = node2vec_configs()
    
for params in tqdm(parameter_combinations):
    for mod_type in ['extend', 'prune']:
        results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list)

In [None]:
# Long running session: generates the dynamic graph sequence and, because
# save_bool=True is passed, asks dynamic_graph_gen to persist it for later reuse.

mod_nodes_num = 512
removal_process = 'betweenness_centrality'
graphs_list = dynamic_graph_gen(initial_graph, mod_nodes_num, removal_process=removal_process, save_bool=True)

In [None]:
parameter_combinations = node2vec_configs()
    
for params in tqdm(parameter_combinations):
    for mod_type in ['extend', 'prune']:
        results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list)

In [None]:
ls

analysis_utils.py  [0m[01;34mfigures[0m/       [01;34mold[0m/           results.csv          test.py
[01;34mdatasets[0m/          main_utils.py  plot_utils.py  results_store.ipynb  [01;34mwalks[0m/
[01;34meliorc_mod[0m/        [01;34mmodels[0m/        [01;34m__pycache__[0m/   [01;34mtemp_folder[0m/


## Wikipedia

In [4]:
filepath = 'datasets/wikipedia/POS.mat'
initial_graph, group_df = mat_load_func(filepath)
initial_graph.name = 'wikipedia'

In [5]:
# groups_assign is a project helper defined outside this notebook; here it
# receives the full graph twice plus the group table.
groups_dict = groups_assign(initial_graph, initial_graph, group_df)

mod_nodes_num = 512
# save_bool is left at its default (False), so this sequence is not written to disk
# and the run (about 15 minutes in the recorded output) repeats on every fresh kernel.
graphs_list = dynamic_graph_gen(initial_graph, mod_nodes_num, removal_process='betweenness_centrality')

Generating list of dynamic graphs:


100%|██████████| 512/512 [14:33<00:00,  1.71s/it]


In [6]:
nodes_edges_func(initial_graph)

(4777, 92517)

### Centrality

In [None]:
params = (128, 40, 80, 0.25, 1)

for mod_type in ['extend', 'prune']:
    results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list, removal_process='betweenness_centrality')

** Modification type: extend for 512 nodes **
Graphs:
(4265, 76411) (4777, 92517)
Computing transition probabilities...
Random walks in progress...


In [11]:
mod_type = 'extend'
params = [64, 8, 8, 0.25, 4]

results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list)
results_df.head(8)

** Modification type: extend for 1024 nodes **
Graphs:
(3753, 63531) (4777, 92517)
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/8 [00:00<?, ?it/s]

Fitting model...


Unnamed: 0,dataset,nodes,edges,retraining_type,metric,10%,20%,30%,40%,50%,60%,70%,80%,90%,parameters,training_time,dynamic_update,num_different_nodes,num_starting_nodes,execution_timestamp
0,wikipedia,4777,92517,global,macro,0.081872,0.04983,0.052619,0.04462,0.039447,0.036514,0.030789,0.027438,0.025785,"[64, 8, 8, 0.25, 4]",604.198201,extend,1024,1024,05/06/2024 - 13:00
1,wikipedia,4777,92517,global,micro,0.491632,0.472803,0.470711,0.475667,0.475931,0.476456,0.477871,0.47619,0.472791,"[64, 8, 8, 0.25, 4]",604.198201,extend,1024,1024,05/06/2024 - 13:00
2,wikipedia,4777,92517,local,macro,0.084981,0.061396,0.064967,0.040447,0.04495,0.036579,0.033579,0.029248,0.023131,"[64, 8, 8, 0.25, 4]",500.43508,extend,1024,1024,05/06/2024 - 13:00
3,wikipedia,4777,92517,local,micro,0.483264,0.476987,0.478382,0.475667,0.483884,0.480293,0.486244,0.478022,0.473488,"[64, 8, 8, 0.25, 4]",500.43508,extend,1024,1024,05/06/2024 - 13:00
4,cora,2708,5278,global,macro,0.695142,0.664917,0.662999,0.669191,0.674114,0.676894,0.667904,0.641703,0.605866,"[64, 8, 8, 0.25, 4]",2.184401,prune,1024,1688,05/06/2024 - 12:03
5,cora,2708,5278,global,micro,0.704142,0.688427,0.695652,0.689911,0.686461,0.691395,0.676845,0.652819,0.614116,"[64, 8, 8, 0.25, 4]",2.184401,prune,1024,1688,05/06/2024 - 12:03
6,cora,2708,5278,local,macro,0.292453,0.256135,0.25083,0.26352,0.254546,0.217255,0.203417,0.196845,0.154591,"[64, 8, 8, 0.25, 4]",0.831088,prune,1024,1688,05/06/2024 - 12:03
7,cora,2708,5278,local,micro,0.402367,0.388724,0.379447,0.394659,0.380048,0.37092,0.357082,0.364985,0.346966,"[64, 8, 8, 0.25, 4]",0.831088,prune,1024,1688,05/06/2024 - 12:03


In [12]:
mod_type = 'prune'
params = [64, 8, 8, 0.25, 4]


results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list)
results_df.head(8)

** Modification type: prune for 1024 nodes **
Graphs:
(3753, 63531) (4777, 92517)
1024
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...


Unnamed: 0,dataset,nodes,edges,retraining_type,metric,10%,20%,30%,40%,50%,60%,70%,80%,90%,parameters,training_time,dynamic_update,num_different_nodes,num_starting_nodes,execution_timestamp
0,wikipedia,4777,92517,global,macro,0.045538,0.037732,0.051527,0.044686,0.038411,0.031108,0.029101,0.026497,0.023757,"[64, 8, 8, 0.25, 4]",373.358892,prune,1024,4245,05/06/2024 - 13:21
1,wikipedia,4777,92517,global,micro,0.446809,0.466045,0.459147,0.454061,0.464571,0.470693,0.471842,0.472194,0.468028,"[64, 8, 8, 0.25, 4]",373.358892,prune,1024,4245,05/06/2024 - 13:21
2,wikipedia,4777,92517,local,macro,0.029511,0.029614,0.027793,0.025567,0.024318,0.021647,0.021663,0.020931,0.020462,"[64, 8, 8, 0.25, 4]",293.332361,prune,1024,4245,05/06/2024 - 13:21
3,wikipedia,4777,92517,local,micro,0.417553,0.450067,0.457371,0.460053,0.465104,0.468028,0.469939,0.470862,0.464476,"[64, 8, 8, 0.25, 4]",293.332361,prune,1024,4245,05/06/2024 - 13:21
4,wikipedia,4777,92517,global,macro,0.081872,0.04983,0.052619,0.04462,0.039447,0.036514,0.030789,0.027438,0.025785,"[64, 8, 8, 0.25, 4]",604.198201,extend,1024,1024,05/06/2024 - 13:00
5,wikipedia,4777,92517,global,micro,0.491632,0.472803,0.470711,0.475667,0.475931,0.476456,0.477871,0.47619,0.472791,"[64, 8, 8, 0.25, 4]",604.198201,extend,1024,1024,05/06/2024 - 13:00
6,wikipedia,4777,92517,local,macro,0.084981,0.061396,0.064967,0.040447,0.04495,0.036579,0.033579,0.029248,0.023131,"[64, 8, 8, 0.25, 4]",500.43508,extend,1024,1024,05/06/2024 - 13:00
7,wikipedia,4777,92517,local,micro,0.483264,0.476987,0.478382,0.475667,0.483884,0.480293,0.486244,0.478022,0.473488,"[64, 8, 8, 0.25, 4]",500.43508,extend,1024,1024,05/06/2024 - 13:00


### Grid search

In [None]:
# Parameter grid comes from node2vec_configs, defined earlier in this notebook.
parameter_combinations = node2vec_configs()

for params in tqdm(parameter_combinations):
    # Only 'prune' is run in this cell; the two-type loop is kept for reference:
    # for mod_type in ['extend', 'prune']:
    for mod_type in ['prune']:
        results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list)

  0%|          | 0/1 [00:00<?, ?it/s]

** Modification type: prune for 512 nodes **
Graphs:
(4265, 76411) (4777, 92517)
Computing transition probabilities...
Random walks in progress...


## PPI

In [7]:
initial_graph, group_df = mat_load_func('datasets/PPI/PPI.mat')
initial_graph.name = 'PPI'

In [8]:
nodes_edges_func(initial_graph)

(3890, 38739)

In [5]:
groups_dict = groups_assign(initial_graph, initial_graph, group_df)

mod_nodes_num = 512
graphs_list = dynamic_graph_gen(initial_graph, mod_nodes_num)

Generating list of dynamic graphs:


100%|██████████| 512/512 [06:13<00:00,  1.37it/s]


### Centrality

In [None]:
# groups_assign is a project helper defined outside this notebook; here it
# receives the full graph twice plus the group table.
groups_dict = groups_assign(initial_graph, initial_graph, group_df)

mod_nodes_num = 512
removal_process = 'betweenness_centrality'
# Very slow for this removal process (the recorded progress bar shows roughly 12 hours
# for 423 of 512 steps), hence save_bool=True to persist the result.
graphs_list = dynamic_graph_gen(initial_graph, mod_nodes_num, removal_process=removal_process, save_bool=True)

Generating list of dynamic graphs:


 83%|████████████████████████████▉      | 423/512 [11:55:15<4:54:58, 198.86s/it]

In [None]:
params = (128, 40, 80, 0.25, 1)

for mod_type in ['extend', 'prune']:
    results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list, removal_process='betweenness_centrality')

In [16]:
mod_type = 'extend'
params = [64, 8, 8, 0.25, 4]

results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list)
results_df.head(8)

** Modification type: extend for 1024 nodes **
Graphs:
(2866, 23359) (3890, 38739)
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/8 [00:00<?, ?it/s]

Fitting model...


Unnamed: 0,dataset,nodes,edges,retraining_type,metric,10%,20%,30%,40%,50%,60%,70%,80%,90%,parameters,training_time,dynamic_update,num_different_nodes,num_starting_nodes,execution_timestamp
0,PPI,3890,38739,global,macro,0.097459,0.092602,0.089124,0.083537,0.082383,0.09212,0.08417,0.080883,0.060841,"[64, 8, 8, 0.25, 4]",43.447217,extend,1024,1024,05/06/2024 - 13:36
1,PPI,3890,38739,global,micro,0.156812,0.1491,0.148116,0.140746,0.135733,0.136675,0.13069,0.128856,0.103113,"[64, 8, 8, 0.25, 4]",43.447217,extend,1024,1024,05/06/2024 - 13:36
2,PPI,3890,38739,local,macro,0.093724,0.107513,0.100109,0.096566,0.093822,0.086901,0.077249,0.069578,0.052015,"[64, 8, 8, 0.25, 4]",38.362036,extend,1024,1024,05/06/2024 - 13:36
3,PPI,3890,38739,local,micro,0.159383,0.161954,0.161815,0.148458,0.144473,0.13539,0.11931,0.112147,0.091688,"[64, 8, 8, 0.25, 4]",38.362036,extend,1024,1024,05/06/2024 - 13:36
4,wikipedia,4777,92517,global,macro,0.045538,0.037732,0.051527,0.044686,0.038411,0.031108,0.029101,0.026497,0.023757,"[64, 8, 8, 0.25, 4]",373.358892,prune,1024,4245,05/06/2024 - 13:21
5,wikipedia,4777,92517,global,micro,0.446809,0.466045,0.459147,0.454061,0.464571,0.470693,0.471842,0.472194,0.468028,"[64, 8, 8, 0.25, 4]",373.358892,prune,1024,4245,05/06/2024 - 13:21
6,wikipedia,4777,92517,local,macro,0.029511,0.029614,0.027793,0.025567,0.024318,0.021647,0.021663,0.020931,0.020462,"[64, 8, 8, 0.25, 4]",293.332361,prune,1024,4245,05/06/2024 - 13:21
7,wikipedia,4777,92517,local,micro,0.417553,0.450067,0.457371,0.460053,0.465104,0.468028,0.469939,0.470862,0.464476,"[64, 8, 8, 0.25, 4]",293.332361,prune,1024,4245,05/06/2024 - 13:21


In [17]:
mod_type = 'prune'
params = [64, 8, 8, 0.25, 4]

results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list)
results_df.head(8)

** Modification type: prune for 1024 nodes **
Graphs:
(2866, 23359) (3890, 38739)
1024
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...


Unnamed: 0,dataset,nodes,edges,retraining_type,metric,10%,20%,30%,40%,50%,60%,70%,80%,90%,parameters,training_time,dynamic_update,num_different_nodes,num_starting_nodes,execution_timestamp
0,PPI,3890,38739,global,macro,0.08748,0.078519,0.083875,0.081866,0.079652,0.081975,0.079652,0.068709,0.054841,"[64, 8, 8, 0.25, 4]",23.138397,prune,1024,2746,05/06/2024 - 13:38
1,PPI,3890,38739,global,micro,0.142857,0.125436,0.137209,0.129904,0.124913,0.131977,0.130045,0.118622,0.098062,"[64, 8, 8, 0.25, 4]",23.138397,prune,1024,2746,05/06/2024 - 13:38
2,PPI,3890,38739,local,macro,0.033213,0.037811,0.035759,0.031038,0.030844,0.025366,0.027298,0.017826,0.01204,"[64, 8, 8, 0.25, 4]",18.799618,prune,1024,2746,05/06/2024 - 13:38
3,PPI,3890,38739,local,micro,0.073171,0.074913,0.065116,0.067132,0.069784,0.064535,0.063279,0.043611,0.05,"[64, 8, 8, 0.25, 4]",18.799618,prune,1024,2746,05/06/2024 - 13:38
4,PPI,3890,38739,global,macro,0.097459,0.092602,0.089124,0.083537,0.082383,0.09212,0.08417,0.080883,0.060841,"[64, 8, 8, 0.25, 4]",43.447217,extend,1024,1024,05/06/2024 - 13:36
5,PPI,3890,38739,global,micro,0.156812,0.1491,0.148116,0.140746,0.135733,0.136675,0.13069,0.128856,0.103113,"[64, 8, 8, 0.25, 4]",43.447217,extend,1024,1024,05/06/2024 - 13:36
6,PPI,3890,38739,local,macro,0.093724,0.107513,0.100109,0.096566,0.093822,0.086901,0.077249,0.069578,0.052015,"[64, 8, 8, 0.25, 4]",38.362036,extend,1024,1024,05/06/2024 - 13:36
7,PPI,3890,38739,local,micro,0.159383,0.161954,0.161815,0.148458,0.144473,0.13539,0.11931,0.112147,0.091688,"[64, 8, 8, 0.25, 4]",38.362036,extend,1024,1024,05/06/2024 - 13:36


### Grid search

In [5]:
parameter_combinations = node2vec_configs()


In [None]:
# NOTE: this was temporarily run on the Wikipedia graph as a test, which is why the
# node and edge counts in the recorded output differ from PPI's.

# One run per parameter combination and modification type;
# results_df keeps only the last returned value.
for params in tqdm(parameter_combinations):
    for mod_type in ['extend', 'prune']:
        results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list)

  0%|          | 0/24 [00:00<?, ?it/s]

** Modification type: extend for 512 nodes **
Graphs:
(4265, 76411) (4777, 92517)
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/8 [00:00<?, ?it/s]

Fitting model...
** Modification type: prune for 512 nodes **
Graphs:
(4265, 76411) (4777, 92517)
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


## Other

### Saving dynamic graphs

In [3]:
dynamic_graph_gen

<function main_utils.dynamic_graph_gen(initial_graph, num_nodes_to_remove, save_bool=False, removal_process='random')>

In [4]:
# TODO: refactor — this cell redefines dynamic_graph_gen and shadows the version imported from main_utils
def dynamic_graph_gen(initial_graph, num_nodes_to_remove, save_bool=False, removal_process='random'):
    '''
    Generates a list of dynamically updated graphs starting from a subgraph of the initial graph.

    Nodes are removed one at a time with remove_nodes_connected; every intermediate
    graph is kept and the list is reversed at the end, so it runs from the smallest
    graph (index 0) up to initial_graph (index -1).

    Parameters:
        initial_graph (NetworkX graph): The initial graph. Its .name is used in the file name when saving.
        num_nodes_to_remove (int): The number of nodes to remove from the initial graph.
        save_bool (bool): If True, pickle the list to
            ./graphs/<graph name>_<removal_process>_<num_nodes_to_remove>.pkl
        removal_process (str): Forwarded unchanged to remove_nodes_connected
            (presumably selects how the node to remove is chosen — see that helper).

    Returns:
        graphs_list: A list of num_nodes_to_remove + 1 graphs, smallest first.
    '''
    if save_bool:
        # Create the output folder before the (possibly hours-long) loop, so a missing
        # directory cannot make the final write fail after all the work is done.
        os.makedirs('./graphs', exist_ok=True)

    graphs_list = [initial_graph]
    dynamic_graph = initial_graph

    print("Generating list of dynamic graphs:")
    for _ in tqdm(range(num_nodes_to_remove)):
        # Assumes the helper returns a new graph rather than mutating its input;
        # otherwise every list entry would alias the same object — TODO confirm.
        dynamic_graph, _unused = remove_nodes_connected(dynamic_graph, 1, removal_process)
        graphs_list.append(dynamic_graph)

    # Smallest graph first, initial graph last
    graphs_list = graphs_list[::-1]

    if save_bool:
        graphs_filename = f'{initial_graph.name}_{removal_process}_{num_nodes_to_remove}.pkl'

        # Save the list of graphs to a file
        with open(f'./graphs/{graphs_filename}', 'wb') as f:
            pickle.dump(graphs_list, f)

    return graphs_list

In [None]:
num_nodes_to_remove = 128
graphs_list = dynamic_graph_gen(initial_graph, num_nodes_to_remove, save_bool=True)
graphs_list

In [21]:
# Rebuild the file name exactly as dynamic_graph_gen writes it:
#   <graph name>_<removal process>_<node count>.pkl
# 'random' is dynamic_graph_gen's default removal_process, which the save call in
# the previous cell relied on.
graphs_filenames_list = f'{initial_graph.name}_random_{num_nodes_to_remove}.pkl'

# Load the list of graphs from the file
# NOTE: unpickling can execute arbitrary code — only load files you generated yourself.
with open(f'./graphs/{graphs_filenames_list}', 'rb') as f:
    loaded_graphs_list = pickle.load(f)

In [22]:
len(graphs_list) == len(loaded_graphs_list)

True

In [27]:
advanced_info(initial_graph)

Graph Information:
Number of nodes: 2708
Number of edges: 5278
Density: 0.0014399999126942077
Is connected: False
Average clustering coefficient: 0.2406732985019372
Directed: False


In [28]:
advanced_info(graphs_list[0])

Graph Information:
Number of nodes: 2580
Number of edges: 4994
Density: 0.0015010926054507036
Is connected: False
Average clustering coefficient: 0.23931300304563
Directed: False


In [29]:
advanced_info(graphs_list[-1])

Graph Information:
Number of nodes: 2708
Number of edges: 5278
Density: 0.0014399999126942077
Is connected: False
Average clustering coefficient: 0.2406732985019372
Directed: False


In [30]:
advanced_info(loaded_graphs_list[0])

Graph Information:
Number of nodes: 2580
Number of edges: 4994
Density: 0.0015010926054507036
Is connected: False
Average clustering coefficient: 0.23931300304563
Directed: False


In [31]:
advanced_info(loaded_graphs_list[-1])

Graph Information:
Number of nodes: 2708
Number of edges: 5278
Density: 0.0014399999126942077
Is connected: False
Average clustering coefficient: 0.2406732985019372
Directed: False
