# Storing results in a table

In [1]:
cd ~/thesis

/home/jovyan/thesis


  bkms = self.shell.db.get('bookmarks', {})
  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


Import libraries

In [2]:
import os

import numpy as np
import pandas as pd
import networkx as nx

# Modify eliorc's implementation
from eliorc_mod.node2vec import Node2Vec
from gensim.models import Word2Vec

from main_utils import *
from plot_utils import *

# Kept after the star imports so `datetime` still refers to the class, whatever those modules export.
from datetime import datetime

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


## Erdős–Rényi

In [11]:
# Erdős–Rényi benchmark for the 'extend' scenario: 256 nodes, edge probability 0.15.
# The seed pins the generated graph so a Restart & Run All rebuilds the same topology
# (the embedding training itself may still be stochastic).
initial_graph = nx.erdos_renyi_graph(256, 0.15, seed=42)
initial_graph.name = 'erdos_renyi'

# Hyper-parameter list; a later cell unpacks it as [d, r, l, p, q] for Node2Vec —
# presumably the helper below interprets it the same way (TODO confirm).
params = [64, 8, 8, 0.25, 4]

added_nodes_num = 16

# Returns feature/label pairs for the global and local retraining strategies,
# their training times, and the number of starting nodes.
X_global, y_global, X_local, y_local, total_global_time, total_local_time, num_starting_nodes = \
    dynamic_extend_compare(initial_graph, added_nodes_num, params, quiet_bool=True)

num_different_nodes = added_nodes_num

# Test-set fractions 0.1, 0.2, ..., 0.9.
test_sizes = np.arange(0.1, 1, 0.1)
mod_type = 'extend'

local_vars = [X_local, y_local]
global_vars = [X_global, y_global]
training_times = (total_global_time, total_local_time)

results_df = results_store_func(initial_graph, mod_type, local_vars, global_vars, test_sizes, num_different_nodes, num_starting_nodes, params, training_times)

(240, 4314) (256, 4939)


In [None]:
# Erdős–Rényi benchmark for the 'prune' scenario: 256 nodes, edge probability 0.15.
# The seed pins the generated graph so a Restart & Run All rebuilds the same topology
# (the embedding training itself may still be stochastic).
initial_graph = nx.erdos_renyi_graph(256, 0.15, seed=42)
initial_graph.name = 'erdos_renyi'

# Hyper-parameter list; a later cell unpacks it as [d, r, l, p, q] for Node2Vec —
# presumably the helper below interprets it the same way (TODO confirm).
params = [64, 8, 8, 0.25, 4]

removed_nodes_num = 8

# Returns feature/label pairs for the global and local retraining strategies,
# their training times, and the number of starting nodes.
X_global, y_global, X_local, y_local, total_global_time, total_local_time, num_starting_nodes = \
    dynamic_prune_compare(initial_graph, removed_nodes_num, params, quiet_bool=True)

num_different_nodes = removed_nodes_num

# Test-set fractions 0.1, 0.2, ..., 0.9.
test_sizes = np.arange(0.1, 1, 0.1)
mod_type = 'prune'

local_vars = [X_local, y_local]
global_vars = [X_global, y_global]
training_times = (total_global_time, total_local_time)

results_df = results_store_func(initial_graph, mod_type, local_vars, global_vars, test_sizes, num_different_nodes, num_starting_nodes, params, training_times)

## Cora

In [3]:
# Load the Cora citation graph and build the node -> subject-group table.
data_dir = os.path.expanduser("~/thesis/datasets/cora")

# cora.cites: tab-separated, no header row; each row is read as a (target, source) pair.
edgelist = pd.read_csv(os.path.join(data_dir, "cora.cites"), sep='\t', header=None, names=["target", "source"])
edgelist["label"] = "cites"

initial_graph = nx.from_pandas_edgelist(edgelist, edge_attr="label")
initial_graph.name = 'cora'

# Give every node the attribute label="paper".
nx.set_node_attributes(initial_graph, "paper", "label")

# cora.content: 1433 feature columns followed by the subject column.
# NOTE(review): one fewer name than file columns is supplied, so the leading paper-id
# column presumably becomes the index (the node_num values shown below look like ids) — confirm.
feature_names = [f"w_{ii}" for ii in range(1433)]
column_names = feature_names + ["subject"]
node_data = pd.read_csv(os.path.join(data_dir, "cora.content"), sep='\t', header=None, names=column_names)

group_df = node_data['subject'].reset_index()
group_df.columns = ['node_num', 'group']

# Encode each subject as an integer code, then shift the codes by 4.
group_df['group'] = pd.factorize(group_df['group'])[0] + 4  # number of groups for new graph

# groups_dict is built in the cell that generates the dynamic graphs, as in the other
# dataset sections, so it is not computed twice here.
group_df.head()

Unnamed: 0,node_num,group
0,31336,4
1,1061127,5
2,1106406,6
3,13195,6
4,37879,7


In [4]:
# Print summary statistics for the Cora graph (size, density, connectivity, clustering, directedness).
advanced_info(initial_graph)

Graph Information:
Number of nodes: 2708
Number of edges: 5278
Density: 0.0014399999126942077
Is connected: False
Average clustering coefficient: 0.2406732985019372
Directed: False


In [5]:
# Build groups_dict from the group table; it is passed to results_output_func in the cells below.
groups_dict = groups_assign(initial_graph, initial_graph, group_df)

# mod_nodes_num is forwarded both to dynamic_graph_gen here and to results_output_func later.
mod_nodes_num = 1024
graphs_list = dynamic_graph_gen(initial_graph, mod_nodes_num)

Generating list of dynamic graphs:


100%|██████████| 1024/1024 [01:05<00:00, 15.55it/s]


In [6]:
# Run the 'extend' experiment on Cora and display the first rows of the returned table.
mod_type = 'extend'
params = [64, 8, 8, 0.25, 4]  # echoed verbatim in the table's `parameters` column

results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list)
# NOTE(review): the displayed rows include earlier runs on other datasets, so the helper
# presumably appends to a persisted results table — confirm.
results_df.head(8)

** Modification type: extend for 1024 nodes **
Graphs:
(1684, 2465) (2708, 5278)
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/8 [00:00<?, ?it/s]

Fitting model...


Unnamed: 0,dataset,nodes,edges,retraining_type,metric,10%,20%,30%,40%,50%,60%,70%,80%,90%,parameters,training_time,dynamic_update,num_different_nodes,num_starting_nodes,execution_timestamp
0,cora,2708,5278,global,macro,0.730587,0.737445,0.742649,0.728899,0.723772,0.709106,0.708233,0.691043,0.642751,"[64, 8, 8, 0.25, 4]",3.887151,extend,1024,1024,05/06/2024 - 12:03
1,cora,2708,5278,global,micro,0.719557,0.730627,0.739237,0.736162,0.726736,0.716308,0.714135,0.703276,0.665299,"[64, 8, 8, 0.25, 4]",3.887151,extend,1024,1024,05/06/2024 - 12:03
2,cora,2708,5278,local,macro,0.70732,0.715295,0.718216,0.715161,0.706878,0.697484,0.68178,0.670834,0.64607,"[64, 8, 8, 0.25, 4]",3.072851,extend,1024,1024,05/06/2024 - 12:03
3,cora,2708,5278,local,micro,0.704797,0.719557,0.724477,0.72786,0.720827,0.705846,0.69673,0.688048,0.659147,"[64, 8, 8, 0.25, 4]",3.072851,extend,1024,1024,05/06/2024 - 12:03
4,wikipedia,4777,92517,global,macro,0.044401,0.039382,0.038125,0.036381,0.036443,0.032291,0.031386,0.030521,0.023499,"[64, 8, 8, 0.25, 4]",304.372519,prune,1024,4615,04/06/2024 - 12:05
5,wikipedia,4777,92517,global,micro,0.452128,0.472703,0.473357,0.476032,0.482152,0.475577,0.480974,0.477855,0.46714,"[64, 8, 8, 0.25, 4]",304.372519,prune,1024,4615,04/06/2024 - 12:05
6,wikipedia,4777,92517,local,macro,0.031063,0.024293,0.024204,0.02416,0.023971,0.023036,0.02075,0.020156,0.020352,"[64, 8, 8, 0.25, 4]",277.378711,prune,1024,4615,04/06/2024 - 12:05
7,wikipedia,4777,92517,local,micro,0.484043,0.48735,0.484902,0.484021,0.478423,0.476021,0.474125,0.475857,0.467436,"[64, 8, 8, 0.25, 4]",277.378711,prune,1024,4615,04/06/2024 - 12:05


In [7]:
# Run the 'prune' experiment on Cora, reusing mod_nodes_num, groups_dict and graphs_list from above.
mod_type = 'prune'
params = [64, 8, 8, 0.25, 4]  # echoed verbatim in the table's `parameters` column

results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list)
# Show the first eight rows of the returned table.
results_df.head(8)

** Modification type: prune for 1024 nodes **
Graphs:
(1684, 2465) (2708, 5278)
1024
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...


Unnamed: 0,dataset,nodes,edges,retraining_type,metric,10%,20%,30%,40%,50%,60%,70%,80%,90%,parameters,training_time,dynamic_update,num_different_nodes,num_starting_nodes,execution_timestamp
0,cora,2708,5278,global,macro,0.695142,0.664917,0.662999,0.669191,0.674114,0.676894,0.667904,0.641703,0.605866,"[64, 8, 8, 0.25, 4]",2.184401,prune,1024,1688,05/06/2024 - 12:03
1,cora,2708,5278,global,micro,0.704142,0.688427,0.695652,0.689911,0.686461,0.691395,0.676845,0.652819,0.614116,"[64, 8, 8, 0.25, 4]",2.184401,prune,1024,1688,05/06/2024 - 12:03
2,cora,2708,5278,local,macro,0.292453,0.256135,0.25083,0.26352,0.254546,0.217255,0.203417,0.196845,0.154591,"[64, 8, 8, 0.25, 4]",0.831088,prune,1024,1688,05/06/2024 - 12:03
3,cora,2708,5278,local,micro,0.402367,0.388724,0.379447,0.394659,0.380048,0.37092,0.357082,0.364985,0.346966,"[64, 8, 8, 0.25, 4]",0.831088,prune,1024,1688,05/06/2024 - 12:03
4,cora,2708,5278,global,macro,0.730587,0.737445,0.742649,0.728899,0.723772,0.709106,0.708233,0.691043,0.642751,"[64, 8, 8, 0.25, 4]",3.887151,extend,1024,1024,05/06/2024 - 12:03
5,cora,2708,5278,global,micro,0.719557,0.730627,0.739237,0.736162,0.726736,0.716308,0.714135,0.703276,0.665299,"[64, 8, 8, 0.25, 4]",3.887151,extend,1024,1024,05/06/2024 - 12:03
6,cora,2708,5278,local,macro,0.70732,0.715295,0.718216,0.715161,0.706878,0.697484,0.68178,0.670834,0.64607,"[64, 8, 8, 0.25, 4]",3.072851,extend,1024,1024,05/06/2024 - 12:03
7,cora,2708,5278,local,micro,0.704797,0.719557,0.724477,0.72786,0.720827,0.705846,0.69673,0.688048,0.659147,"[64, 8, 8, 0.25, 4]",3.072851,extend,1024,1024,05/06/2024 - 12:03


## BlogCatalog

In [3]:
# BlogCatalog data directory, resolved relative to the user's home (as in the Cora section)
# instead of a hard-coded /home/<user> path. The trailing slash is kept so blog_dir stays
# usable with plain string concatenation.
blog_dir = os.path.expanduser('~/thesis/datasets/blog_catalog/data/')

# Comma-separated edge list.
# NOTE(review): read_edgelist presumably yields string node ids while group_df holds parsed
# values — confirm that groups_assign reconciles the two.
initial_graph = nx.read_edgelist(os.path.join(blog_dir, 'edges.csv'), delimiter=',')
initial_graph.name = 'blog_catalog'

# Groups mapping
# NOTE(review): read_csv treats the first row as a header here; if group-edges.csv has no
# header row, pass header=None so the first membership is not dropped — confirm.
group_df = pd.read_csv(os.path.join(blog_dir, 'group-edges.csv'), delimiter=',')
group_df.columns = ['node_num', 'group']

In [4]:
# Build groups_dict from the BlogCatalog group table; it is passed to results_output_func below.
groups_dict = groups_assign(initial_graph, initial_graph, group_df)

In [5]:

# Generate the dynamic graph list for 3072 modified nodes (slow: see the progress bar in the output).
mod_nodes_num = 3072
graphs_list = dynamic_graph_gen(initial_graph, mod_nodes_num)

Generating list of dynamic graphs:


100%|██████████| 3072/3072 [2:19:47<00:00,  2.73s/it]  


**Important:** save the graph list to disk once it has been generated — the run above took about 2 h 20 min.

In [6]:
# Run the 'extend' experiment on BlogCatalog with the graph list generated above.
mod_type = 'extend'
params = [64, 8, 8, 0.25, 4]  # echoed verbatim in the table's `parameters` column

results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list)
# Show the first eight rows of the returned table.
results_df.head(8)

** Modification type: extend for 3072 nodes **
Graphs:
(7240, 189637) (10312, 333983)
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/8 [00:00<?, ?it/s]

Fitting model...


Unnamed: 0,dataset,nodes,edges,retraining_type,metric,10%,20%,30%,40%,50%,60%,70%,80%,90%,parameters,training_time,dynamic_update,num_different_nodes,num_starting_nodes,execution_timestamp
0,blog_catalog,10312,333983,global,macro,0.206227,0.192615,0.204716,0.192943,0.197772,0.199865,0.200935,0.192379,0.18726,"[64, 8, 8, 0.25, 4]",1392.715573,extend,3072,3072,05/06/2024 - 17:31
1,blog_catalog,10312,333983,global,micro,0.215116,0.200194,0.21073,0.200485,0.204616,0.206529,0.204599,0.196,0.199547,"[64, 8, 8, 0.25, 4]",1392.715573,extend,3072,3072,05/06/2024 - 17:31
2,blog_catalog,10312,333983,local,macro,0.194896,0.19466,0.193883,0.198663,0.193938,0.206944,0.20543,0.195614,0.175584,"[64, 8, 8, 0.25, 4]",1312.374426,extend,3072,3072,05/06/2024 - 17:31
3,blog_catalog,10312,333983,local,micro,0.20155,0.198255,0.195863,0.202909,0.19841,0.212346,0.209724,0.213091,0.197069,"[64, 8, 8, 0.25, 4]",1312.374426,extend,3072,3072,05/06/2024 - 17:31
4,PPI,3890,38739,global,macro,0.08748,0.078519,0.083875,0.081866,0.079652,0.081975,0.079652,0.068709,0.054841,"[64, 8, 8, 0.25, 4]",23.138397,prune,1024,2746,05/06/2024 - 13:38
5,PPI,3890,38739,global,micro,0.142857,0.125436,0.137209,0.129904,0.124913,0.131977,0.130045,0.118622,0.098062,"[64, 8, 8, 0.25, 4]",23.138397,prune,1024,2746,05/06/2024 - 13:38
6,PPI,3890,38739,local,macro,0.033213,0.037811,0.035759,0.031038,0.030844,0.025366,0.027298,0.017826,0.01204,"[64, 8, 8, 0.25, 4]",18.799618,prune,1024,2746,05/06/2024 - 13:38
7,PPI,3890,38739,local,micro,0.073171,0.074913,0.065116,0.067132,0.069784,0.064535,0.063279,0.043611,0.05,"[64, 8, 8, 0.25, 4]",18.799618,prune,1024,2746,05/06/2024 - 13:38


In [7]:
# Run the 'prune' experiment on BlogCatalog.
mod_type = 'prune'
params = [64, 8, 8, 0.25, 4]  # echoed verbatim in the table's `parameters` column

# NOTE(review): graphs_list was generated with mod_nodes_num = 3072, but the value is reset
# to 1024 here. The run prints "prune for 1024 nodes" while the two graph sizes it reports
# differ by 3072 nodes, so the num_different_nodes stored in the table may not describe the
# graphs actually compared — confirm which value is intended.
mod_nodes_num = 1024

results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list)
results_df.head(8)

** Modification type: prune for 1024 nodes **
Graphs:
(7240, 189637) (10312, 333983)
3072
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...


Unnamed: 0,dataset,nodes,edges,retraining_type,metric,10%,20%,30%,40%,50%,60%,70%,80%,90%,parameters,training_time,dynamic_update,num_different_nodes,num_starting_nodes,execution_timestamp
0,blog_catalog,10312,333983,global,macro,0.2079,0.210733,0.211049,0.196362,0.197216,0.203207,0.201855,0.202264,0.189741,"[64, 8, 8, 0.25, 4]",629.954747,prune,1024,8733,05/06/2024 - 18:21
1,blog_catalog,10312,333983,global,micro,0.208564,0.212017,0.211229,0.196823,0.198066,0.20419,0.205169,0.204765,0.196133,"[64, 8, 8, 0.25, 4]",629.954747,prune,1024,8733,05/06/2024 - 18:21
2,blog_catalog,10312,333983,local,macro,0.160682,0.182285,0.182659,0.175635,0.183043,0.156216,0.18406,0.190195,0.187669,"[64, 8, 8, 0.25, 4]",580.0565,prune,1024,8733,05/06/2024 - 18:21
3,blog_catalog,10312,333983,local,micro,0.182171,0.203102,0.197156,0.195879,0.198216,0.195863,0.198365,0.203758,0.198901,"[64, 8, 8, 0.25, 4]",580.0565,prune,1024,8733,05/06/2024 - 18:21
4,blog_catalog,10312,333983,global,macro,0.206227,0.192615,0.204716,0.192943,0.197772,0.199865,0.200935,0.192379,0.18726,"[64, 8, 8, 0.25, 4]",1392.715573,extend,3072,3072,05/06/2024 - 17:31
5,blog_catalog,10312,333983,global,micro,0.215116,0.200194,0.21073,0.200485,0.204616,0.206529,0.204599,0.196,0.199547,"[64, 8, 8, 0.25, 4]",1392.715573,extend,3072,3072,05/06/2024 - 17:31
6,blog_catalog,10312,333983,local,macro,0.194896,0.19466,0.193883,0.198663,0.193938,0.206944,0.20543,0.195614,0.175584,"[64, 8, 8, 0.25, 4]",1312.374426,extend,3072,3072,05/06/2024 - 17:31
7,blog_catalog,10312,333983,local,micro,0.20155,0.198255,0.195863,0.202909,0.19841,0.212346,0.209724,0.213091,0.197069,"[64, 8, 8, 0.25, 4]",1312.374426,extend,3072,3072,05/06/2024 - 17:31


## Wikipedia

In [8]:
# Relative path: it resolves only when the working directory is the thesis root
# (the notebook changes into ~/thesis in its first cell).
filepath = 'datasets/wikipedia/POS.mat'
# mat_load_func returns the graph together with its group table.
initial_graph, group_df = mat_load_func(filepath)
initial_graph.name = 'wikipedia'

In [9]:
# Build groups_dict from the group table; it is passed to results_output_func below.
groups_dict = groups_assign(initial_graph, initial_graph, group_df)

# mod_nodes_num is forwarded both to dynamic_graph_gen here and to results_output_func later.
mod_nodes_num = 1024
graphs_list = dynamic_graph_gen(initial_graph, mod_nodes_num)

Generating list of dynamic graphs:


100%|██████████| 1024/1024 [32:16<00:00,  1.89s/it]


In [10]:
# Display a pair of counts for the graph; the values match the nodes/edges columns of the results table.
nodes_edges_func(initial_graph)

(4777, 92517)

In [11]:
# Run the 'extend' experiment on the Wikipedia graph with the graph list generated above.
mod_type = 'extend'
params = [64, 8, 8, 0.25, 4]  # echoed verbatim in the table's `parameters` column

results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list)
# Show the first eight rows of the returned table.
results_df.head(8)

** Modification type: extend for 1024 nodes **
Graphs:
(3753, 63531) (4777, 92517)
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/8 [00:00<?, ?it/s]

Fitting model...


Unnamed: 0,dataset,nodes,edges,retraining_type,metric,10%,20%,30%,40%,50%,60%,70%,80%,90%,parameters,training_time,dynamic_update,num_different_nodes,num_starting_nodes,execution_timestamp
0,wikipedia,4777,92517,global,macro,0.081872,0.04983,0.052619,0.04462,0.039447,0.036514,0.030789,0.027438,0.025785,"[64, 8, 8, 0.25, 4]",604.198201,extend,1024,1024,05/06/2024 - 13:00
1,wikipedia,4777,92517,global,micro,0.491632,0.472803,0.470711,0.475667,0.475931,0.476456,0.477871,0.47619,0.472791,"[64, 8, 8, 0.25, 4]",604.198201,extend,1024,1024,05/06/2024 - 13:00
2,wikipedia,4777,92517,local,macro,0.084981,0.061396,0.064967,0.040447,0.04495,0.036579,0.033579,0.029248,0.023131,"[64, 8, 8, 0.25, 4]",500.43508,extend,1024,1024,05/06/2024 - 13:00
3,wikipedia,4777,92517,local,micro,0.483264,0.476987,0.478382,0.475667,0.483884,0.480293,0.486244,0.478022,0.473488,"[64, 8, 8, 0.25, 4]",500.43508,extend,1024,1024,05/06/2024 - 13:00
4,cora,2708,5278,global,macro,0.695142,0.664917,0.662999,0.669191,0.674114,0.676894,0.667904,0.641703,0.605866,"[64, 8, 8, 0.25, 4]",2.184401,prune,1024,1688,05/06/2024 - 12:03
5,cora,2708,5278,global,micro,0.704142,0.688427,0.695652,0.689911,0.686461,0.691395,0.676845,0.652819,0.614116,"[64, 8, 8, 0.25, 4]",2.184401,prune,1024,1688,05/06/2024 - 12:03
6,cora,2708,5278,local,macro,0.292453,0.256135,0.25083,0.26352,0.254546,0.217255,0.203417,0.196845,0.154591,"[64, 8, 8, 0.25, 4]",0.831088,prune,1024,1688,05/06/2024 - 12:03
7,cora,2708,5278,local,micro,0.402367,0.388724,0.379447,0.394659,0.380048,0.37092,0.357082,0.364985,0.346966,"[64, 8, 8, 0.25, 4]",0.831088,prune,1024,1688,05/06/2024 - 12:03


In [12]:
# Run the 'prune' experiment on the Wikipedia graph, reusing mod_nodes_num and graphs_list from above.
mod_type = 'prune'
params = [64, 8, 8, 0.25, 4]  # echoed verbatim in the table's `parameters` column


results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list)
# Show the first eight rows of the returned table.
results_df.head(8)

** Modification type: prune for 1024 nodes **
Graphs:
(3753, 63531) (4777, 92517)
1024
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...


Unnamed: 0,dataset,nodes,edges,retraining_type,metric,10%,20%,30%,40%,50%,60%,70%,80%,90%,parameters,training_time,dynamic_update,num_different_nodes,num_starting_nodes,execution_timestamp
0,wikipedia,4777,92517,global,macro,0.045538,0.037732,0.051527,0.044686,0.038411,0.031108,0.029101,0.026497,0.023757,"[64, 8, 8, 0.25, 4]",373.358892,prune,1024,4245,05/06/2024 - 13:21
1,wikipedia,4777,92517,global,micro,0.446809,0.466045,0.459147,0.454061,0.464571,0.470693,0.471842,0.472194,0.468028,"[64, 8, 8, 0.25, 4]",373.358892,prune,1024,4245,05/06/2024 - 13:21
2,wikipedia,4777,92517,local,macro,0.029511,0.029614,0.027793,0.025567,0.024318,0.021647,0.021663,0.020931,0.020462,"[64, 8, 8, 0.25, 4]",293.332361,prune,1024,4245,05/06/2024 - 13:21
3,wikipedia,4777,92517,local,micro,0.417553,0.450067,0.457371,0.460053,0.465104,0.468028,0.469939,0.470862,0.464476,"[64, 8, 8, 0.25, 4]",293.332361,prune,1024,4245,05/06/2024 - 13:21
4,wikipedia,4777,92517,global,macro,0.081872,0.04983,0.052619,0.04462,0.039447,0.036514,0.030789,0.027438,0.025785,"[64, 8, 8, 0.25, 4]",604.198201,extend,1024,1024,05/06/2024 - 13:00
5,wikipedia,4777,92517,global,micro,0.491632,0.472803,0.470711,0.475667,0.475931,0.476456,0.477871,0.47619,0.472791,"[64, 8, 8, 0.25, 4]",604.198201,extend,1024,1024,05/06/2024 - 13:00
6,wikipedia,4777,92517,local,macro,0.084981,0.061396,0.064967,0.040447,0.04495,0.036579,0.033579,0.029248,0.023131,"[64, 8, 8, 0.25, 4]",500.43508,extend,1024,1024,05/06/2024 - 13:00
7,wikipedia,4777,92517,local,micro,0.483264,0.476987,0.478382,0.475667,0.483884,0.480293,0.486244,0.478022,0.473488,"[64, 8, 8, 0.25, 4]",500.43508,extend,1024,1024,05/06/2024 - 13:00


## PPI

In [13]:
# Load the PPI graph and its group table; the relative path assumes the working directory
# is the thesis root (the notebook changes into ~/thesis in its first cell).
initial_graph, group_df = mat_load_func('datasets/PPI/PPI.mat')
initial_graph.name = 'PPI'

In [14]:
# Display a pair of counts for the graph; the values match the nodes/edges columns of the results table.
nodes_edges_func(initial_graph)

(3890, 38739)

In [15]:
# Build groups_dict from the group table; it is passed to results_output_func below.
groups_dict = groups_assign(initial_graph, initial_graph, group_df)

# mod_nodes_num is forwarded both to dynamic_graph_gen here and to results_output_func later.
mod_nodes_num = 1024
graphs_list = dynamic_graph_gen(initial_graph, mod_nodes_num)

Generating list of dynamic graphs:


100%|██████████| 1024/1024 [13:24<00:00,  1.27it/s]


In [16]:
# Run the 'extend' experiment on PPI with the graph list generated above.
mod_type = 'extend'
params = [64, 8, 8, 0.25, 4]  # echoed verbatim in the table's `parameters` column

results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list)
# Show the first eight rows of the returned table.
results_df.head(8)

** Modification type: extend for 1024 nodes **
Graphs:
(2866, 23359) (3890, 38739)
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/8 [00:00<?, ?it/s]

Fitting model...


Unnamed: 0,dataset,nodes,edges,retraining_type,metric,10%,20%,30%,40%,50%,60%,70%,80%,90%,parameters,training_time,dynamic_update,num_different_nodes,num_starting_nodes,execution_timestamp
0,PPI,3890,38739,global,macro,0.097459,0.092602,0.089124,0.083537,0.082383,0.09212,0.08417,0.080883,0.060841,"[64, 8, 8, 0.25, 4]",43.447217,extend,1024,1024,05/06/2024 - 13:36
1,PPI,3890,38739,global,micro,0.156812,0.1491,0.148116,0.140746,0.135733,0.136675,0.13069,0.128856,0.103113,"[64, 8, 8, 0.25, 4]",43.447217,extend,1024,1024,05/06/2024 - 13:36
2,PPI,3890,38739,local,macro,0.093724,0.107513,0.100109,0.096566,0.093822,0.086901,0.077249,0.069578,0.052015,"[64, 8, 8, 0.25, 4]",38.362036,extend,1024,1024,05/06/2024 - 13:36
3,PPI,3890,38739,local,micro,0.159383,0.161954,0.161815,0.148458,0.144473,0.13539,0.11931,0.112147,0.091688,"[64, 8, 8, 0.25, 4]",38.362036,extend,1024,1024,05/06/2024 - 13:36
4,wikipedia,4777,92517,global,macro,0.045538,0.037732,0.051527,0.044686,0.038411,0.031108,0.029101,0.026497,0.023757,"[64, 8, 8, 0.25, 4]",373.358892,prune,1024,4245,05/06/2024 - 13:21
5,wikipedia,4777,92517,global,micro,0.446809,0.466045,0.459147,0.454061,0.464571,0.470693,0.471842,0.472194,0.468028,"[64, 8, 8, 0.25, 4]",373.358892,prune,1024,4245,05/06/2024 - 13:21
6,wikipedia,4777,92517,local,macro,0.029511,0.029614,0.027793,0.025567,0.024318,0.021647,0.021663,0.020931,0.020462,"[64, 8, 8, 0.25, 4]",293.332361,prune,1024,4245,05/06/2024 - 13:21
7,wikipedia,4777,92517,local,micro,0.417553,0.450067,0.457371,0.460053,0.465104,0.468028,0.469939,0.470862,0.464476,"[64, 8, 8, 0.25, 4]",293.332361,prune,1024,4245,05/06/2024 - 13:21


In [17]:
# Run the 'prune' experiment on PPI, reusing mod_nodes_num and graphs_list from above.
mod_type = 'prune'
params = [64, 8, 8, 0.25, 4]  # echoed verbatim in the table's `parameters` column

results_df = results_output_func(initial_graph, mod_type, mod_nodes_num, params, groups_dict, graphs_list)
# Show the first eight rows of the returned table.
results_df.head(8)

** Modification type: prune for 1024 nodes **
Graphs:
(2866, 23359) (3890, 38739)
1024
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...
Computing transition probabilities...
Random walks in progress...


  0%|          | 0/64 [00:00<?, ?it/s]

Fitting model...


Unnamed: 0,dataset,nodes,edges,retraining_type,metric,10%,20%,30%,40%,50%,60%,70%,80%,90%,parameters,training_time,dynamic_update,num_different_nodes,num_starting_nodes,execution_timestamp
0,PPI,3890,38739,global,macro,0.08748,0.078519,0.083875,0.081866,0.079652,0.081975,0.079652,0.068709,0.054841,"[64, 8, 8, 0.25, 4]",23.138397,prune,1024,2746,05/06/2024 - 13:38
1,PPI,3890,38739,global,micro,0.142857,0.125436,0.137209,0.129904,0.124913,0.131977,0.130045,0.118622,0.098062,"[64, 8, 8, 0.25, 4]",23.138397,prune,1024,2746,05/06/2024 - 13:38
2,PPI,3890,38739,local,macro,0.033213,0.037811,0.035759,0.031038,0.030844,0.025366,0.027298,0.017826,0.01204,"[64, 8, 8, 0.25, 4]",18.799618,prune,1024,2746,05/06/2024 - 13:38
3,PPI,3890,38739,local,micro,0.073171,0.074913,0.065116,0.067132,0.069784,0.064535,0.063279,0.043611,0.05,"[64, 8, 8, 0.25, 4]",18.799618,prune,1024,2746,05/06/2024 - 13:38
4,PPI,3890,38739,global,macro,0.097459,0.092602,0.089124,0.083537,0.082383,0.09212,0.08417,0.080883,0.060841,"[64, 8, 8, 0.25, 4]",43.447217,extend,1024,1024,05/06/2024 - 13:36
5,PPI,3890,38739,global,micro,0.156812,0.1491,0.148116,0.140746,0.135733,0.136675,0.13069,0.128856,0.103113,"[64, 8, 8, 0.25, 4]",43.447217,extend,1024,1024,05/06/2024 - 13:36
6,PPI,3890,38739,local,macro,0.093724,0.107513,0.100109,0.096566,0.093822,0.086901,0.077249,0.069578,0.052015,"[64, 8, 8, 0.25, 4]",38.362036,extend,1024,1024,05/06/2024 - 13:36
7,PPI,3890,38739,local,micro,0.159383,0.161954,0.161815,0.148458,0.144473,0.13539,0.11931,0.112147,0.091688,"[64, 8, 8, 0.25, 4]",38.362036,extend,1024,1024,05/06/2024 - 13:36


## Other

### Saving big models

In [10]:
# Time one locally-restricted node2vec fit on a small random graph.
# NOTE: this cell rebinds initial_graph, params and graphs_list, replacing whichever
# dataset was loaded by the sections above.
# The seed pins the generated graph so repeated runs time the same topology.
initial_graph = nx.erdos_renyi_graph(128, 0.3, seed=42)
params = [64, 8, 8, 0.25, 4]

# d -> dimensions, r -> number of walks, l -> walk length, p / q -> node2vec p and q
# (see how they are passed to Node2Vec below).
[d, r, l, p, q] = params

removed_nodes_num = 8

graphs_list = dynamic_graph_gen(initial_graph, removed_nodes_num)

graph_pruned = graphs_list[0]
graph_upd = initial_graph  # graphs_list[-1]

nodes_pruned = set(graph_pruned.nodes())
nodes_upd = set(graph_upd.nodes())

# Nodes present in the full graph but missing from the pruned one.
diff_nodes = nodes_upd - nodes_pruned
print(len(diff_nodes))

# The timed span covers the neighbour lookup, the walk generation and the model fit.
start_time = time.time()

neighbors = removed_nodes_neighbors_func(graph_upd, diff_nodes, max_step=1)

# Pruned models: walks start only from `neighbors`, with half the walk length and half the number of walks.
node2vec_pruned = Node2Vec(graph_pruned, dimensions=d, walk_length=l//2, num_walks=r//2, p=p, q=q, workers=8, temp_folder='temp_folder',
                    starting_nodes=neighbors, quiet=True)  # Use temp_folder for big graphs

model_pruned = node2vec_pruned.fit()

total_time = time.time() - start_time

8


In [11]:
# Elapsed wall-clock seconds measured with time.time() in the timing cell above.
total_time

0.5847151279449463

Initially, we run node2vec from the starting nodes in the usual way, even though this takes a long time and is not yet optimised.

In [None]:
# perc_list = [f"{i}0%" for i in range(1, 10)]
# perc_list

In [None]:
# Rebuild groups_dict for whichever graph and group table are currently loaded.
groups_dict = groups_assign(initial_graph, initial_graph, group_df)
# Look up the same node id as an int and as a str.
# NOTE(review): presumably a check of which key type groups_dict uses; if only one type
# is present the other lookup will raise KeyError — confirm.
[groups_dict[10312], groups_dict['10312']]

In [None]:
import pickle

# One pickle file per dataset and modification size, so saving a new graph list does not
# overwrite an earlier one (previously every save went to the fixed name 'graphs.pkl'
# and this computed name was never used).
# NOTE(review): num_nodes_to_remove is not assigned in any cell shown here (other cells use
# mod_nodes_num / removed_nodes_num) — confirm it is defined before running this cell.
graphs_filenames_list = f'{initial_graph.name}_{num_nodes_to_remove}.pkl'

# Save the list of graphs to a file
with open(graphs_filenames_list, 'wb') as f:
    pickle.dump(graphs_list, f)

# Load the list of graphs from the file.
# pickle.load can execute arbitrary code: only load files written by this notebook.
with open(graphs_filenames_list, 'rb') as f:
    loaded_graphs = pickle.load(f)

# Accessing the graphs and their names
for i, graph in enumerate(loaded_graphs):