In [1]:
import os
import pickle

In [2]:
from funcs.community_detection_funcs import community_detection_recursive_with_limit
from funcs.find_families import find_families

In [3]:
# Move the working directory one level up so that the relative 'data/...'
# paths used by the following cells resolve against the parent directory.
# NOTE(review): not idempotent — every re-run of this cell climbs one more level.
os.chdir(os.pardir)

In [4]:
# Dataset identifiers; each one selects a `data_<name>.pkl` file under `data/`.
names = [
    'arab',
    'juncea',
    'nigra',
    'rapa',
]

In [5]:
# Per-dataset containers, all keyed by dataset name and filled by the loader
# cell: the 'graph', 'largest_component' and 'node_family_dict' pickle entries.
# Three separate dict objects are created (no aliasing between them).
G_all, G_largest_component_all, node_family_dict_all = {}, {}, {}

In [6]:
# Load the pickled data of every dataset in `names` and register its parts in
# the three per-dataset dictionaries. Problems are reported via print and the
# loop moves on to the next dataset instead of stopping.
for name in names:
    pickle_filename = os.path.join('data', f'data_{name}.pkl')

    # Guard clause: a missing file is only reported, never raised.
    if not os.path.exists(pickle_filename):
        print(f"File for {name} not found.")
        continue

    try:
        # NOTE(review): pickle.load can execute arbitrary code while
        # deserialising — only point this at files from a trusted source.
        with open(pickle_filename, 'rb') as f:
            data = pickle.load(f)

        # All three entries are read before anything is stored, so a missing
        # key leaves the *_all dictionaries untouched for this dataset.
        G = data['graph']
        G_largest_component = data['largest_component']
        node_family_dict = data['node_family_dict']

        G_all[name] = G
        G_largest_component_all[name] = G_largest_component
        node_family_dict_all[name] = node_family_dict

        print(f"Loaded data for {name}")
        # The reported counts are those of the largest component, not of G.
        print(f"Graph has {len(G_largest_component.nodes())} nodes and {len(G_largest_component.edges())} edges.")
    except Exception as e:
        # Best-effort loading: any failure is printed and the dataset skipped.
        # NOTE(review): later cells index the *_all dicts by name and will
        # raise KeyError for a dataset that was skipped here.
        print(f"Error loading {name}: {e}")

Loaded data for arab
Graph has 2125 nodes and 2871 edges.
Loaded data for juncea
Graph has 43073 nodes and 79813 edges.
Loaded data for nigra
Graph has 31 nodes and 32 edges.
Loaded data for rapa
Graph has 18 nodes and 17 edges.


Разбиваем наибольшую компоненту каждого графа на сообщества поменьше и находим для них метки семейств

In [7]:
# Cluster the largest component of the 'arab' dataset, look up the families of
# the resulting clusters and pickle both results under `data/`.
name = 'arab'
clusters_dict = community_detection_recursive_with_limit(
    G_largest_component_all[name],
    max_size=20,
    max_depth=5,
)
# Only the values of the returned mapping are used; its keys are dropped.
clusters = [*clusters_dict.values()]
# NOTE(review): find_families runs with its default `pass_nans` here, whereas
# the loop over the remaining datasets passes pass_nans=False — confirm that
# the difference is intentional.
clusters_wo_nan, families = find_families(clusters, node_family_dict_all[name])
data_filename = os.path.join('data', f'largest_component_clusters_{name}.pkl')
# An existing result file is never overwritten. The clustering above still
# runs on every execution, whether or not the file is already there.
if not os.path.exists(data_filename):
    with open(data_filename, 'wb') as f:
        pickle.dump({'clusters': clusters_wo_nan, 'families': families}, f)

In [8]:
# Repeat the clustering for every dataset after the first entry of `names`
# ('arab' is processed in its own cell), this time calling find_families with
# pass_nans=False, and pickle each result under `data/`.
for name in names[1:]:
    clusters_dict = community_detection_recursive_with_limit(
        G_largest_component_all[name],
        max_size=20,
        max_depth=5,
    )
    # Only the values of the returned mapping are used; its keys are dropped.
    clusters = [*clusters_dict.values()]
    clusters_wo_nan, families = find_families(
        clusters,
        node_family_dict_all[name],
        pass_nans=False,
    )
    data_filename = os.path.join('data', f'largest_component_clusters_{name}.pkl')
    # An existing result file is never overwritten. The clustering above has
    # already run for this dataset by the time this check is made.
    if os.path.exists(data_filename):
        continue
    with open(data_filename, 'wb') as f:
        pickle.dump({'clusters': clusters_wo_nan, 'families': families}, f)