In [38]:
import pickle
import warnings
from collections import Counter
from pprint import pprint

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns
from graphrole import RecursiveFeatureExtractor, RoleExtractor

In [39]:
# Load the pre-built graph from disk.
# `nx.read_gpickle` was deprecated in NetworkX 2.6 and removed in 3.0; it was a
# thin wrapper around `pickle.load`, so loading the file directly works on both
# old and new NetworkX versions (the path has no .gz/.bz2 suffix, so no
# decompression is needed).
# NOTE(security): unpickling can execute arbitrary code -- only load graph
# files that were produced by a trusted source.
with open('../graphs/edge_list', 'rb') as graph_file:
    G = pickle.load(graph_file)

In [40]:
# Upper bound on the number of recursive aggregation generations; the
# extractor may stop earlier (see `generation_count` after extraction).
MAX_GENERATIONS = 10
feature_extractor = RecursiveFeatureExtractor(G, max_generations=MAX_GENERATIONS)
# Feature table indexed by node name, one column per extracted feature.
features = feature_extractor.extract_features()

In [41]:
# Report how many generations were actually run, followed by the feature table.
print(
    f'\nFeatures extracted from {feature_extractor.generation_count} recursive generations:',
    features,
    sep='\n',
)


Features extracted from 6 recursive generations:
              external_edges(mean)(mean)(mean)(mean)(mean)  \
A__BADEN01                                   145872.735983   
A__DORNBIR01                                 140796.345033   
A__EISENST01                                 146186.842411   
A__EISENST02                                 143570.042382   
A__EISENST05                                 114744.920677   
...                                                    ...   
Unknown_PT                                        0.000000   
Unknown_S                                    143673.982646   
Unknown_SK                                        0.000000   
Unknown_TR                                   149752.078361   
Unknown_UK                                        0.000000   

              external_edges(mean)(mean)(mean)(mean)  \
A__BADEN01                             147234.072295   
A__DORNBIR01                           139357.315492   
A__EISENST01                         

In [42]:
# Derive structural roles from the feature table.
# n_roles=None leaves the number of roles to the extractor
# (presumably selected automatically -- confirm against the graphrole docs).
role_extractor = RoleExtractor(n_roles=None)
role_extractor.extract_role_factors(features)
# Mapping of node name -> assigned role label (e.g. 'role_1').
node_roles = role_extractor.roles




In [43]:
# Per-node membership in each role, rounded to two decimals for display.
print('\nNode role membership by percentage:')
role_share = role_extractor.role_percentage.round(2)
print(role_share)

# Tally how many nodes were assigned to each role label.
role_count = Counter(node_roles.values())
pprint(role_count)


Node role membership by percentage:
              role_0  role_1  role_2  role_3
A__BADEN01      0.00    0.89    0.10    0.00
A__DORNBIR01    0.90    0.10    0.00    0.00
A__EISENST01    0.00    0.81    0.00    0.18
A__EISENST02    0.79    0.21    0.00    0.00
A__EISENST05    0.01    0.98    0.01    0.01
...              ...     ...     ...     ...
Unknown_PT      0.25    0.25    0.25    0.25
Unknown_S       0.82    0.10    0.03    0.05
Unknown_SK      0.25    0.25    0.25    0.25
Unknown_TR      0.00    0.74    0.25    0.00
Unknown_UK      0.25    0.25    0.25    0.25

[3292 rows x 4 columns]
Counter({'role_1': 1584, 'role_0': 1075, 'role_2': 440, 'role_3': 193})


In [44]:
# Attach each node's role label to the feature table.
# Joining on the index (node name) aligns rows by node instead of relying on
# the dict's iteration order matching the row order of `features`.
roles_df = pd.DataFrame.from_dict(data=node_roles, orient="index", columns=["roles"])
print(roles_df)
# Drop a "roles" column left over from a previous run of this cell so that
# re-running it does not fail with "columns overlap" inside join().
features = features.drop(columns=["roles"], errors="ignore").join(roles_df)

               roles
A__BADEN01    role_1
A__DORNBIR01  role_0
A__EISENST01  role_1
A__EISENST02  role_0
A__EISENST05  role_1
...              ...
Unknown_PT    role_0
Unknown_S     role_0
Unknown_SK    role_0
Unknown_TR    role_1
Unknown_UK    role_0

[3292 rows x 1 columns]


In [45]:
# PageRank score per node of G, keyed by node name.
# alpha is the damping factor handed to NetworkX.
PAGERANK_ALPHA = 0.9
pr = nx.pagerank(G, alpha=PAGERANK_ALPHA)

In [46]:
# Add the PageRank scores as a "pagerank" column, aligned on node name.
prdf = pd.DataFrame.from_dict(data=pr, orient="index", columns=["pagerank"])
# Drop a stale "pagerank" column first: without this, re-running the cell
# raises "columns overlap" because `features` already carries the column.
features = features.drop(columns=["pagerank"], errors="ignore").join(prdf)
print(features)

              external_edges(mean)(mean)(mean)(mean)(mean)  \
A__BADEN01                                   145872.735983   
A__DORNBIR01                                 140796.345033   
A__EISENST01                                 146186.842411   
A__EISENST02                                 143570.042382   
A__EISENST05                                 114744.920677   
...                                                    ...   
Unknown_PT                                        0.000000   
Unknown_S                                    143673.982646   
Unknown_SK                                        0.000000   
Unknown_TR                                   149752.078361   
Unknown_UK                                        0.000000   

              external_edges(mean)(mean)(mean)(mean)  \
A__BADEN01                             147234.072295   
A__DORNBIR01                           139357.315492   
A__EISENST01                           147283.474239   
A__EISENST02                   

In [47]:
# Persist the node feature table (with the joined "roles" and "pagerank"
# columns) as CSV; the node names in the index are written as the first column.
NODE_ROLES_CSV = "../graphs/node_roles.csv"
features.to_csv(NODE_ROLES_CSV)