In [4]:
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import glob
import seaborn as sns
import sys
import copy
from tqdm.notebook import tqdm
from numba import jit
from scipy import stats
import networkx as nx
import re

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
import powerlaw


def extract_graph_properties(g):
    d = list(dict(g.degree).values())
    N = nx.number_of_nodes(g)
    kmean = np.mean(d)
    results = powerlaw.Fit(d)
    gamma = results.power_law.alpha
    return {
        'N': N,
        'k': kmean,
        'gamma': gamma
    }


def extract_feature_properties(features):
    kmean_f = np.mean(np.sum(features, axis=0))
    kmean_n = np.mean(np.sum(features, axis=1))
    
    results = powerlaw.Fit(np.sum(features, axis=0))
    gamma_f = results.power_law.alpha
    
    results = powerlaw.Fit(np.sum(features, axis=1))
    gamma_n = results.power_law.alpha
    
    return {
        'Nf': features.shape[1],
        'k_f': kmean_f,
        'k_n': kmean_n,
        'gamma_f': gamma_f,
        'gamma_n': gamma_n
    }

### Cora

In [6]:
cora_graph = nx.read_edgelist("../data/cora/cora.cites")
print(cora_graph)

Graph with 2708 nodes and 5278 edges


In [7]:
cora_features = pd.read_csv("../data/cora/cora.content", sep="\t", header=None)
cora_features = cora_features.sort_values(0)
cora_features.columns = ['id', *list(range(1, 1434)), 'category']
cora_features = cora_features.drop(['id', 'category'], axis=1).values
cora_features.shape

(2708, 1433)

In [8]:
cora_properties = extract_graph_properties(cora_graph) | extract_feature_properties(cora_features)
cora_properties

Calculating best minimal value for power law fit
xmin progress: 00%xmin progress: 02%xmin progress: 05%xmin progress: 08%xmin progress: 11%xmin progress: 13%xmin progress: 16%xmin progress: 19%xmin progress: 22%xmin progress: 25%xmin progress: 27%xmin progress: 30%xmin progress: 33%xmin progress: 36%xmin progress: 38%xmin progress: 41%xmin progress: 44%xmin progress: 47%xmin progress: 50%xmin progress: 52%xmin progress: 55%xmin progress: 58%xmin progress: 61%xmin progress: 63%xmin progress: 66%xmin progress: 69%xmin progress: 72%xmin progress: 75%xmin progress: 77%xmin progress: 80%xmin progress: 83%xmin progress: 86%xmin progress: 88%xmin progress: 91%xmin progress: 94%xmin progress: 97%Calculating best minimal value for power law fit
xmin progress: 00%xmin progress: 00%xmin progress: 01%xmin progress: 01%xmin progress: 02%xmin progress: 02%xmin progress: 03%xmin progress: 04%xmin progress: 04%xmin progress: 05%xmin progress: 05%xmin prog

Values less than or equal to 0 in data. Throwing out 0 or negative values


{'N': 2708,
 'k': 3.8980797636632203,
 'gamma': 3.702152925692742,
 'Nf': 1433,
 'k_f': 34.34473133286811,
 'k_n': 18.174298375184637,
 'gamma_f': 3.3023909160798324,
 'gamma_n': 62.7645096232935}