# Edge Frequency by Source and Target Degree Across Permuted Networks

This notebook computes the frequency of edge presence for each (source degree, target degree) pair across 200 permuted networks in `data/downloads/hetionet-permutations/permutations`. The result is a summary of how often an edge appears for each degree pair, which can be used for downstream analysis or visualization.

In [1]:
# Import required libraries
import os
import numpy as np
import scipy.sparse as sp
from collections import defaultdict
import pandas as pd

In [None]:
# Default parameters
edge_file = "AeG.sparse.npz"  # Default edge file type

In [None]:
# Set the path to the permutation directories
perm_dir = "../data/downloads/hetionet-permutations/permutations"

# Extract edge type name from edge_file for naming results
edge_type = edge_file.replace('.sparse.npz', '')

In [5]:
# Find all permutation folders
perm_folders = sorted([os.path.join(perm_dir, d) for d in os.listdir(perm_dir) if d.endswith('.hetmat')])
print(f"Found {len(perm_folders)} permutation folders.")

Found 200 permutation folders.


In [6]:
# Initialize dictionaries to count edge presence and total possible pairs
freq = defaultdict(int)
total_counts = defaultdict(int)

In [8]:
# Iterate through permutations and count edge frequencies by degree pair
for perm_path in perm_folders:
    edge_path = os.path.join(perm_path, "edges", edge_file)
    print(f'{edge_path}')
    if not os.path.exists(edge_path):
        continue
    adj = sp.load_npz(edge_path)
    src_degrees = np.array(adj.sum(axis=1)).flatten()
    tgt_degrees = np.array(adj.sum(axis=0)).flatten()
    src_nodes, tgt_nodes = adj.nonzero()
    for s, t in zip(src_nodes, tgt_nodes):
        s_deg = src_degrees[s]
        t_deg = tgt_degrees[t]
        freq[(s_deg, t_deg)] += 1
    for s in range(adj.shape[0]):
        for t in range(adj.shape[1]):
            total_counts[(src_degrees[s], tgt_degrees[t])] += 1

../data/downloads/hetionet-permutations/permutations/001.hetmat/edges/AeG.sparse.npz
../data/downloads/hetionet-permutations/permutations/002.hetmat/edges/AeG.sparse.npz
../data/downloads/hetionet-permutations/permutations/003.hetmat/edges/AeG.sparse.npz
../data/downloads/hetionet-permutations/permutations/004.hetmat/edges/AeG.sparse.npz
../data/downloads/hetionet-permutations/permutations/005.hetmat/edges/AeG.sparse.npz
../data/downloads/hetionet-permutations/permutations/006.hetmat/edges/AeG.sparse.npz
../data/downloads/hetionet-permutations/permutations/007.hetmat/edges/AeG.sparse.npz
../data/downloads/hetionet-permutations/permutations/008.hetmat/edges/AeG.sparse.npz
../data/downloads/hetionet-permutations/permutations/009.hetmat/edges/AeG.sparse.npz
../data/downloads/hetionet-permutations/permutations/010.hetmat/edges/AeG.sparse.npz
../data/downloads/hetionet-permutations/permutations/011.hetmat/edges/AeG.sparse.npz
../data/downloads/hetionet-permutations/permutations/012.hetmat/e

In [11]:
# Compute frequency (fraction of permutations with edge present)
freq_matrix = {k: freq[k] / total_counts[k] for k in freq}

# Convert to DataFrame for easier analysis
freq_df = pd.DataFrame([
    {'source_degree': k[0], 'target_degree': k[1], 'frequency': v}
    for k, v in freq_matrix.items()
])

# Show top 10 most frequent degree pairs
freq_df.sort_values('frequency', ascending=False).head(10)

Unnamed: 0,source_degree,target_degree,frequency
8221,7853,98,1.004902
8832,12157,82,1.004902
8835,12157,90,1.004902
8125,8074,87,1.004902
6924,6745,98,1.004902
9604,6725,98,1.004902
8656,12589,82,1.004902
8657,12589,90,1.004902
8831,12157,98,1.004902
8805,12157,88,1.004902


In [None]:
# Save the frequency DataFrame to the results folder with edge type in filename
results_dir = "../results/empirical_edge_frequencies"
os.makedirs(results_dir, exist_ok=True)
output_filename = f"edge_frequency_by_degree_{edge_type}.csv"
output_path = os.path.join(results_dir, output_filename)
freq_df.to_csv(output_path, index=False)
print(f"Saved edge frequency table to {output_path}")