# Edge Frequency by Source and Target Degree Across Permuted Networks

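This notebook measures how often an edge is present for each (source degree, target degree) pair across the permuted networks stored in `data/downloads/hetionet-permutations/permutations` (200 permutations are expected). For every degree pair, the frequency is the number of edges joining a source node and a target node with those degrees, divided by the total number of source–target node pairs with those degrees, pooled over all permutations. The resulting table can be used for downstream analysis or visualization.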

In [None]:
# Import required libraries
import os
import numpy as np
import scipy.sparse as sp
from collections import defaultdict
import pandas as pd

In [None]:
# Set the path to the permutation directories and edge file type.
# perm_dir is a relative path, so it is resolved against the kernel's current
# working directory.
perm_dir = "data/downloads/hetionet-permutations/permutations"
# File name of the sparse adjacency matrix that is looked up inside the
# "edges" directory of every permutation folder.
# NOTE(review): "AeG" is presumably a Hetionet metaedge abbreviation -- confirm.
edge_file = "AeG.sparse.npz"  # Change to your edge type if needed

In [None]:
# Gather the full path of every "*.hetmat" entry directly under perm_dir,
# ordered alphabetically by path
perm_folders = sorted(
    os.path.join(perm_dir, entry)
    for entry in os.listdir(perm_dir)
    if entry.endswith('.hetmat')
)
print(f"Found {len(perm_folders)} permutation folders.")

In [None]:
# Tallies keyed by (source degree, target degree); missing keys start at 0.
# total_counts: how many (source, target) node pairs have that degree pair.
total_counts = defaultdict(int)
# freq: how many of those node pairs are joined by an edge.
freq = defaultdict(int)

In [None]:
# Iterate through permutations and count edge frequencies by degree pair
def _degree_pair_counts(adj):
    """Tally the edges and node pairs of one adjacency matrix by degree pair.

    Parameters
    ----------
    adj : scipy sparse matrix
        Adjacency matrix whose rows are source nodes and whose columns are
        target nodes.

    Returns
    -------
    edge_counts : dict
        Maps (source_degree, target_degree) to the number of non-zero entries
        whose row and column have those degrees. Only degree pairs with at
        least one edge are present.
    pair_counts : dict
        Maps (source_degree, target_degree) to the number of (row, column)
        combinations with those degrees, whether or not an edge joins them.
    """
    # Degrees are row / column sums, i.e. entries are treated as 0/1 indicators.
    src_degrees = np.asarray(adj.sum(axis=1)).ravel()
    tgt_degrees = np.asarray(adj.sum(axis=0)).ravel()

    # Histogram of degrees on each side; *_idx maps every node to the position
    # of its degree within the sorted unique values.
    src_vals, src_idx, src_cnts = np.unique(
        src_degrees, return_inverse=True, return_counts=True
    )
    tgt_vals, tgt_idx, tgt_cnts = np.unique(
        tgt_degrees, return_inverse=True, return_counts=True
    )

    # The number of node pairs with degrees (d_s, d_t) is simply
    # (#sources with d_s) * (#targets with d_t); this replaces an explicit
    # Python loop over every (row, column) combination.
    pair_totals = np.outer(src_cnts, tgt_cnts)

    # Accumulate one count per stored edge into the matching degree-pair cell.
    edge_rows, edge_cols = adj.nonzero()
    edge_totals = np.zeros_like(pair_totals)
    np.add.at(edge_totals, (src_idx[edge_rows], tgt_idx[edge_cols]), 1)

    edge_counts, pair_counts = {}, {}
    for s_deg, edge_row, pair_row in zip(
        src_vals.tolist(), edge_totals.tolist(), pair_totals.tolist()
    ):
        for t_deg, n_edges, n_pairs in zip(tgt_vals.tolist(), edge_row, pair_row):
            pair_counts[(s_deg, t_deg)] = n_pairs
            if n_edges:
                edge_counts[(s_deg, t_deg)] = n_edges
    return edge_counts, pair_counts


# Start from empty tallies so that re-running this cell does not double-count.
freq.clear()
total_counts.clear()

for perm_path in perm_folders:
    edge_path = os.path.join(perm_path, "edges", edge_file)
    if not os.path.exists(edge_path):
        # Report the gap instead of skipping silently, so that the number of
        # permutations actually pooled is visible in the output.
        print(f"Skipping {perm_path}: {edge_path} not found.")
        continue
    adj = sp.load_npz(edge_path)
    edge_counts, pair_counts = _degree_pair_counts(adj)
    for degree_pair, n_edges in edge_counts.items():
        freq[degree_pair] += n_edges
    for degree_pair, n_pairs in pair_counts.items():
        total_counts[degree_pair] += n_pairs

In [None]:
# Compute the edge frequency of every degree pair: edges observed for that
# pair divided by the number of node pairs having it (both pooled over all
# loaded permutations)
freq_matrix = {
    degree_pair: n_edges / total_counts[degree_pair]
    for degree_pair, n_edges in freq.items()
}

# Convert to DataFrame for easier analysis. The columns are named explicitly
# so that an empty result (e.g. no edge file was found) still yields a frame
# with the expected columns rather than a KeyError in the sort below.
freq_df = pd.DataFrame(
    [
        {'source_degree': s_deg, 'target_degree': t_deg, 'frequency': fraction}
        for (s_deg, t_deg), fraction in freq_matrix.items()
    ],
    columns=['source_degree', 'target_degree', 'frequency'],
)

# Show top 10 most frequent degree pairs
freq_df.sort_values('frequency', ascending=False).head(10)