In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from cloudvolume import CloudVolume as cv
from caveclient import CAVEclient
from scipy.spatial import KDTree
import pygsheets
import ast
from collections import Counter, defaultdict
import time
import pickle
from scipy.spatial.distance import cdist
import networkx as nx
import random
from itertools import combinations
import igraph as ig

# Open the graphene segmentation volume for the zheng_ca3 table (agglomerate=True, HTTPS access).
vol_graphene =cv('graphene://https://minnie.microns-daf.com/segmentation/table/zheng_ca3', agglomerate=True, use_https=True)
# Alternative (disabled): flat segmentation volume seg_m195.
#vol = cv('gs://zheng_mouse_hippocampus_production/v2/seg_m195',parallel=True, progress=False, use_https=True)
# CAVE client for the 'zheng_ca3' datastack.
client = CAVEclient('zheng_ca3')
# NOTE(review): `t195` is only defined by the commented-out line below, yet the disabled
# adjacency-construction code further down passes `timestamp=t195`; re-enable this line
# before re-running that code.
#t195 = client.materialize.get_timestamp(195)


In [6]:
# load MF, PYR IDs of timestamp 195

# NOTE(review): the service-account key file is referenced by a hard-coded relative path;
# keep that file out of version control. The name `gc` also shadows the stdlib `gc` module.
gc = pygsheets.authorize(service_file = "./hippca3-8126bea0d603.json")
sheet = gc.open('CA3_cells')
mf_sheet = sheet.worksheet('title','mossy_fibers')
pyr_sheet = sheet.worksheet('title','pyramidal_cells')
mf_df = mf_sheet.get_as_df()
# Keep only rows typed as 'mossy fiber' that have a non-empty 'segid_195' entry.
mf_df = mf_df[mf_df['type'] == 'mossy fiber']
mf_df = mf_df[mf_df['segid_195'] != '']
pc_df = pyr_sheet.get_as_df()

# Second spreadsheet: the 'MF-pyr' worksheet is read starting at cell A4.
sheet2 = gc.open('all_pyramidal_cells')
mfpc_sheet = sheet2.worksheet('title','MF-pyr')
mfpc_df = mfpc_sheet.get_as_df(start='A4')
# Restrict the pyramidal-cell table to cells whose 'nucleus_segids' value appears in the MF-pyr sheet.
thorny_nuc_ids = mfpc_df['segid (Nuclei table segID)'].values
thorny_df = pc_df[pc_df['nucleus_segids'].isin(thorny_nuc_ids)]


In [16]:

def shuffle_adjmat_config_model(adj_matrix):
    """Randomize a bipartite adjacency matrix by shuffling and re-pairing edge stubs.

    Every source (column) contributes as many stubs as its column sum and every
    sink (row) as many stubs as its row sum. Both stub lists are shuffled with
    NumPy's global RNG and paired position by position.

    Parameters
    ----------
    adj_matrix : array-like, shape (n_sinks, n_sources)
        Adjacency matrix; row/column sums are truncated to integers and used
        as degrees.

    Returns
    -------
    numpy.ndarray of int, shape (n_sinks, n_sources)
        Binary randomized adjacency matrix.

    Raises
    ------
    ValueError
        If the total number of source stubs and sink stubs differ.

    Notes
    -----
    Repeated (sink, source) pairs are collapsed to a single 1, so the row and
    column sums of the result can be smaller than the input degrees.
    The pairing is done directly in NumPy; no intermediate graph object is
    needed, which also keeps nodes of degree zero from causing problems.
    """
    adj_matrix = np.asarray(adj_matrix)
    n_sinks, n_sources = adj_matrix.shape
    deg_sources = adj_matrix.sum(axis=0).astype(int)  # Source (columns)
    deg_sinks = adj_matrix.sum(axis=1).astype(int)    # Sink (rows)

    # One stub per edge end: node index repeated `degree` times.
    source_stubs = np.repeat(np.arange(n_sources), deg_sources)
    sink_stubs = np.repeat(np.arange(n_sinks), deg_sinks)

    if len(source_stubs) != len(sink_stubs):
        raise ValueError("The total number of stubs for sources and sinks must match.")

    np.random.shuffle(source_stubs)
    np.random.shuffle(sink_stubs)

    # Pair stubs position by position; duplicate pairs collapse into one entry.
    rand_adj = np.zeros((n_sinks, n_sources), dtype=int)
    rand_adj[sink_stubs, source_stubs] = 1
    return rand_adj

def configuration_model_from_adjacency(adj):
    """Bipartite configuration model that keeps multi-edges.

    Row sums (sink in-degrees) and column sums (source out-degrees) of `adj`
    are turned into stubs, both stub lists are shuffled with NumPy's global
    RNG, and stubs are paired position by position.

    Parameters
    ----------
    adj : array-like, shape (n_sinks, n_sources)
        Adjacency matrix. Degree sums are cast to int, so float-typed
        matrices (e.g. ``np.eye(n)``) are accepted.

    Returns
    -------
    numpy.ndarray of int, shape (n_sinks, n_sources)
        Randomized matrix whose entries count how often each (sink, source)
        pair was drawn; row and column sums equal the integer degrees of `adj`.

    Raises
    ------
    AssertionError
        If the number of sink stubs and source stubs differ.
    """
    adj = np.asarray(adj)
    n_sinks, n_sources = adj.shape

    # Step 1: Degree sequences (cast to int: np.repeat rejects float repeat counts)
    sink_indegree = adj.sum(axis=1).astype(int)
    source_outdegree = adj.sum(axis=0).astype(int)

    # Step 2: Create stubs
    sink_stubs = np.repeat(np.arange(n_sinks), sink_indegree)
    source_stubs = np.repeat(np.arange(n_sources), source_outdegree)

    # Check matching is possible
    assert len(sink_stubs) == len(source_stubs), "Total in-degree and out-degree must match"

    # Step 3: Shuffle and pair stubs
    np.random.shuffle(sink_stubs)
    np.random.shuffle(source_stubs)

    # Step 4: Build new adjacency matrix; np.add.at accumulates repeated
    # index pairs, so multi-edges are counted rather than overwritten.
    new_adj = np.zeros((n_sinks, n_sources), dtype=int)
    np.add.at(new_adj, (sink_stubs, source_stubs), 1)

    return new_adj

def simple_configuration_model(adj, max_attempts=1000):
    """Bipartite configuration model without multi-edges (rejection sampling).

    Stubs are built from the row sums (sinks) and column sums (sources) of
    `adj`, shuffled with the stdlib `random` module and paired position by
    position. A pairing that contains a repeated (sink, source) pair is
    rejected and the whole pairing is redrawn.

    Parameters
    ----------
    adj : array-like, shape (n_sinks, n_sources)
        Adjacency matrix. Degree sums are cast to int, so float-typed
        matrices are accepted.
    max_attempts : int, default 1000
        Maximum number of complete pairings to try.

    Returns
    -------
    numpy.ndarray of int, shape (n_sinks, n_sources)
        Binary matrix with the same integer row and column sums as `adj`.

    Raises
    ------
    ValueError
        If the number of sink stubs and source stubs differ.
    RuntimeError
        If no duplicate-free pairing is found within `max_attempts`.
    """
    adj = np.asarray(adj)
    n_sinks, n_sources = adj.shape
    # Cast to int: np.repeat rejects float repeat counts.
    sink_indegree = adj.sum(axis=1).astype(int)
    source_outdegree = adj.sum(axis=0).astype(int)

    # Step 1: Create stubs
    sink_stubs = np.repeat(np.arange(n_sinks), sink_indegree).tolist()
    source_stubs = np.repeat(np.arange(n_sources), source_outdegree).tolist()

    if len(sink_stubs) != len(source_stubs):
        raise ValueError("Total in-degree and out-degree must match")

    # Step 2: Shuffle and try to match without duplicates
    for _ in range(max_attempts):
        sink_pool = sink_stubs.copy()
        source_pool = source_stubs.copy()
        random.shuffle(sink_pool)
        random.shuffle(source_pool)

        # A set drops repeated pairs, so a size mismatch means a multi-edge was drawn.
        edges = set(zip(sink_pool, source_pool))
        if len(edges) != len(sink_pool):
            continue

        # Build adjacency matrix
        new_adj = np.zeros(adj.shape, dtype=int)
        if edges:
            rows, cols = zip(*edges)
            new_adj[list(rows), list(cols)] = 1
        return new_adj

    raise RuntimeError(f"Failed to generate a simple configuration model after {max_attempts} attempts")
    
def shuffle_adjmat_radius_model(d_mat, r, seed, coord_df):
    """Build a randomized MF-to-pyr adjacency matrix from distance-limited pairings.

    Every (bouton, thorn) index pair with ``d_mat[bouton, thorn] < r`` is a
    candidate edge. Candidates are shuffled and assigned in two passes:

    1. greedy one-to-one matching in shuffled order (a bouton and a thorn are
       each used at most once);
    2. every bouton still unmatched takes the first candidate thorn that
       appears for it in shuffled order, even if that thorn is already used.

    Each bouton index is mapped to ``coord_df['pre_ids']`` and each thorn index
    to ``coord_df['post_ids']`` at the same position, and the corresponding
    matrix entry is set to 1.

    Parameters
    ----------
    d_mat : array-like, 2-D
        Distance matrix indexed as [bouton, thorn].
    r : float
        Distance threshold; only pairs strictly closer than `r` may be paired.
    seed : int
        Seed passed to ``random.seed``. This reseeds the global stdlib RNG.
    coord_df : pandas.DataFrame
        Must provide 'pre_ids' and 'post_ids' columns, indexed positionally by
        bouton and thorn index respectively.

    Returns
    -------
    numpy.ndarray of int
        Binary matrix of shape (n unique post_ids, n unique pre_ids); rows and
        columns follow the order of first appearance in `coord_df`.

    Notes
    -----
    Boutons with no thorn closer than `r` are left unconnected (previously
    they raised IndexError).
    """
    # Candidate edges as (bouton_index, thorn_index) pairs.
    bouton_indices, thorn_indices = np.where(np.asarray(d_mat) < r)
    edges = list(zip(bouton_indices.tolist(), thorn_indices.tolist()))

    random.seed(seed)
    random.shuffle(edges)

    # Pass 1: greedy one-to-one matching.
    bouton_to_thorn = {}
    thorn_matched = set()
    for b, t in edges:
        if b not in bouton_to_thorn and t not in thorn_matched:
            bouton_to_thorn[b] = t
            thorn_matched.add(t)

    # Pass 2: leftover boutons take their first candidate, thorn reuse allowed.
    for b, t in edges:
        if b not in bouton_to_thorn:
            bouton_to_thorn[b] = t

    pre_ids = coord_df['pre_ids'].tolist()
    post_ids = coord_df['post_ids'].tolist()
    target_mf_ids = coord_df['pre_ids'].unique()
    target_pyr_ids = coord_df['post_ids'].unique()
    # Dictionary lookups replace a linear np.where scan per bouton.
    mf_index = {mf_id: k for k, mf_id in enumerate(target_mf_ids.tolist())}
    pyr_index = {pyr_id: k for k, pyr_id in enumerate(target_pyr_ids.tolist())}

    adj_mat_rand = np.zeros((len(target_pyr_ids), len(target_mf_ids)), dtype=int)
    for b, t in bouton_to_thorn.items():
        adj_mat_rand[pyr_index[post_ids[t]], mf_index[pre_ids[b]]] = 1

    return adj_mat_rand




In [17]:
# Load previously generated adjmatrix, mf_ids, pc_ids, dist_mat
# `syn_thresh` selects which cached file is read (it is part of the file name).
syn_thresh = 3
# SECURITY NOTE: pickle.load can execute arbitrary code; only load files you produced yourself.
with open('./variables/adjmat_and_bouton_thorn_dist_matrix_250707_th' +str(syn_thresh)+'.pkl','rb') as f:
    adj_mat_weighted, mf_ids, pc_ids, coord_df, d_mat, syn_df = pickle.load(f)
    
# Binarize: any non-zero weight counts as a connection.
adj_mat = (adj_mat_weighted != 0).astype(int)

In [11]:
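# Construct an adjacency matrix between Pyr and MF
# Use flat segmentation m195
# NOTE: the code below is wrapped in a triple-quoted string, so it is disabled; the analysis
# cells use the `adj_mat` loaded from the pickle file instead. Re-running it also requires
# `t195`, which is only defined in a commented-out line of the setup cell.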

'''
mf_ids_temp = mf_df['segid_195'].values
mf_ids_temp = [ids for ids in mf_ids_temp if ids != '']
pc_ids = thorny_df['segid_195'].values
syn_thresh = 3

print('Number of pyr IDs used: ', len(pc_ids))

mf_manually_identifed_zhihao_pyr_648518346442090245 = [648518346369197640, 648518346383876745, 648518346383931017, 648518346394388476, 648518346404290697, 648518346409172934, 648518346421440163, 648518346426154487, 648518346426411414, 648518346426420886, 648518346428546878, 648518346428550718, 648518346428663779, 648518346430163710, 648518346430885722, 648518346431036176, 648518346431151376, 648518346431549859, 648518346431600214, 648518346431791040, 648518346432844718, 648518346433020736, 648518346433096829, 648518346433221277, 648518346433736019, 648518346433921124, 648518346434062966, 648518346434614442, 648518346435060257, 648518346435163644, 648518346435244540, 648518346435414392, 648518346435440760, 648518346435754623, 648518346435847544, 648518346435859832, 648518346436065028, 648518346436078200, 648518346436228728, 648518346436369272, 648518346436382072, 648518346436401175, 648518346436693531, 648518346436705323, 648518346436722199, 648518346436736888, 648518346436772728, 648518346436812827, 648518346436846810, 648518346436863879, 648518346436864218, 648518346436879301, 648518346436940408, 648518346436940920, 648518346436943736, 648518346436945016, 648518346436991709, 648518346437144005, 648518346437153034, 648518346437220984, 648518346437606030, 648518346437646346, 648518346437703982, 648518346437708516, 648518346437794346, 648518346437952042, 648518346438002630, 648518346438005190, 648518346438014406, 648518346438015360, 648518346438040127, 648518346438203078, 648518346438206120, 648518346438206406, 648518346438251718, 648518346438313791, 648518346438453275, 648518346438515188, 648518346438518079, 648518346438561855, 648518346438640582, 648518346438653812, 648518346438684358, 648518346438687551, 648518346438764276, 648518346438783551, 648518346438820415, 648518346438825791, 648518346438878650, 648518346438901702, 648518346438906431, 648518346438960959, 648518346438964799, 648518346439152447, 648518346439198018, 648518346439207836, 648518346439213631, 
648518346439263295, 648518346439277631, 648518346439281343, 648518346439342143, 648518346439351871, 648518346439383103, 648518346439389503, 648518346439405887, 648518346439407935, 648518346439419455, 648518346439428759, 648518346439468695, 648518346439582600, 648518346439608924, 648518346439682623, 648518346440669455, 648518346440679183, 648518346440681118, 648518346440710823, 648518346440934253, 648518346441454294, 648518346441492694, 648518346441526230, 648518346441841035, 648518346441946571, 648518346442016351, 648518346442032162, 648518346442147557, 648518346442163595, 648518346442226225, 648518346442336816, 648518346442421808, 648518346442445063, 648518346442493685, 648518346442643759, 648518346442651401, 648518346442704687, 648518346442751880, 648518346442853076, 648518346442855124, 648518346443143728, 648518346443797823, 648518346444000183, 648518346444116601, 648518346444121921, 648518346444141377, 648518346444158585, 648518346444184385, 648518346444248129, 648518346444422375, 648518346444427239, 648518346444899791, 648518346444900493, 648518346444935939, 648518346445025283, 648518346445155891, 648518346445178675, 648518346445187649, 648518346445190962, 648518346445220673, 648518346445225750, 648518346445231169, 648518346445248065, 648518346445274689, 648518346445295497, 648518346445441417, 648518346445443191, 648518346445477149, 648518346445514269, 648518346445550386, 648518346445580082, 648518346445582130, 648518346445585714, 648518346445635011, 648518346445680442, 648518346445725379, 648518346445897522, 648518346446288590, 648518346446493481, 648518346446582888, 648518346446707241, 648518346446976065, 648518346446976970, 648518346447037057, 648518346447156339, 648518346447310642, 648518346447354418, 648518346447393489, 648518346447455361, 648518346447456385, 648518346447474305, 648518346447525697, 648518346447615873, 648518346447711347, 648518346447793980, 648518346447828851, 648518346447832947, 648518346447833459, 648518346447835251, 648518346447837756, 
648518346447993148, 648518346447993515, 648518346447996843, 648518346448016243, 648518346448115059, 648518346448142951, 648518346448152167, 648518346448266411, 648518346448345297, 648518346448817011, 648518346448999027, 648518346449141633, 648518346449212801, 648518346449263397, 648518346449535556, 648518346449619663, 648518346450711887, 648518346450867219, 648518346450876769, 648518346451037583, 648518346451056545, 648518346451302387, 648518346451320379, 648518346451377651, 648518346451686971, 648518346451879663, 648518346451948781, 648518346451960301, 648518346452209350, 648518346452277222, 648518346452496102, 648518346454342988, 648518346455185289, 648518346456752565, 648518346457259378, 648518346457442418, 648518346457511861, 648518346457513909, 648518346457514421, 648518346457565714, 648518346457607605, 648518346457831605, 648518346457896885, 648518346458130819, 648518346458391427, 648518346459186051, 648518346461813379, 648518346462388867, 648518346462747023, 648518346462753935, 648518346463194474, 648518346463266959, 648518346465033615, 648518346465295601, 648518346465707407, 648518346465858798, 648518346466597617, 648518346476441254, 648518346476549030, 648518346478250918]
mf_to_add = np.array([val for i, val in enumerate(mf_manually_identifed_zhihao_pyr_648518346442090245) if val not in mf_ids_temp])
mf_ids = list(np.append(mf_ids_temp,mf_to_add))

pre_ids = []
post_ids = []
adj_mat = np.zeros((len(pc_ids), len(mf_ids)), dtype=int)

stime = time.time()
for i in range(0, len(pc_ids)):
    syn = client.materialize.synapse_query(pre_ids=mf_ids, post_ids=pc_ids[i], bounding_box=None, bounding_box_column='post_pt_position', 
                timestamp=t195, remove_autapses=True, include_zeros=False, limit=None, offset=None, 
                split_positions=False, desired_resolution=[1000,1000,1000], materialization_version=None, 
                synapse_table='synapses_ca3_v1', datastack_name='zheng_ca3', metadata=True) 
    syn2 = syn.groupby('pre_pt_root_id')
    syn_grouped = {name: group for name, group in syn2 if len(group)>=syn_thresh}

    for key, group in syn_grouped.items():
        pre_ids.append(key)
        post_ids.append(pc_ids[i])
        j = mf_ids.index(key)
        adj_mat[i,j] = 1
        
    if i%100 ==0:
        print(i)
etime = time.time()
print(etime - stime)
'''

Number of pyr IDs used:  630
0
100
200
300
400
500
600
1730.6373629570007


In [18]:
# 3-node, 4-node motif analyses of bipartite graph

def count_bipartite_motifs(adj_matrix):
    """Count small motifs in a bipartite sink-by-source adjacency matrix.

    Any non-zero entry of `adj_matrix` is treated as an edge. All counts are
    computed in closed form from node degrees and pairwise shared-neighbor
    counts instead of enumerating combinations.

    Parameters
    ----------
    adj_matrix : array-like, shape (n_sinks, n_sources)
        Rows are sinks, columns are sources.

    Returns
    -------
    dict
        Python ints keyed by:
        - "3-node fan-in": pairs of sources sharing a sink, sum of C(deg_sink, 2)
        - "3-node fan-out": pairs of sinks sharing a source, sum of C(deg_source, 2)
        - "4-node fan-in": triples of sources sharing a sink, sum of C(deg_sink, 3)
        - "4-node fan-out": triples of sinks sharing a source, sum of C(deg_source, 3)
        - "4-node butterfly": for each pair of sinks, C(number of shared sources, 2)
        - "path-of-length-3": ordered (sink1, source, sink2) walks with
          sink1 != sink2, i.e. sum of deg_source * (deg_source - 1)

    Notes
    -----
    An earlier version also computed an unused "sharing" ratio that divided by
    the butterfly count and therefore raised ZeroDivisionError on graphs
    without butterflies; that computation was removed.
    """
    present = np.asarray(adj_matrix) != 0
    n_sinks, n_sources = present.shape

    # Degrees as Python ints so the combinatorial sums cannot overflow.
    sink_deg = present.sum(axis=1).tolist()
    source_deg = present.sum(axis=0).tolist()

    # Fan-in
    fanin3 = sum(k * (k - 1) // 2 for k in sink_deg)
    fanin4 = sum(k * (k - 1) * (k - 2) // 6 for k in sink_deg)

    # Fan-out
    fanout3 = sum(k * (k - 1) // 2 for k in source_deg)
    fanout4 = sum(k * (k - 1) * (k - 2) // 6 for k in source_deg)

    # Butterfly: entry (i, j) of B @ B.T is the number of sources shared by
    # sinks i and j; each unordered sink pair contributes C(shared, 2).
    binary = present.astype(np.int64)
    shared = (binary @ binary.T)[np.triu_indices(n_sinks, k=1)]
    butterfly = int((shared * (shared - 1) // 2).sum())

    # Ordered sink-source-sink walks with distinct end points.
    path3 = sum(k * (k - 1) for k in source_deg)

    return {
        "3-node fan-in": fanin3,
        "3-node fan-out": fanout3,
        "4-node fan-in": fanin4,
        "4-node fan-out": fanout4,
        "4-node butterfly": butterfly,
        "path-of-length-3": path3,
    }

# Motif counts for the observed (binarized) adjacency matrix, one line per motif.
motifs = count_bipartite_motifs(adj_mat)
for key, val in motifs.items():
    print(f"{key}: {val}")

3-node fan-in: 1030653
3-node fan-out: 5216
4-node fan-in: 38234155
4-node fan-out: 630
4-node butterfly: 505
path-of-length-3: 10432


In [8]:
import math
stime = time.time()
# Closed-form check: sum of C(k, 2) * count over a hard-coded {k: count} table.
aa = sum([math.comb(i,2)*j for i,j in {2: 233, 3: 25, 4: 9, 5: 4, 6: 1, 7: 1, 8: 1}.items()])
etime = time.time()
print(etime-stime)
print(aa)
# NOTE(review): `butterfly` is not assigned at top level anywhere in the visible notebook
# (it is only a local inside count_bipartite_motifs) — presumably left over from an earlier
# kernel session; this line fails on a fresh kernel. Confirm and remove or redefine.
print(butterfly)

3.719329833984375e-05
466
466


In [19]:
# Count bipartite motifs on configuration models

N=100

clustering_coeff = []  # not used in this cell
motif_records = defaultdict(list)
for ii in range(N):
    # Seed NumPy's global RNG per iteration (shuffle_adjmat_config_model draws from
    # np.random) so the null ensemble is reproducible, mirroring the radius-model
    # cells that pass `ii` as the seed.
    np.random.seed(ii)
    adj_rand = shuffle_adjmat_config_model(adj_mat)
    #adj_rand = simple_configuration_model(adj_mat, max_attempts=10000)
    # check degree-preservation
    #print("Preserved sink in-degrees %:", np.sum(adj_mat.sum(axis=1) == adj_rand.sum(axis=1))/ adj_mat.shape[0] *100)
    #print("Preserved source out-degrees %:",np.sum(adj_mat.sum(axis=0) == adj_rand.sum(axis=0))/ adj_mat.shape[1] *100)

    counts = count_bipartite_motifs(adj_rand)
    for motif, count in counts.items():
        motif_records[motif].append(count)

    if ii % 10 == 0:
        print(f"  Processed {ii+1}/{N} random models.")

# Mean and (population) standard deviation of every motif count across the N null models.
stats = {
    motif: {
        "mean": np.mean(vals),
        "std": np.std(vals)
    }
    for motif, vals in motif_records.items()
}

for motif, s in stats.items():
    print(f"{motif}: mean = {s['mean']:.2f}, std = {s['std']:.2f}")

  Processed 1/100 random models.
  Processed 11/100 random models.
  Processed 21/100 random models.
  Processed 31/100 random models.
  Processed 41/100 random models.
  Processed 51/100 random models.
  Processed 61/100 random models.
  Processed 71/100 random models.
  Processed 81/100 random models.
  Processed 91/100 random models.
3-node fan-in: mean = 1029345.58, std = 450.02
3-node fan-out: mean = 5200.04, std = 5.56
4-node fan-in: mean = 38131198.00, std = 45586.99
4-node fan-out: mean = 625.07, std = 4.02
4-node butterfly: mean = 131.01, std = 13.67
path-of-length-3: mean = 10400.08, std = 11.11


In [20]:
# Count bipartite motifs on radius models
# Each null model is drawn with shuffle_adjmat_radius_model, seeded by the loop index.

N, r = 100, 10
clustering_coeff = []
motif_records = defaultdict(list)

for ii in range(N):
    adj_rand = shuffle_adjmat_radius_model(d_mat, r, ii, coord_df)
    counts = count_bipartite_motifs(adj_rand)
    for motif, count in counts.items():
        motif_records[motif].append(count)
    if not ii % 10:
        print(f"  Processed {ii+1}/{N} random models.")

# Per-motif mean and standard deviation over the N random models.
stats = {
    motif: dict(mean=np.mean(vals), std=np.std(vals))
    for motif, vals in motif_records.items()
}

for motif, s in stats.items():
    print(f"{motif}: mean = {s['mean']:.2f}, std = {s['std']:.2f}")

  Processed 1/100 random models.
  Processed 11/100 random models.
  Processed 21/100 random models.
  Processed 31/100 random models.
  Processed 41/100 random models.
  Processed 51/100 random models.
  Processed 61/100 random models.
  Processed 71/100 random models.
  Processed 81/100 random models.
  Processed 91/100 random models.
3-node fan-in: mean = 1019388.71, std = 2109.27
3-node fan-out: mean = 4992.31, std = 14.82
4-node fan-in: mean = 37514752.94, std = 224652.81
4-node fan-out: mean = 581.50, std = 7.15
4-node butterfly: mean = 443.27, std = 25.35
path-of-length-3: mean = 9984.62, std = 29.64


In [None]:
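# 50 um radius: pasted motif-count results (presumably from an earlier radius-model run with r = 50); notes only, not executable code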
3-node fan-in: mean = 1185051.74, std = 1471.14
3-node fan-out: mean = 3468.33, std = 6.21
4-node fan-in: mean = 48590262.84, std = 159524.17
4-node fan-out: mean = 193.98, std = 1.92
4-node butterfly: mean = 256.73, std = 16.82
path-of-length-3: mean = 6936.66, std = 12.42
clustering coeff: mean = 0.13, std = 0.01

In [21]:
# (Projected graph onto Pyr) Using igraph: Draw an edge between pyr cells if they share a common MF and count motifs
import igraph as ig 

# Bipartite edge list (MF label, pyr label) from the observed binary adjacency matrix.
pc_indices, mf_indices = np.where(adj_mat == 1)
edges = [(f"m{i}", f"p{j}") for i, j in zip(mf_indices, pc_indices)]

source_to_sinks = defaultdict(list)
for src, sink in edges:
    source_to_sinks[src].append(sink)

# Project onto sinks (connect sinks that share a source); sorting each pair makes
# the edge orientation canonical so the set removes duplicates.
projected_edges = set()
for sinks in source_to_sinks.values():
    for sink_a, sink_b in combinations(sinks, 2):
        projected_edges.add(tuple(sorted((sink_a, sink_b))))

# Only sinks that have at least one incoming edge become vertices.
sink_nodes = list(set(sink for _, sink in edges))
g_sink = ig.Graph()
g_sink.add_vertices(sink_nodes)
g_sink.add_edges(list(projected_edges))

# Count motifs and clustering (run the motif search once and reuse the result)
motifs_3node = g_sink.motifs_randesu(size=3)
triangle_count_observed = motifs_3node[3]  # triangle = motif ID 3
open_triplet_count_observed = motifs_3node[2]  # open triplet = motif ID 2
clustering_coeff_observed = g_sink.transitivity_undirected()


print(triangle_count_observed)
print(open_triplet_count_observed)
print(clustering_coeff_observed)

3380
87153
0.1042212697727483


In [5]:
# Hand check of the form 1 / (1 + x / (3 * y)) with hard-coded counts.
# NOTE(review): presumably x = open triplets and y = triangles (a clustering coefficient);
# the numbers 61807 and 2821 do not match the igraph counts printed elsewhere in this
# notebook, so they likely come from an earlier dataset version — confirm.
a = (1+(61807/(2821*3)))
print(1/a)

8.303202174169916
0.12043546321332005


In [22]:
# (Projected graph onto Pyr) Using igraph: Count motifs on projected configuration models

N=100

counts_triangle = []
counts_open_triplet = []
clustering_coeff = []

for ii in range(N):
    # Seed NumPy's global RNG per iteration (shuffle_adjmat_config_model draws from
    # np.random) so the null ensemble is reproducible.
    np.random.seed(ii)
    adj_rand = shuffle_adjmat_config_model(adj_mat)
    pc_indices, mf_indices = np.where(adj_rand == 1)
    edges = [(f"m{i}", f"p{j}") for i, j in zip(mf_indices, pc_indices)]

    source_to_sinks = defaultdict(list)
    for src, sink in edges:
        source_to_sinks[src].append(sink)

    # Connect every pair of sinks that share a source; sorted pairs deduplicate in the set.
    projected_edges = set()
    for sinks in source_to_sinks.values():
        for sink_a, sink_b in combinations(sinks, 2):
            projected_edges.add(tuple(sorted((sink_a, sink_b))))
    sink_nodes = list(set(sink for _, sink in edges))
    g_sink = ig.Graph()
    g_sink.add_vertices(sink_nodes)
    g_sink.add_edges(list(projected_edges))
    # Run the motif search once and reuse the result for both motif classes.
    motifs_3node = g_sink.motifs_randesu(size=3)
    triangle_count = motifs_3node[3]  # triangle = motif ID 3
    open_triplet_count = motifs_3node[2]  # open triplet = motif ID 2
    clustering = g_sink.transitivity_undirected()

    counts_triangle.append(triangle_count)
    counts_open_triplet.append(open_triplet_count)
    clustering_coeff.append(clustering)

    if ii % 10 == 0:
        print(f"  Processed {ii+1}/{N} random models.")


print("3-node motifs on a projected graph onto pyr cells")
print("-"*30)
print("triangle_count_observed = " ,triangle_count_observed)
print("open_triplet_count_observed = ", open_triplet_count_observed)
print("clustering_coeff_observed = ", clustering_coeff_observed)
print("*"*50)
print()
print("Stats of 3-node motifs on ", str(N), " projected random configuration models")
print("-"*30)
print("Mean(triangle) = ", np.mean(counts_triangle))
print("StdDev(triangle) = ", np.std(counts_triangle))
print("-"*30)
print("Mean(open_triplet) = ", np.mean(counts_open_triplet))
print("StdDev(open_triplet) = ", np.std(counts_open_triplet))
print("-"*30)
print("Mean(clustering coeff) = ", np.mean(clustering_coeff))
print("StdDev(clustering coeff) = ", np.std(clustering_coeff))

  Processed 1/100 random models.
  Processed 11/100 random models.
  Processed 21/100 random models.
  Processed 31/100 random models.
  Processed 41/100 random models.
  Processed 51/100 random models.
  Processed 61/100 random models.
  Processed 71/100 random models.
  Processed 81/100 random models.
  Processed 91/100 random models.
3-node motifs on a projected graph onto pyr cells
------------------------------
triangle_count_observed =  3380
open_triplet_count_observed =  87153
clustering_coeff_observed =  0.1042212697727483
**************************************************

Stats of 3-node motifs on  100  projected random configuration models
------------------------------
Mean(triangle) =  2299.1
StdDev(triangle) =  77.23632565056418
------------------------------
Mean(open_triplet) =  105343.63
StdDev(open_triplet) =  1282.5190887858162
------------------------------
Mean(clustering coeff) =  0.061441105042971456
StdDev(clustering coeff) =  0.0015548980212102602


In [11]:
# (Projected graph onto Pyr cells) Using igraph (faster): Count motifs on projected radius models

N=100
r=10
counts_triangle2 = []
counts_open_triplet2 = []
clustering_coeff2 = []

for ii in range(N):

    # The loop index doubles as the seed of the radius model, so runs are reproducible.
    adj_rand = shuffle_adjmat_radius_model(d_mat,r,ii, coord_df)
    pc_indices, mf_indices = np.where(adj_rand == 1)
    edges = [(f"m{i}", f"p{j}") for i, j in zip(mf_indices, pc_indices)]

    source_to_sinks = defaultdict(list)
    for src, sink in edges:
        source_to_sinks[src].append(sink)

    # Connect every pair of sinks that share a source; sorted pairs deduplicate in the set.
    projected_edges = set()
    for sinks in source_to_sinks.values():
        for sink_a, sink_b in combinations(sinks, 2):
            projected_edges.add(tuple(sorted((sink_a, sink_b))))
    sink_nodes = list(set(sink for _, sink in edges))
    g_sink = ig.Graph()
    g_sink.add_vertices(sink_nodes)
    g_sink.add_edges(list(projected_edges))
    # Run the motif search once and reuse the result for both motif classes.
    motifs_3node = g_sink.motifs_randesu(size=3)
    triangle_count = motifs_3node[3]  # triangle = motif ID 3
    open_triplet_count = motifs_3node[2]  # open triplet = motif ID 2
    clustering = g_sink.transitivity_undirected()

    counts_triangle2.append(triangle_count)
    counts_open_triplet2.append(open_triplet_count)
    clustering_coeff2.append(clustering)

    if ii % 10 == 0:
        print(f"  Processed {ii+1}/{N} random models.")


print("Stats of 3-node motifs on ", str(N), " projected radius<", str(r)," models")
print("-"*30)
print("Mean(triangle) = ", np.mean(counts_triangle2))
print("StdDev(triangle) = ", np.std(counts_triangle2))
print("-"*30)
print("Mean(open_triplet) = ", np.mean(counts_open_triplet2))
print("StdDev(open_triplet) = ", np.std(counts_open_triplet2))
print("-"*30)
print("Mean(clustering coeff) = ", np.mean(clustering_coeff2))
print("StdDev(clustering coeff) = ", np.std(clustering_coeff2))

  Processed 1/100 random models.
  Processed 11/100 random models.
  Processed 21/100 random models.
  Processed 31/100 random models.
  Processed 41/100 random models.
  Processed 51/100 random models.
  Processed 61/100 random models.
  Processed 71/100 random models.
  Processed 81/100 random models.
  Processed 91/100 random models.
Stats of 3-node motifs on  100  projected radius< 10  models
------------------------------
Mean(triangle) =  2497.71
StdDev(triangle) =  73.3508411676376
------------------------------
Mean(open_triplet) =  57855.78
StdDev(open_triplet) =  642.5964764920518
------------------------------
Mean(clustering coeff) =  0.11464907062606362
StdDev(clustering coeff) =  0.00246806409818598


In [12]:
# (Projected graph onto Pyr cells) Using igraph (faster): Count motifs on projected radius models
# Same analysis as the r=10 cell, with a 50-unit radius; the result lists are overwritten.

N=100
r=50
counts_triangle2 = []
counts_open_triplet2 = []
clustering_coeff2 = []

for ii in range(N):
    # The loop index doubles as the seed of the radius model, so runs are reproducible.
    adj_rand = shuffle_adjmat_radius_model(d_mat,r,ii, coord_df)
    pc_indices, mf_indices = np.where(adj_rand == 1)
    edges = [(f"m{i}", f"p{j}") for i, j in zip(mf_indices, pc_indices)]

    source_to_sinks = defaultdict(list)
    for src, sink in edges:
        source_to_sinks[src].append(sink)

    # Connect every pair of sinks that share a source; sorted pairs deduplicate in the set.
    projected_edges = set()
    for sinks in source_to_sinks.values():
        for sink_a, sink_b in combinations(sinks, 2):
            projected_edges.add(tuple(sorted((sink_a, sink_b))))
    sink_nodes = list(set(sink for _, sink in edges))
    g_sink = ig.Graph()
    g_sink.add_vertices(sink_nodes)
    g_sink.add_edges(list(projected_edges))
    # Run the motif search once and reuse the result for both motif classes.
    motifs_3node = g_sink.motifs_randesu(size=3)
    triangle_count = motifs_3node[3]  # triangle = motif ID 3
    open_triplet_count = motifs_3node[2]  # open triplet = motif ID 2
    clustering = g_sink.transitivity_undirected()

    counts_triangle2.append(triangle_count)
    counts_open_triplet2.append(open_triplet_count)
    clustering_coeff2.append(clustering)

    if ii % 10 == 0:
        print(f"  Processed {ii+1}/{N} random models.")


print("Stats of 3-node motifs on ", str(N), " projected radius<", str(r)," models")
print("-"*30)
print("Mean(triangle) = ", np.mean(counts_triangle2))
print("StdDev(triangle) = ", np.std(counts_triangle2))
print("-"*30)
print("Mean(open_triplet) = ", np.mean(counts_open_triplet2))
print("StdDev(open_triplet) = ", np.std(counts_open_triplet2))
print("-"*30)
print("Mean(clustering coeff) = ", np.mean(clustering_coeff2))
print("StdDev(clustering coeff) = ", np.std(clustering_coeff2))

  Processed 1/100 random models.
  Processed 11/100 random models.
  Processed 21/100 random models.
  Processed 31/100 random models.
  Processed 41/100 random models.
  Processed 51/100 random models.
  Processed 61/100 random models.
  Processed 71/100 random models.
  Processed 81/100 random models.
  Processed 91/100 random models.
Stats of 3-node motifs on  100  projected radius< 50  models
------------------------------
Mean(triangle) =  2326.19
StdDev(triangle) =  72.35878592126875
------------------------------
Mean(open_triplet) =  63068.55
StdDev(open_triplet) =  683.0718025947199
------------------------------
Mean(clustering coeff) =  0.09961358817151028
StdDev(clustering coeff) =  0.002363489599021539


In [36]:
# Create a bipartite graph between MF (source) and pyr cells (sink)
# No cycles are possible
# 3-Node Motifs: [(MF1,MF2) -> Pyr1]     [MF1 -> (Pyr1,Pyr2)]
# 4-Node Motifs: [(MF1,MF2) -> (Pyr1,Pyr2)]     [MF1 -> (Pyr1,Pyr2) and MF2 -> Pyr1]   


# Node labels: "m<column index>" for MFs and "p<row index>" for pyr cells of adj_mat.
pc_indices, mf_indices = np.where(adj_mat == 1)
edges = [(f"m{i}", f"p{j}") for i, j in zip(mf_indices, pc_indices)]
mf_nodes = [f"m{i}" for i in range(len(mf_ids))]
pc_nodes = [f"p{j}" for j in range(len(pc_ids))]

G = nx.Graph()
G.add_nodes_from(mf_nodes, bipartite = 0)
G.add_nodes_from(pc_nodes, bipartite = 1)
G.add_edges_from(edges)

# Drop MF nodes without any edge; pyr nodes without edges are kept.
nodes_to_remove = []
for node in mf_nodes:
    if G.degree(node) == 0:
        nodes_to_remove.append(node)
G.remove_nodes_from(nodes_to_remove)
# Rebuild mf_nodes from the graph so it only lists the remaining MF nodes.
mf_nodes = [n for n,d in G.nodes(data=True) if d['bipartite']==0]

# 3-node motifs
print('counting 3-node motifs...')
motif_counts = Counter()
# Fan-in: one count per unordered pair of MFs attached to the same pyr cell.
for pc in pc_nodes:
    neighbors = list(G.neighbors(pc))
    for mf1, mf2 in combinations(neighbors, 2):
        motif_counts['fan_in_3_node'] += 1

# Fan-out: one count per unordered pair of pyr cells attached to the same MF.
for mf in mf_nodes:
    neighbors = list(G.neighbors(mf))
    for pc1, pc2 in combinations(neighbors, 2):
        motif_counts['fan_out_3_node'] += 1
        
# 4-node motifs
print('counting 4-node motifs...')
# Butterfly: for every MF pair, C(k, 2) where k is the number of pyr cells they share.
for mf1, mf2 in combinations(mf_nodes, 2):
    pyr_common = set(G.neighbors(mf1)) & set(G.neighbors(mf2))
    if len(pyr_common) >= 2:
        motif_counts['butterfly_4_node'] += (len(pyr_common) * (len(pyr_common)-1) // 2)


counting 3-node motifs...
counting 4-node motifs...


In [38]:
# Observed motif counts from the networkx bipartite graph (fan-in, fan-out, butterfly).
print(motif_counts['fan_in_3_node'])
print(motif_counts['fan_out_3_node'])
print(motif_counts['butterfly_4_node'])

939757
4043
901


In [59]:
# Create random null models (networkx bipartite configuration model) and count motifs on each

N=100
# A Counter is kept for compatibility with the later print cells, although it stores lists here.
motif_counts_null = Counter()
motif_counts_null['fan_in_3_node'] = []
motif_counts_null['fan_out_3_node'] = []
motif_counts_null['butterfly_4_node'] = []
mf_degrees = {node: G.degree(node) for node in mf_nodes}
pc_degrees = {node: G.degree(node) for node in pc_nodes}
for ii in range(N):
    G_rand = nx.bipartite.configuration_model([mf_degrees[n] for n in mf_nodes], [pc_degrees[n] for n in pc_nodes], seed=ii)
    # Converting to nx.Graph collapses any parallel edges, so degrees may shrink slightly.
    G_rand = nx.Graph(G_rand)
    # The generated nodes already carry the 'bipartite' attribute read below. The former
    # loops that tried to set it looked up the string labels of G ("m..", "p.."), which
    # are not nodes of G_rand, so they never did anything and were removed.
    top_nodes = [n for n, d in G_rand.nodes(data=True) if d['bipartite']==0]
    bottom_nodes = [n for n ,d in G_rand.nodes(data=True) if d['bipartite']==1]

    # 3-node motifs: C(degree, 2) neighbor pairs per node
    counts_fan_in = 0
    counts_fan_out = 0
    counts_butterfly = 0
    for pc in bottom_nodes:
        n_neighbors = len(list(G_rand.neighbors(pc)))
        counts_fan_in += n_neighbors * (n_neighbors - 1) // 2

    for mf in top_nodes:
        n_neighbors = len(list(G_rand.neighbors(mf)))
        counts_fan_out += n_neighbors * (n_neighbors - 1) // 2

    # 4-node motifs: C(k, 2) per pair of top nodes sharing k bottom nodes
    for mf1, mf2 in combinations(top_nodes, 2):
        pyr_common = set(G_rand.neighbors(mf1)) & set(G_rand.neighbors(mf2))
        if len(pyr_common) >= 2:
            counts_butterfly += (len(pyr_common) * (len(pyr_common)-1) // 2)

    motif_counts_null['fan_in_3_node'].append(counts_fan_in)
    motif_counts_null['fan_out_3_node'].append(counts_fan_out)
    motif_counts_null['butterfly_4_node'].append(counts_butterfly)
    print(f"  Processed {ii+1}/{N} random models.")

  Processed 1/100 random models.
  Processed 2/100 random models.
  Processed 3/100 random models.
  Processed 4/100 random models.
  Processed 5/100 random models.
  Processed 6/100 random models.
  Processed 7/100 random models.
  Processed 8/100 random models.
  Processed 9/100 random models.
  Processed 10/100 random models.
  Processed 11/100 random models.
  Processed 12/100 random models.
  Processed 13/100 random models.
  Processed 14/100 random models.
  Processed 15/100 random models.
  Processed 16/100 random models.
  Processed 17/100 random models.
  Processed 18/100 random models.
  Processed 19/100 random models.
  Processed 20/100 random models.
  Processed 21/100 random models.
  Processed 22/100 random models.
  Processed 23/100 random models.
  Processed 24/100 random models.
  Processed 25/100 random models.
  Processed 26/100 random models.
  Processed 27/100 random models.
  Processed 28/100 random models.
  Processed 29/100 random models.
  Processed 30/100 rand

In [50]:
# Raw dump of the observed counts and of the per-model null counts.
print(motif_counts)
print(motif_counts_null)

Counter({'fan_in_3_node': 939757, 'fan_out_3_node': 4043, 'butterfly_4_node': 901})
Counter({'fan_in_3_node': [937937, 938424, 939181, 938409, 938674, 938075, 938418, 938208, 938285, 938990], 'fan_out_3_node': [4022, 4027, 4036, 4029, 4030, 4021, 4020, 4020, 4025, 4030], 'butterfly_4_node': [143, 139, 121, 140, 126, 115, 117, 110, 128, 133]})


In [60]:
# Summary table: observed counts first, then mean / std of each motif across the null models.
print(f'Motif analysis stats from observed data')
for motif, counts_list in motif_counts.items():
    # Here `counts_list` is a single integer count (observed data), not a list.
    print(f"{motif:<20} Counts: {counts_list:<10}")    
print('----------------------------------------------------------')
print(f'Motif analysis stats from {N} random configuration models')
for motif, counts_list in motif_counts_null.items():
    # np.std uses its default (population) normalization.
    mean_count = np.mean(np.array(counts_list))
    std_dev = np.std(np.array(counts_list))
    print(f"{motif:<20} Mean: {mean_count:<10} Std Dev: {std_dev:.2f}")

Motif analysis stats from observed data
fan_in_3_node        Counts: 939757    
fan_out_3_node       Counts: 4043      
butterfly_4_node     Counts: 901       
----------------------------------------------------------
Motif analysis stats from 100 random configuration models
fan_in_3_node        Mean: 938648.62  Std Dev: 387.03
fan_out_3_node       Mean: 4028.18    Std Dev: 4.98
butterfly_4_node     Mean: 124.07     Std Dev: 11.51


In [4]:
# (TOO SLOW) Using Networkx: Draw an edge between pyr cells if they share a common MF

def classify_3_node_motif(subg):
    """Label a subgraph by its sorted degree sequence.

    Parameters
    ----------
    subg : graph-like
        Object whose ``degree()`` result can be turned into a
        ``{node: degree}`` dict.

    Returns
    -------
    str
        'Open Triplet' for degrees (1, 1, 2), 'Triangle' for (2, 2, 2),
        and 'Other' for any other degree sequence.
    """
    labels_by_degrees = {
        (1, 1, 2): 'Open Triplet',
        (2, 2, 2): 'Triangle',
    }
    degree_key = tuple(sorted(dict(subg.degree()).values()))
    return labels_by_degrees.get(degree_key, 'Other')

def count_and_classify_motifs(G, size):
    """Tally motif labels over all connected induced subgraphs of `size` nodes.

    Every `size`-node combination of ``G.nodes`` is enumerated; combinations
    whose induced subgraph is not connected are skipped. The remaining ones
    are labelled with ``classify_3_node_motif`` (size 3),
    ``classify_4_node_motif`` (size 4) or 'Unknown' (any other size).

    Parameters
    ----------
    G : networkx graph
        Graph to scan.
    size : int
        Number of nodes per candidate subgraph.

    Returns
    -------
    collections.Counter
        Mapping of motif label -> number of connected subgraphs with it.

    Notes
    -----
    Brute-force enumeration: the number of candidates is C(n_nodes, size).
    """
    tally = Counter()
    for node_subset in combinations(G.nodes, size):
        induced = G.subgraph(node_subset)
        if not nx.is_connected(induced):
            continue  # only connected subgraphs count as motifs

        if size == 3:
            label = classify_3_node_motif(induced)
        elif size == 4:
            # classify_4_node_motif is not defined in this cell; it is looked
            # up only when a 4-node subgraph is actually classified.
            label = classify_4_node_motif(induced)
        else:
            label = 'Unknown'
        tally[label] += 1
    return tally

# --- Bipartite MF/PC graph -------------------------------------------------
# np.where returns (row, column) indices; this cell treats rows as pyramidal
# cells and columns as mossy fibers. Only entries equal to 1 become edges.
pc_indices, mf_indices = np.where(adj_mat == 1)
edges = [(f"m{i}", f"p{j}") for i, j in zip(mf_indices, pc_indices)]
mf_nodes = [f"m{i}" for i in range(len(mf_ids))]
pc_nodes = [f"p{j}" for j in range(len(pc_ids))]

G = nx.Graph()
G.add_nodes_from(mf_nodes, bipartite = 0)
G.add_nodes_from(pc_nodes, bipartite = 1)
G.add_edges_from(edges)

# Drop mossy fibers that have no edge at all, then refresh the MF node list.
nodes_to_remove = [node for node in mf_nodes if G.degree(node) == 0]
G.remove_nodes_from(nodes_to_remove)
mf_nodes = [n for n,d in G.nodes(data=True) if d['bipartite']==0]

# --- Projection onto pyramidal cells ----------------------------------------
# Two PCs are linked when they share at least one MF neighbour; the edge
# weight is the number of shared MFs. Neighbour sets are built once per PC
# instead of once per pair, which removes redundant work from the O(n^2) loop.
pc_neighbors = {pc: set(G.neighbors(pc)) for pc in pc_nodes}

P_edges = []
for pc1, pc2 in combinations(pc_nodes,2):
    mf_common = pc_neighbors[pc1] & pc_neighbors[pc2]
    if len(mf_common) > 0:
        P_edges.append((pc1,pc2,len(mf_common)))

P = nx.Graph()
P.add_weighted_edges_from(P_edges)  # stores each count under the 'weight' attribute

# --- Time the brute-force 3-node motif count --------------------------------
stime = time.time()
motifs_3 = count_and_classify_motifs(P, 3)
etime = time.time()
print(etime -stime)

#motifs_4 = count_and_classify_motifs(P, 4)

279.27818417549133


In [18]:
# test the motif count code (networkx)

# Small random bipartite adjacency matrix (rows = sinks, columns = sources)
# with a fixed seed, so the printed result is reproducible.
n_sink = 5
n_source = 5
edge_prob = 0.5
rng = np.random.default_rng(0)
adj = (rng.random((n_sink, n_source)) < edge_prob).astype(int)

# NOTE: this cell reuses the names G, P, P_edges, motifs_3, mf_nodes and
# pc_nodes, so running it replaces any same-named objects built from real data.
pc_indices, mf_indices = np.where(adj == 1)
edges = [(f"m{i}", f"p{j}") for i, j in zip(mf_indices, pc_indices)]
# Node lists are sized from the toy matrix itself. They previously used
# len(mf_ids) / len(pc_ids), i.e. whatever real dataset was in the kernel.
mf_nodes = [f"m{i}" for i in range(n_source)]
pc_nodes = [f"p{j}" for j in range(n_sink)]

G = nx.Graph()
G.add_nodes_from(mf_nodes, bipartite = 0)
G.add_nodes_from(pc_nodes, bipartite = 1)
G.add_edges_from(edges)

# Drop source nodes without any edge, then refresh the source node list.
nodes_to_remove = [node for node in mf_nodes if G.degree(node) == 0]
G.remove_nodes_from(nodes_to_remove)
mf_nodes = [n for n,d in G.nodes(data=True) if d['bipartite']==0]

# Project onto sink nodes: link two sinks when they share at least one source,
# weighted by the number of shared sources. Neighbour sets are built once.
pc_neighbors = {pc: set(G.neighbors(pc)) for pc in pc_nodes}

P_edges = []
for pc1, pc2 in combinations(pc_nodes,2):
    mf_common = pc_neighbors[pc1] & pc_neighbors[pc2]
    if len(mf_common) > 0:
        P_edges.append((pc1,pc2,len(mf_common)))

P = nx.Graph()
P.add_weighted_edges_from(P_edges)  # stores each count under the 'weight' attribute

stime = time.time()
motifs_3 = count_and_classify_motifs(P, 3)
etime = time.time()
print(etime -stime)
print(motifs_3)
print(adj)
print(P_edges)

# Also exercise the configuration-model shuffle on the same toy matrix.
randadj = shuffle_adjmat_config_model(adj)
print(randadj)

0.00012993812561035156
Counter({'Triangle': 4})
[[0 1 1 1 0]
 [0 0 0 0 0]
 [0 1 0 1 0]
 [1 0 0 1 1]
 [1 1 0 0 0]]
[('p0', 'p2', 2), ('p0', 'p3', 1), ('p0', 'p4', 1), ('p2', 'p3', 1), ('p2', 'p4', 1), ('p3', 'p4', 1)]
[[0 1 1 1 0]
 [0 0 0 0 0]
 [0 1 0 0 1]
 [1 1 0 1 0]
 [1 0 0 1 0]]
