In [1]:
import hashlib
import pandas as pd
import numpy as np
import networkx as nx
from typing import List, Tuple, Generator
from collections import defaultdict
import itertools

In [2]:
# fname = "../data/huang_emt_scc.gxml"
# fname = "../data/omnipath.gxml"
fname = "../data/tcell_scc.gxml"
g = nx.read_graphml(fname)
g = nx.convert_node_labels_to_integers(g)

In [3]:
g.nodes(data=True)

NodeDataView({0: {'name': 'Notch'}, 1: {'name': 'Deltex'}, 2: {'name': 'Notch3'}, 3: {'name': 'Notch1'}, 4: {'name': 'IL7RStat'}, 5: {'name': 'IL7Ra'}, 6: {'name': 'Nrarp'}, 7: {'name': 'GATA3'}, 8: {'name': 'TCF1'}, 9: {'name': 'Bcl11b'}, 10: {'name': 'Ikaros'}, 11: {'name': 'Runx1'}, 12: {'name': 'HEBAlt'}, 13: {'name': 'Eprotein'}, 14: {'name': 'Gfi1'}, 15: {'name': 'Myb'}, 16: {'name': 'E2A'}, 17: {'name': 'HEB'}, 18: {'name': 'SLE'}, 19: {'name': 'SclTal1'}, 20: {'name': 'Lyl1'}, 21: {'name': 'Id2'}, 22: {'name': 'PU1'}, 23: {'name': 'Gfi1b'}, 24: {'name': 'Lmo2'}})

# Network Cycles becomes Nodes

# Cycle Data Type
Later on, it's probably wise to define data-type to speed up hashing and equalities

In [4]:
class Cycle(object):
    def __init__(self, cycle_list: List, positivity: bool, NODE_COUNT: int):
        self.NODE_COUNT = NODE_COUNT
        self.N = len(cycle_list)
        self.cycle_array = np.zeros(NODE_COUNT, dtype=np.bool)
        self.orig_cycle_list = np.array(cycle_list, dtype=int)
        # self.orig_cycle_list = cycle_list
        for c in self.orig_cycle_list:
            self.cycle_array[c] = True
        self.positivity = positivity
        self.id = ''.join(self.cycle_array.astype(int).astype(str))

    # def get_nodes(self):
    #     for node in self.cycle_list:
    #         return node
        
    def get_node_and_next(self):
        # -> Generator[Tuple[int, int]]:
        for i, node in enumerate(self.orig_cycle_list):
            next_node = self.orig_cycle_list[(i + 1) % self.N]
            yield(node, next_node)

    def __eq__(self, other):
        return not(np.any(np.logical_xor(self.cycle_array, other.cycle_array)))
        # return (self.NODE_COUNT == other.NODE_COUNT) and not(np.any(self.cycle_array, other.cycle_array))

    def __hash__(self):
        val = int(hashlib.sha256(self.cycle_array).hexdigest(), base=16)
        return val
    
    def __repr__(self):
        return self.id

    def and_two(self, c2, c3):
        return np.any(
            np.logical_and(
                np.logical_and(self.cycle_array, c2.cycle_array),
                c3.cycle_array
            )
            
        )

    def intersection(self, other):
        return np.sort(np.intersect1d(self.orig_cycle_list, other.orig_cycle_list))

## Filtering for only Positive-cycles
A cycle is positive if the XOR of their "represses" of all the edges results in a positive

In [5]:
cycles = sorted(nx.simple_cycles(g))
cycle = cycles[0]
positive = True
print(positive, end ="")
for i, node in enumerate(cycle):
    next_node = cycle[(i + 1) % len(cycle)]
    repress = g.edges[node, next_node]['repress']
    positive ^= repress
    print(f" -> ({node}, {next_node}: {repress}) -> {positive}", end="")
    if (i + 1) % 3 == 0:
        print(f'\n{positive}', end="")

True -> (0, 1: False) -> True -> (1, 0: True) -> False

In [6]:
def is_positive(graph: nx.Graph, cycle: List[str]):
    positive = True
    for i, node in enumerate(cycle):
        next_node = cycle[(i + 1) % len(cycle)]
        repress = graph.edges[node, next_node]['repress']
        positive ^= repress
    return positive

Find all positive cycles

In [7]:
NODE_COUNT = len(g.nodes())
all_cycles = [Cycle(c, is_positive(g, c), NODE_COUNT) for c in nx.simple_cycles(g)]
pos_cycles = [c for c in all_cycles if c.positivity]
len(all_cycles)

2772

## Edge to Cycles Map
For each edge, create a set that tracks all the cycles that the edge is in

In [8]:
edge_cycles_mp = defaultdict(set)

for i, c in enumerate(all_cycles):
    for node, next_node in c.get_node_and_next():
        edge_cycles_mp[node, next_node].add(c)

In [9]:
c1, c2 = all_cycles[1], all_cycles[14]
c1 == c2

False

In [10]:
edge_cycles_mp = defaultdict(set)
for cycle in all_cycles:
    for i, node in cycle.get_node_and_next():
        edge_cycles_mp[node, next_node].add(cycle)
nx.set_edge_attributes(g, edge_cycles_mp, "cycles")

In [11]:
class CycleFactory:
    def __init__(self, node_count: int):
        self.NODE_COUNT = node_count
    
    def create_cycle(self, cycle_list: List[int]) -> Cycle:
        pass

## Type 1 Detection
Three cycles with a common node

In [12]:
def hastype1_old(cycle_sets, node_count):
    """Determine whether any three cycles share at least one node (Type I motif)."""
    node_uses = [0] * node_count
    for cycle_holder in cycle_sets:
        for n in cycle_holder.value:
            node_uses[n] += 1
            if (node_uses[n] == 3):
                return True
    return False

def hastype1(c1, c2, c3):
    return c1.and_two(c2, c3)

# Cycle Intersection Graph

In [14]:
nodes_mp = defaultdict(set)
pos_g = nx.Graph()
for i, c1 in enumerate(pos_cycles):
    for c2 in pos_cycles[i + 1:]:
        commons = c1.intersection(c2)
        if len(commons) > 0:
            cfs = frozenset(commons)
            pos_g.add_edge(c1, c2, common_nodes=commons)
            nodes_mp[cfs].add(c1)
            nodes_mp[cfs].add(c2)

In [18]:
nodes_combo = dict()
for common_nodes, cycle_list in nodes_mp.items():
    n = 0
    for _ in itertools.combinations(cycle_list, 3):
        n += 1
    nodes_combo[common_nodes] = n

In [19]:
nodes_combo

{frozenset({np.int64(8)}): 16222590,
 frozenset({np.int64(22)}): 125181420,
 frozenset({np.int64(0)}): 66177440,
 frozenset({np.int64(0), np.int64(2)}): 5935160,
 frozenset({np.int64(0), np.int64(3)}): 1435820,
 frozenset({np.int64(0),
            np.int64(2),
            np.int64(4),
            np.int64(5),
            np.int64(7)}): 477191,
 frozenset({np.int64(0), np.int64(4), np.int64(5), np.int64(7)}): 3737581,
 frozenset({np.int64(0), np.int64(2), np.int64(4), np.int64(5)}): 286,
 frozenset({np.int64(0), np.int64(4), np.int64(5)}): 1140,
 frozenset({np.int64(0), np.int64(2), np.int64(7)}): 3817670,
 frozenset({np.int64(0), np.int64(7)}): 26979680,
 frozenset({np.int64(4), np.int64(5), np.int64(7)}): 4590551,
 frozenset({np.int64(4), np.int64(5)}): 2925,
 frozenset({np.int64(7)}): 9735114,
 frozenset({np.int64(0),
            np.int64(4),
            np.int64(5),
            np.int64(7),
            np.int64(8),
            np.int64(9),
            np.int64(13),
            np.in