In [1]:
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname('__file__'))))
import jReversion as jR
import pandas as pd
import networkx as nx
import numpy as np

In [2]:
# Read the regulator -> target edge list from the supplementary spreadsheet;
# the column names sit on the third row of the sheet (header=2).
network_df = pd.read_excel(
    io='../networks/natcomm_2019.xlsx',
    sheet_name='Supplementary Table 1',
    header=2,
)
print(network_df.head())

# Build a directed graph with one edge per (Regulator, Target gene) row.
network = nx.from_pandas_edgelist(
    network_df,
    'Regulator',
    'Target gene',
    create_using=nx.DiGraph,
)

  Regulator Target gene
0     AADAT         DLD
1     AADAT        DLST
2     AADAT        LDHB
3     AADAT        OGDH
4     AANAT        ASMT


In [3]:
def identify_ffl(g_read, cutoff=None):
    """Tabulate simple-path statistics for every ordered node pair.

    Rows and columns follow the iteration order of ``g_read.nodes``.

    Parameters
    ----------
    g_read : graph
        Graph whose simple paths are enumerated with ``nx.all_simple_paths``.
    cutoff : int or None
        Forwarded unchanged to ``nx.all_simple_paths``.

    Returns
    -------
    dict
        ``'mx_num_sp'``: number of simple paths from row node to column node.
        ``'mx_min_sp'``: smallest ``len(path)`` (a node count) among those
        paths; stays at ``len(g_read)`` when no path was found.
        ``'mx_max_sp'``: largest ``len(path)``; stays at 0 when no path was
        found.
        ``'mx_ffl'``: boolean matrix, True where at least two simple paths
        exist.
        Diagonal entries are never searched and keep the "no path" values.
    """
    n_nodes = len(g_read)
    shape = (n_nodes, n_nodes)
    mx_num_sp = np.zeros(shape, dtype=int)
    mx_min_sp = np.zeros(shape, dtype=int)
    mx_max_sp = np.zeros(shape, dtype=int)

    for row, src in enumerate(g_read.nodes):
        for col, dst in enumerate(g_read.nodes):
            # "No path" defaults, also used for the diagonal.
            count, shortest, longest = 0, n_nodes, 0
            # Both loops walk the same node view, so equal positions mean
            # the very same node.
            if row != col:
                paths = nx.all_simple_paths(g_read, src, dst, cutoff)
                # enumerate(start=1) leaves `count` at the number of paths
                # seen and untouched (0) when the generator is empty.
                for count, path in enumerate(paths, start=1):
                    n_on_path = len(path)
                    if n_on_path < shortest:
                        shortest = n_on_path
                    if n_on_path > longest:
                        longest = n_on_path
            mx_num_sp[row, col] = count
            mx_min_sp[row, col] = shortest
            mx_max_sp[row, col] = longest

    return {
        'mx_num_sp': mx_num_sp,
        'mx_min_sp': mx_min_sp,
        'mx_max_sp': mx_max_sp,
        'mx_ffl': mx_num_sp >= 2,
    }

def node_deletion_effects_on_ffl(g_read, cutoff=None):
    """Run ``identify_ffl`` once per node with that node disconnected.

    For each node a copy of ``g_read`` is made in which every edge touching
    the node is removed. The node itself stays in the graph, at its original
    position in the node ordering, so the matrices produced for the mutated
    graph have the same shape and the same row/column order as those of
    ``identify_ffl(g_read, cutoff)``.

    Parameters
    ----------
    g_read : graph
        Graph to perturb; it is not modified.
    cutoff : int or None
        Forwarded unchanged to ``identify_ffl``.

    Returns
    -------
    dict
        Maps each node of ``g_read`` to the ``identify_ffl`` output of the
        graph in which that node has been isolated.
    """
    node_deletion_effects = dict()
    for mut in g_read.nodes:
        g_mut = g_read.copy()
        # Removing and re-adding the node would push it to the end of the
        # node ordering and shift every later row/column; stripping its edges
        # isolates it just the same while keeping the ordering intact.
        if g_mut.is_directed():
            incident = list(g_mut.in_edges(mut)) + list(g_mut.out_edges(mut))
        else:
            incident = list(g_mut.edges(mut))
        g_mut.remove_edges_from(incident)
        node_deletion_effects[mut] = identify_ffl(g_mut, cutoff)

    return node_deletion_effects

def ffl_test(g_read, cut_off_range=[None]):
    """Label every node by how disconnecting it affects feed-forward pairs.

    For each cutoff, the path-count matrix of the intact graph is compared
    with the one obtained after perturbing each node. Only pairs flagged in
    ``'mx_ffl'`` of the intact graph are considered, and the row and column
    of the perturbed node itself are ignored.

    Parameters
    ----------
    g_read : graph
        Graph to analyse.
    cut_off_range : iterable
        Cutoff values passed one at a time to ``identify_ffl`` and
        ``node_deletion_effects_on_ffl``. The default list is only iterated,
        never mutated.

    Returns
    -------
    dict
        ``result[node][cut_off]`` is one of:

        * ``'ND'`` - no flagged pair changed its path count;
        * ``'C1'`` - some flagged pairs changed, and each of them still has
          at least one path after the perturbation;
        * ``'C2'`` - at least one changed flagged pair has no path left.

    Notes
    -----
    The matrices returned by ``node_deletion_effects_on_ffl`` are compared
    element-wise with those of ``identify_ffl``, so both are assumed to use
    the node order of ``g_read.nodes``.
    """
    nodes = list(g_read.nodes)
    result = {node: dict() for node in nodes}

    for cut_off in cut_off_range:
        ffl = identify_ffl(g_read, cut_off)
        eff = node_deletion_effects_on_ffl(g_read, cut_off)

        for n_idx, node in enumerate(nodes):
            num_sp_mut = eff[node]['mx_num_sp']
            # Flagged pairs whose number of simple paths changed.
            changed_ffl = np.logical_and(ffl['mx_num_sp'] != num_sp_mut,
                                         ffl['mx_ffl'])

            # Drop the perturbed node's own row and column. np.delete needs
            # the integer position, not the node label (labels may be
            # strings or arbitrary hashables).
            num_sp_mut = np.delete(num_sp_mut, n_idx, axis=0)
            num_sp_mut = np.delete(num_sp_mut, n_idx, axis=1)
            changed_ffl = np.delete(changed_ffl, n_idx, axis=0)
            changed_ffl = np.delete(changed_ffl, n_idx, axis=1)

            if not changed_ffl.any():
                result[node][cut_off] = 'ND'
            elif (num_sp_mut[changed_ffl] > 0).all():
                result[node][cut_off] = 'C1'
            else:
                # Path counts are non-negative, so "not all > 0" means at
                # least one affected pair dropped to zero paths.
                result[node][cut_off] = 'C2'

    return result

In [None]:
# Cutoff values 2..7; ffl_test hands each one to identify_ffl, which forwards
# it to nx.all_simple_paths.
CutOffRange = list(range(2, 8))
ffl_test_result = ffl_test(network, CutOffRange)

In [None]:
# Export the FFL test labels as a TSV: one row per intermediate node (nodes
# with in-degree 0 or out-degree 0 are skipped), one column per cutoff.
NetworkName = 'natcomm_2019'
# open(..., 'w') does not create missing directories.
os.makedirs('data', exist_ok=True)
with open('data/' + NetworkName + '_ffl_test.tsv', 'w') as f:
    TMP = '\t'.join(['FFL' + str(CUT) for CUT in CutOffRange])

    f.write('node\t' + TMP + '\n')
    NodeList = set(network.nodes)
    InputNodes = [node for node, in_degree in network.in_degree(network.nodes) if in_degree == 0]
    OutputNodes = [node for node, out_degree in network.out_degree(network.nodes) if out_degree == 0]
    NodeList.difference_update(InputNodes)
    NodeList.difference_update(OutputNodes)
    # Set iteration order is arbitrary; sort so the file is reproducible.
    for NODE in sorted(NodeList):
        # Look each label up by its cutoff so it lands under the matching
        # header column instead of relying on dict insertion order.
        FFL = '\t'.join([str(ffl_test_result[NODE][CUT]) for CUT in CutOffRange])
        f.write(NODE + '\t' + FFL + '\n')