In [1]:
import pandas as pd
import networkx as nx
import random
from scipy import special
from collections import defaultdict
import click
import os
from statsmodels.stats import multitest
import csv

In [109]:
"""
Constructs a NetworkX graph.

Input:
    - file : csv file

Output:
    - graph: NetworkX graph
"""
def construct_graph(file):
    df_ppi = pd.read_csv(file, sep = "\t", header=None, names=["Protein1", "interaction", "Protein2"])
    
    df_ppi.Protein1 = df_ppi['Protein1'].str.lower()
    df_ppi.Protein2 = df_ppi['Protein2'].str.lower()
    
    df_interactions = df_ppi.replace("in-complex-with", +1)
    df_interactions = df_interactions.replace("controls-expression-of", -1)
    df_interactions = df_interactions.replace("controls-state-change-of", -1)
    
    df_interactions['interaction'].loc[(df_interactions['interaction'] != +1) ] = 0
    df_interactions['interaction'].loc[(df_interactions['interaction'] != -1) ] = 0
    #& &&(df_interactions.loc[:,1] != -1)
    #df_dgxp_thr_filtered['fold-change'].loc[(df_dgxp_thr_filtered[DGXPCOLUMNS[1]] > 0)] = +1
    G = nx.DiGraph()
    
    for i in range(len(df_interactions)):
        prot1 = df_interactions.iloc[i,0]
        prot2 = df_interactions.iloc[i,2]
        print(prot1, prot2)
        interaction = df_interactions.iloc[i,1]
        G.add_node(prot1)
        G.add_node(prot2)
        G.add_edge(prot1, prot2)
        G[prot1][prot2]['relation'] = interaction
 
        
    return G

In [110]:

construct_graph(path)

ccnc cdk8
ccnc crebbp
ccnc ep300
ccnc kat2a
ccnc kat2b
ccnc maml1
ccnc maml2
ccnc maml3
ccnc mamld1
ccnc notch1
ccnc notch1
ccnc rbpj
ccnc snw1
cdk8 crebbp
cdk8 ep300
cdk8 kat2a
cdk8 kat2b
cdk8 maml1
cdk8 maml2
cdk8 maml3
cdk8 mamld1
cdk8 notch1
cdk8 notch1
cdk8 rbpj
cdk8 snw1
crebbp ep300
crebbp hes1
crebbp hes1
crebbp hes5
crebbp hes5
crebbp hey1
crebbp hey1
crebbp hey2
crebbp hey2
crebbp heyl
crebbp heyl
crebbp kat2a
crebbp kat2b
crebbp maml1
crebbp maml2
crebbp maml3
crebbp mamld1
crebbp myc
crebbp myc
crebbp notch1
crebbp notch1
crebbp rbpj
crebbp snw1
cul1 fbxw7
cul1 notch1
cul1 rbx1
cul1 skp1
ep300 hes1
ep300 hes1
ep300 hes5
ep300 hes5
ep300 hey1
ep300 hey1
ep300 hey2
ep300 hey2
ep300 heyl
ep300 heyl
ep300 kat2a
ep300 kat2b
ep300 maml1
ep300 maml2
ep300 maml3
ep300 mamld1
ep300 myc
ep300 myc
ep300 notch1
ep300 notch1
ep300 rbpj
ep300 snw1
fbxw7 notch1
fbxw7 rbx1
fbxw7 skp1
hdac10 ncor1
hdac10 ncor2
hdac10 rbpj
hdac10 snw1
hdac10 tbl1xr1
hdac10 tbl1x
hdac11 ncor1
hdac11 ncor2
hda

<networkx.classes.digraph.DiGraph at 0x12a24c5f8>

In [87]:
path = "/Users/sophiakrix/Desktop/MechEnrichmentLab/NOTCH1_Intracellular.txt"
df = pd.read_csv(path, sep = "\t", header=None, names=["Protein1", "interaction", "Protein2"])

In [4]:
df.head()

Unnamed: 0,Protein1,interaction,Protein2
0,CCNC,in-complex-with,CDK8
1,CCNC,in-complex-with,CREBBP
2,CCNC,in-complex-with,EP300
3,CCNC,in-complex-with,KAT2A
4,CCNC,in-complex-with,KAT2B


In [5]:
df.columns

Index(['Protein1', 'interaction', 'Protein2'], dtype='object')

In [6]:
interactions_set = set(df.iloc[:,1])
interactions_set

{'controls-expression-of', 'controls-state-change-of', 'in-complex-with'}

In [7]:
interactions = df["interaction"].value_counts()
interactions

in-complex-with             234
controls-expression-of       72
controls-state-change-of     12
Name: interaction, dtype: int64

In [8]:
df_interactions = df.replace("in-complex-with", +1)
df_interactions = df_interactions.replace("controls-expression-of", -1)
df_interactions = df_interactions.replace("controls-state-change-of", -1)

In [9]:
df_interactions["interaction"].value_counts()

 1    234
-1     84
Name: interaction, dtype: int64

# NetworkX

In [10]:
G = nx.DiGraph()

In [11]:
len(df_interactions)

318

In [12]:
for i in range(len(df_interactions)):
    prot1 = df_interactions.iloc[i,0]
    prot2 = df_interactions.iloc[i,2]
    interaction = df_interactions.iloc[i,1]
    G.add_node(prot1)
    G.add_node(prot2)
    G.add_edge(prot1, prot2)
    G[prot1][prot2]['relation'] = interaction

In [13]:
for n, nbrs in G.adj.items():
    for nbr, eattr in nbrs.items():
        relation = eattr['relation']
        #print(n, nbr, relation)

In [14]:
plt.subplot(121)
nx.draw(G, with_labels=True, font_weight='bold')
#plt.subplot(122)
#nx.draw_shell(G, nlist=[range(5, 10), range(5)], with_labels=True, font_weight='bold')

NameError: name 'plt' is not defined

In [15]:
# Nr edges and nr of nodes 

print("Number of edges: ",G.number_of_edges(),"\nNumber of nodes: ",G.number_of_nodes())


Number of edges:  290 
Number of nodes:  47


# Algorithmic Development

In [16]:
"""
Randomly assigns labels of [-1,0,1] to nodes in a graph
Labels:
-1 : Downregulated
0 : No change
+1 : Upregulated

Input:
    - graph : the graph consisting of protein nodes 

Output:
    - prints list of nodes with associated attribute label
"""
def random_node_labels(graph):
    for node in graph.nodes():
        random_label = random.randint(-1,1)
        graph.nodes[node]['label'] = random_label
    print(graph.nodes.data())

In [17]:
random_node_labels(G)

[('CCNC', {'label': -1}), ('CDK8', {'label': -1}), ('CREBBP', {'label': 1}), ('EP300', {'label': 1}), ('KAT2A', {'label': 0}), ('KAT2B', {'label': 1}), ('MAML1', {'label': 1}), ('MAML2', {'label': 1}), ('MAML3', {'label': -1}), ('MAMLD1', {'label': 0}), ('NOTCH1', {'label': 0}), ('RBPJ', {'label': 0}), ('SNW1', {'label': -1}), ('HES1', {'label': 1}), ('HES5', {'label': 1}), ('HEY1', {'label': -1}), ('HEY2', {'label': 1}), ('HEYL', {'label': 1}), ('MYC', {'label': 1}), ('CUL1', {'label': 1}), ('FBXW7', {'label': 0}), ('RBX1', {'label': 1}), ('SKP1', {'label': 1}), ('HDAC10', {'label': 0}), ('NCOR1', {'label': 1}), ('NCOR2', {'label': 1}), ('TBL1XR1', {'label': 0}), ('TBL1X', {'label': -1}), ('HDAC11', {'label': 0}), ('HDAC1', {'label': -1}), ('HDAC2', {'label': 0}), ('HDAC3', {'label': -1}), ('HDAC4', {'label': -1}), ('HDAC5', {'label': -1}), ('HDAC6', {'label': -1}), ('HDAC7', {'label': 0}), ('HDAC8', {'label': -1}), ('HDAC9', {'label': 0}), ('TLE1', {'label': -1}), ('TLE2', {'label': 

In [18]:
"""
Caclulates the shortest path between two nodes.

Input:
    - graph : NetworkX graph
    - source : upstream source node


Output:
    - dictionary of shortest path nodes between source node and all other nodes in graph
"""
def shortest_path(graph, source):
    for target in graph.nodes():
        shortest_paths = nx.shortest_path(graph, source)
    return shortest_paths

In [19]:
shortest_path(G, "CCNC")

{'CCNC': ['CCNC'],
 'CDK8': ['CCNC', 'CDK8'],
 'CREBBP': ['CCNC', 'CREBBP'],
 'EP300': ['CCNC', 'EP300'],
 'KAT2A': ['CCNC', 'KAT2A'],
 'KAT2B': ['CCNC', 'KAT2B'],
 'MAML1': ['CCNC', 'MAML1'],
 'MAML2': ['CCNC', 'MAML2'],
 'MAML3': ['CCNC', 'MAML3'],
 'MAMLD1': ['CCNC', 'MAMLD1'],
 'NOTCH1': ['CCNC', 'NOTCH1'],
 'RBPJ': ['CCNC', 'RBPJ'],
 'SNW1': ['CCNC', 'SNW1'],
 'HES1': ['CCNC', 'CREBBP', 'HES1'],
 'HES5': ['CCNC', 'CREBBP', 'HES5'],
 'HEY1': ['CCNC', 'CREBBP', 'HEY1'],
 'HEY2': ['CCNC', 'CREBBP', 'HEY2'],
 'HEYL': ['CCNC', 'CREBBP', 'HEYL'],
 'MYC': ['CCNC', 'CREBBP', 'MYC'],
 'RBX1': ['CCNC', 'NOTCH1', 'RBX1'],
 'RPS27A': ['CCNC', 'NOTCH1', 'RPS27A'],
 'SKP1': ['CCNC', 'NOTCH1', 'SKP1'],
 'UBA52': ['CCNC', 'NOTCH1', 'UBA52'],
 'UBB': ['CCNC', 'NOTCH1', 'UBB'],
 'UBC': ['CCNC', 'NOTCH1', 'UBC'],
 'TBL1XR1': ['CCNC', 'RBPJ', 'TBL1XR1'],
 'TBL1X': ['CCNC', 'RBPJ', 'TBL1X'],
 'TLE1': ['CCNC', 'CREBBP', 'HES1', 'TLE1'],
 'TLE2': ['CCNC', 'CREBBP', 'HES1', 'TLE2'],
 'TLE3': ['CCNC', 'CR

In [20]:
"""
Check if node labels of source and target node are the same

Input:
    - graph: NetworkX graph
    - source: source upstream node

Output:
    - list of concordant and non-concordant nodes for the source node
"""


def count_concordance(graph, source):
    same_label = False

    nodes_dic = defaultdict(list)

    for target, path_nodes in shortest_path(graph, source).items():

        # check if node labels of source and target are the same
        if G.nodes[source]['label'] * G.nodes[target]['label'] is 1:
            same_label = True

        # multiply the edge labels
        #edge_label = [G[path_nodes[i]][path_nodes[i + 1]]['relation'] * G[path_nodes[i]][path_nodes[i + 1]]['relation']
                      #for i in range(len(path_nodes) - 1)]

        edge_label = 1
        for i in range(len(path_nodes)-1):
            temp_edge_label = G[path_nodes[i]][path_nodes[i+1]]['relation']
            edge_label *= temp_edge_label

        # concordant node
        if same_label == True and edge_label == +1:
            graph.nodes[target]['concordance'] = +1
            nodes_dic['concordant'].append(target)

        # non-concordant node
        if same_label == False and edge_label == -1:
            graph.nodes[target]['concordance'] = -1
            nodes_dic['non-concordant'].append(target)

        # no change node
        if G.nodes[source]['label'] == 0 and G.nodes[target]['label'] == 0:
            nodes_dic['no change'].append(target)

    return nodes_dic


In [21]:
count_concordance(G, "NOTCH1")

defaultdict(list,
            {'no change': ['NOTCH1',
              'RBPJ',
              'RPS27A',
              'UBB',
              'KAT2A',
              'MAMLD1',
              'TLE2',
              'TLE4',
              'TBL1XR1'],
             'non-concordant': ['HES1',
              'HES5',
              'HEY1',
              'HEY2',
              'HEYL',
              'MYC',
              'KAT2A',
              'KAT2B',
              'MAML1',
              'MAML2',
              'MAML3',
              'MAMLD1',
              'TLE1',
              'TLE2',
              'TLE3',
              'TLE4']})

In [22]:

"""
Returns a dictionary of the nodes of the graph with their according
     - shortest path nodes
     - concordant nodes
     - non-concordant nodes
     - no change nodes

Input:
    - graph

Output:
    - dictionary of nodes 
"""
def nodes_dictionary(graph):
    dic = {}
    for node in graph.nodes():
        dic[node] = {}

        # concordant, non-concordant and no change nodes
        dic[node] = count_concordance(graph, node)

        # shortest path nodes
        dic[node]['shortest_path'] = list(shortest_path(graph, node).keys())

    return dic


In [23]:
nodes_dictionary(G)['CCNC']

defaultdict(list,
            {'concordant': ['CCNC',
              'CDK8',
              'CREBBP',
              'EP300',
              'KAT2A',
              'KAT2B',
              'MAML1',
              'MAML2',
              'MAML3',
              'MAMLD1',
              'NOTCH1',
              'RBPJ',
              'SNW1',
              'HES1',
              'HES5',
              'HEY1',
              'HEY2',
              'HEYL',
              'MYC',
              'RBX1',
              'RPS27A',
              'SKP1',
              'UBA52',
              'UBB',
              'UBC',
              'TBL1XR1',
              'TBL1X',
              'TLE1',
              'TLE2',
              'TLE3',
              'TLE4'],
             'shortest_path': ['CCNC',
              'CDK8',
              'CREBBP',
              'EP300',
              'KAT2A',
              'KAT2B',
              'MAML1',
              'MAML2',
              'MAML3',
              'MAMLD1',
              'NOTCH1'

In [24]:
"""
Calculates the concordance for an upstream node with its downstream nodes
Probability of getting at least the number of state changes consistent
with the direction
Input:
    - graph
    - p : probability of achieving a result

Output:
    - dictionary of p-values and corrected p-values for concordance
"""
def calculate_concordance(graph):
    concordance_dic = {}

    #assert 0 <= p and p <= 1, "p must be within [0,1]"

    for hyp_node in graph.nodes():
        
        if hyp_node not in graph.nodes():
            raise ValueError(f"The node {hyp_node} is not in the graph.")
        
        # n is number of trials
        n = len(shortest_path(graph, hyp_node).keys())
        # k is number of successful predictions
        k = len(count_concordance(graph, hyp_node)['concordant'])

        bin_coeff = special.binom(n, k)
        concordance = bin_coeff * (0.5 ** k) * (1 - 0.5) ** (n - k)
        concordance_dic[hyp_node] = {}
        concordance_dic[hyp_node]['p_val'] = concordance

    # correction for multiple testing
    pval_list = [concordance_dic[hyp_node]['p_val'] for hyp_node in graph.nodes()]
    reject, pvals_corrected = multitest.multipletests(pval_list,alpha=0.05,method='bonferroni')[:2]
    corrected_concordance_dic = {}
    for node, pval in zip(graph.nodes(),pvals_corrected):
        concordance_dic[node]['p_val_corrected'] = pval

    return concordance_dic

In [25]:
calculate_concordance(G)


{'CCNC': {'p_val': 4.656612873077393e-10,
  'p_val_corrected': 2.1886080503463745e-08},
 'CDK8': {'p_val': 9.313225746154785e-10,
  'p_val_corrected': 4.377216100692749e-08},
 'CREBBP': {'p_val': 1.862645149230957e-09,
  'p_val_corrected': 8.754432201385498e-08},
 'EP300': {'p_val': 3.725290298461914e-09,
  'p_val_corrected': 1.7508864402770996e-07},
 'KAT2A': {'p_val': 7.450580596923828e-09,
  'p_val_corrected': 3.501772880554199e-07},
 'KAT2B': {'p_val': 0.06285522133111954, 'p_val_corrected': 1.0},
 'MAML1': {'p_val': 0.14944598078727722, 'p_val_corrected': 1.0},
 'MAML2': {'p_val': 0.14944598078727722, 'p_val_corrected': 1.0},
 'MAML3': {'p_val': 0.14944598078727722, 'p_val_corrected': 1.0},
 'MAMLD1': {'p_val': 7.450580596923828e-09,
  'p_val_corrected': 3.501772880554199e-07},
 'NOTCH1': {'p_val': 7.450580596923828e-09,
  'p_val_corrected': 3.501772880554199e-07},
 'RBPJ': {'p_val': 7.450580596923828e-09,
  'p_val_corrected': 3.501772880554199e-07},
 'SNW1': {'p_val': 2.179294824

In [26]:
"""
Writes the values for nodes, concordant_nodes, non_concordant_nodes, no_change_nodes, p_val, p_val_corrected to a csv file

Input:
- graph
- csv_output : path for output file 

Output:
- csv file
"""
def create_concordant_df(graph, csv_output):
    rows = []
    for node in graph.nodes():
        node_dict = {}
        node_dict['node'] = node
        node_dict['concordant_nodes'] = len(count_concordance(graph, node)['concordant'])
        node_dict['non_concordant_nodes'] = len(count_concordance(graph, node)['non-concordant'])
        node_dict['no_change'] = len(count_concordance(graph, node)['no change'])
        node_dict['p_val'] = calculate_concordance(graph)[node]['p_val']
        node_dict['p_val_corrected'] = calculate_concordance(graph)[node]['p_val_corrected']
        
        rows.append(node_dict)
        
    df_ = pd.DataFrame(rows)
    
    df_.to_csv(csv_output)
        

        
        

In [27]:
create_concordant_df(G, "/Users/sophiakrix/Desktop/Concordance_test.csv")

In [53]:
import os.path
import pandas as pd

print('Here:',os.path.abspath(os.path.dirname("__file__")))

try:
    dgxp_file = os.path.join(os.path.abspath(os.path.dirname("__file__")),'data', 'example_dgxp.txt')
except NameError:  # We are the main py2exe script, not a module
    import sys
    dgxp_file = os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])),'data', 'example_dgxp.txt')

#dgxp_file = os.path.join(os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))),'data', 'example_dgxp.txt')
print(dgxp_file)
df = pd.read_csv(dgxp_file, sep='\t')

Here: /Users/sophiakrix/git/MechanismEnrichmentLab
/Users/sophiakrix/git/MechanismEnrichmentLab/data/example_dgxp.txt


In [54]:
df.head()

Unnamed: 0,ID,adj.P.Val,P.Value,t,B,logFC,Gene.symbol,Gene.title
0,10415081,1.95e-08,2.19e-12,-28.259642,11.23514,-5.81675,,
1,10520121,1.95e-08,2.19e-12,-28.259642,11.23514,-5.81675,,
2,10578405,1.95e-08,2.19e-12,-28.259642,11.23514,-5.81675,,
3,10586076,1.95e-08,2.19e-12,-28.259642,11.23514,-5.81675,,
4,10569972,0.000405,6.78e-08,-11.595062,7.06249,-1.53425,Cers4,ceramide synthase 4


In [55]:
df_dgxp = df[['Gene.symbol', 'logFC', 'adj.P.Val']].copy()

In [56]:
df_dgxp.columns = ["gene", "fold-change", "p-value"]

In [57]:
len(df_dgxp)

35556

In [58]:
df_dgxp.head()

Unnamed: 0,gene,fold-change,p-value
0,,-5.81675,1.95e-08
1,,-5.81675,1.95e-08
2,,-5.81675,1.95e-08
3,,-5.81675,1.95e-08
4,Cers4,-1.53425,0.000405


In [59]:
df_dgxp = df_dgxp.dropna()

In [60]:
df_dgxp.head()

Unnamed: 0,gene,fold-change,p-value
4,Cers4,-1.53425,0.000405
6,Gdpd3,-2.08502,0.000427
7,Wdfy1,-1.354708,0.00128
8,Igkv4-59///Igkv4-77///Igkv4-70///Igkv4-72///Ig...,-1.654181,0.0018
9,Parp3,-1.148608,0.00195


In [61]:
df_dgxp.gene = df_dgxp['gene'].str.lower() 

In [62]:
df_dgxp.head()

Unnamed: 0,gene,fold-change,p-value
4,cers4,-1.53425,0.000405
6,gdpd3,-2.08502,0.000427
7,wdfy1,-1.354708,0.00128
8,igkv4-59///igkv4-77///igkv4-70///igkv4-72///ig...,-1.654181,0.0018
9,parp3,-1.148608,0.00195


In [63]:
df_dgxp.loc[4,'gene']

'cers4'

In [64]:
"""# alternative gene names

for gene in df_dgxp.loc[:, 'gene']:
    print(gene)
    print(df_dgxp.loc[gene, 'gene'])
    
    if '///' in df_dgxp.loc[gene, 'gene']:
        gene_names = gene.split('///')
        df_dgxp.gene.replace(gene, gene_names, in_place=True)
"""

"# alternative gene names\n\nfor gene in df_dgxp.loc[:, 'gene']:\n    print(gene)\n    print(df_dgxp.loc[gene, 'gene'])\n    \n    if '///' in df_dgxp.loc[gene, 'gene']:\n        gene_names = gene.split('///')\n        df_dgxp.gene.replace(gene, gene_names, in_place=True)\n"

In [73]:
df_dgxp.head()


Unnamed: 0,gene,fold-change,p-value
4,cers4,-1.53425,0.000405
6,gdpd3,-2.08502,0.000427
7,wdfy1,-1.354708,0.00128
8,igkv4-59///igkv4-77///igkv4-70///igkv4-72///ig...,-1.654181,0.0018
9,parp3,-1.148608,0.00195


In [79]:
DGXPCOLUMNS = ["gene", "fold-change", "p-value"]
threshold = 0.2

# filter dgxp

# filter nodes with p-value < 0.05
df_dgxp_pval_filtered = df_dgxp.loc[df_dgxp[DGXPCOLUMNS[2]] < 1.0]
print('pval filtered:',len(df_dgxp_pval_filtered))
# filter nodes with threshold (given by user)
df_dgxp_thr_filtered = df_dgxp_pval_filtered.loc[abs(df_dgxp_pval_filtered[DGXPCOLUMNS[1]]) > threshold]
print('thr filtered:',len(df_dgxp_thr_filtered))
# set fold change labels from float to +1 or -1
df_dgxp_thr_filtered['fold-change'].loc[(df_dgxp_thr_filtered[DGXPCOLUMNS[1]] > 0)] = +1
df_dgxp_thr_filtered['fold-change'].loc[(df_dgxp_thr_filtered[DGXPCOLUMNS[1]] < 0)] = -1

pval filtered: 24988
thr filtered: 7361


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [80]:
df_dgxp_pval_filtered

Unnamed: 0,gene,fold-change,p-value
4,cers4,-1.534250,0.000405
6,gdpd3,-2.085020,0.000427
7,wdfy1,-1.354708,0.001280
8,igkv4-59///igkv4-77///igkv4-70///igkv4-72///ig...,-1.654181,0.001800
9,parp3,-1.148608,0.001950
11,bcat1,1.291582,0.005000
13,igkv4-59///igkv4-70///igkc///igkv4-59///igkv4-80,-1.509383,0.009380
14,hpgds,-1.020187,0.009700
15,tnip3,-1.231459,0.017300
16,cep162,-0.782766,0.019000


In [81]:
COLUMNS = ["Protein1", "interaction", "Protein2"]

ppi_file = os.path.join(os.path.abspath(os.path.dirname("__file__")),'data', 'example.txt')

df_ppi = pd.read_csv(ppi_file, sep='\t', header=None)
df_ppi.columns = ["Protein1", "interaction", "Protein2"]

df_ppi.Protein1 = df_ppi['Protein1'].str.lower()
df_ppi.Protein2 = df_ppi['Protein2'].str.lower()

df_interactions = df_ppi.replace("in-complex-with", +1)
df_interactions = df_interactions.replace("controls-expression-of", -1)
df_interactions = df_interactions.replace("controls-state-change-of", -1)
# replace all other interaction expressions with 0
df_interactions.loc[(df_interactions[COLUMNS[1]] != +1) & (df_interactions[COLUMNS[1]] != -1)] = 0


In [82]:
set(df_interactions.Protein1.to_list())

{'ccnc',
 'cdk8',
 'crebbp',
 'cul1',
 'ep300',
 'fbxw7',
 'hdac1',
 'hdac10',
 'hdac11',
 'hdac2',
 'hdac3',
 'hdac4',
 'hdac5',
 'hdac6',
 'hdac7',
 'hdac8',
 'hdac9',
 'hes1',
 'hes5',
 'hey1',
 'hey2',
 'heyl',
 'hif1a',
 'kat2a',
 'kat2b',
 'maml1',
 'maml2',
 'maml3',
 'mamld1',
 'myc',
 'ncor1',
 'ncor2',
 'notch1',
 'rbpj',
 'rbx1',
 'snw1'}

In [83]:
G = nx.DiGraph()
LABEL = 'label'
for i in range(len(df_interactions)):
    
    # add nodes to graph
    prot1 = df_interactions.loc[i, COLUMNS[0]]
    prot2 = df_interactions.loc[i, COLUMNS[2]]
    
    G.add_node(prot1)
    G.add_node(prot2)

    # add node attribute from dgxp file
    
    if prot1 in list(df_dgxp_thr_filtered['gene']):
        index = df_dgxp_thr_filtered[df_dgxp_thr_filtered.gene == prot1].index[0]
        G.nodes[prot1][LABEL] = df_dgxp_thr_filtered.loc[index, ['fold-change']]
        #print(df_dgxp.loc[index, 'fold-change'])
        print(f"Label for {prot1} is now {df_dgxp_thr_filtered.loc[index, 'fold-change']}")
        
    
    # edge attributes
    interaction = df_interactions.loc[i, COLUMNS[1]]
    G.add_edge(prot1, prot2)
    G[prot1][prot2]['relation'] = interaction

Label for cul1 is now -1.0
Label for cul1 is now -1.0
Label for cul1 is now -1.0
Label for cul1 is now -1.0
Label for hdac10 is now -1.0
Label for hdac10 is now -1.0
Label for hdac10 is now -1.0
Label for hdac10 is now -1.0
Label for hdac10 is now -1.0
Label for hdac10 is now -1.0
Label for hdac9 is now -1.0
Label for hdac9 is now -1.0
Label for hdac9 is now -1.0
Label for hdac9 is now -1.0
Label for hdac9 is now -1.0
Label for hdac9 is now -1.0
Label for hes1 is now -1.0
Label for hes1 is now -1.0
Label for hes1 is now -1.0
Label for hes1 is now -1.0
Label for hes1 is now -1.0
Label for hes1 is now -1.0
Label for hes1 is now -1.0
Label for hes1 is now -1.0
Label for hes1 is now -1.0
Label for hes1 is now -1.0
Label for hes1 is now -1.0
Label for hes1 is now -1.0
Label for hes1 is now -1.0
Label for hey1 is now -1.0
Label for hey1 is now -1.0
Label for hey1 is now -1.0
Label for hey1 is now -1.0
Label for hey1 is now -1.0
Label for hey1 is now -1.0
Label for hey1 is now -1.0
Label for 

In [52]:
list(df_dgxp_thr_filtered['gene'])

['cers4',
 'gdpd3',
 'wdfy1',
 'igkv4-59///igkv4-77///igkv4-70///igkv4-72///igkv4-68///igkc///igkv19-93///igkv4-59///igkv4-80///igkv4-70///igkv4-68',
 'parp3',
 'bcat1',
 'igkv4-59///igkv4-70///igkc///igkv4-59///igkv4-80',
 'hpgds',
 'tnip3',
 'hddc3',
 'igkv10-96']