In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import random
#from igraph import Graph
from scipy import special
import statsmodels.stats
import csv

In [2]:
"""
Constructs a NetworkX graph.

Input:
    - file : csv file

Output:
    - graph: NetworkX graph
"""
def construct_graph(file):
    df = pd.read_csv(file, sep = "\t", header=None, names=["Protein1", "interaction", "Protein2"])
    df_interactions = df.replace("in-complex-with", +1)
    df_interactions = df_interactions.replace("controls-expression-of", -1)
    df_interactions = df_interactions.replace("controls-state-change-of", -1)
    
    G = nx.DiGraph()
    
    for i in range(len(df_interactions)):
    prot1 = df_interactions.iloc[i,0]
    prot2 = df_interactions.iloc[i,2]
    interaction = df_interactions.iloc[i,1]
    G.add_node(prot1)
    G.add_node(prot2)
    G.add_edge(prot1, prot2)
    G[prot1][prot2]['relation'] = interaction
    
    for n, nbrs in G.adj.items():
    for nbr, eattr in nbrs.items():
        relation = eattr['relation']
        
    return G

IndentationError: expected an indented block (<ipython-input-2-b996ee9459fd>, line 19)

In [None]:
path = "/Users/sophiakrix/Desktop/MechEnrichmentLab/NOTCH1_Intracellular.txt"
df = pd.read_csv(path, sep = "\t", header=None, names=["Protein1", "interaction", "Protein2"])

In [None]:
df.head()

In [None]:
df.columns

In [None]:
interactions_set = set(df.iloc[:,1])
interactions_set

In [None]:
interactions = df["interaction"].value_counts()
interactions

In [None]:
df_interactions = df.replace("in-complex-with", +1)
df_interactions = df_interactions.replace("controls-expression-of", -1)
df_interactions = df_interactions.replace("controls-state-change-of", -1)

In [None]:
df_interactions["interaction"].value_counts()

# NetworkX

In [None]:
G = nx.DiGraph()

In [None]:
len(df_interactions)

In [None]:
for i in range(len(df_interactions)):
    prot1 = df_interactions.iloc[i,0]
    prot2 = df_interactions.iloc[i,2]
    interaction = df_interactions.iloc[i,1]
    G.add_node(prot1)
    G.add_node(prot2)
    G.add_edge(prot1, prot2)
    G[prot1][prot2]['relation'] = interaction

In [None]:
for n, nbrs in G.adj.items():
    for nbr, eattr in nbrs.items():
        relation = eattr['relation']
        #print(n, nbr, relation)

In [None]:
plt.subplot(121)
nx.draw(G, with_labels=True, font_weight='bold')
#plt.subplot(122)
#nx.draw_shell(G, nlist=[range(5, 10), range(5)], with_labels=True, font_weight='bold')

In [None]:
# Nr edges and nr of nodes 

print("Number of edges: ",G.number_of_edges(),"\nNumber of nodes: ",G.number_of_nodes())


# Algorithmic Development

In [None]:
"""
Randomly assigns labels of [-1,0,1] to nodes in a graph
Labels:
-1 : Downregulated
0 : No change
+1 : Upregulated

Input:
    - graph : the graph consisting of protein nodes 

Output:
    - prints list of nodes with associated attribute label
"""
def random_node_labels(graph):
    for node in graph.nodes():
        random_label = random.randint(-1,1)
        graph.nodes[node]['label'] = random_label
    print(graph.nodes.data())

In [None]:
random_node_labels(G)

In [None]:
"""
Caclulates the shortest path between two nodes.

Input:
    - graph : NetworkX graph
    - source : upstream source node


Output:
    - dictionary of shortest path nodes between source node and all other nodes in graph
"""
def shortest_path(graph, source):
    for target in graph.nodes():
        shortest_paths = nx.shortest_path(graph, source)
    return shortest_paths

In [None]:
shortest_path(G, "CCNC")

In [None]:
"""
Check if node labels of source and target node are the same

Input:
    - graph: NetworkX graph
    - source: source upstream node
    
Output:
    - list of concordant and non-concordant nodes for the source node
"""
def count_concordance(graph, source):
    
    same_label = False

    concordant_nodes = []
    non_concordant_nodes = []
    no_change_nodes = []
    
    for target, path_nodes in shortest_path(graph,source).items():

        # check if node labels of source and target are the same
        if G.nodes[source]['label'] == 1 and G.nodes[target]['label'] == 1:
            same_label = True
        if G.nodes[source]['label'] == -1 and G.nodes[target]['label'] == -1:
            same_label = True
            
        # multiply the edge labels
        edge_label = 1
        for i in range(len(path_nodes)-1):
            temp_edge_label = G[path_nodes[i]][path_nodes[i+1]]['relation']
            edge_label *= temp_edge_label
        
        # concordant node
        if same_label == True and edge_label == +1:
            graph.nodes[target]['concordance'] = +1
            concordant_nodes.append(target)
        
        # non-concordant node
        if same_label == False and edge_label == -1:
            graph.nodes[target]['concordance'] = -1
            non_concordant_nodes.append(target)
            
        # no change node
        if G.nodes[source]['label'] == 0 and G.nodes[target]['label'] == 0:
            no_change_nodes.append(target)
        
    return concordant_nodes, non_concordant_nodes, no_change_nodes
            

In [None]:
count_concordance(G, "CCNC")

In [None]:
"""
Returns a dictionary of the nodes of the graph with their according
     - shortest path nodes
     - concordant nodes
     - non-concordant nodes
     - no change nodes
     
Input:
    - graph
    
Output:
    - dictionary of nodes 
"""
def nodes_dictionary(graph):
    dic = {}
    for node in graph.nodes():
        dic[node] = {}
        
        # shortest path nodes
        dic[node]['shortest_path'] = list(shortest_path(graph,node).keys())
        
        # concordant nodes
        dic[node]['concordant'] = count_concordance(graph, node)[0]
        
        # non-concordant nodes
        dic[node]['non-concordant'] = count_concordance(graph, node)[1]
        
        # no change nodes
        dic[node]['no change'] = count_concordance(graph, node)[2]
    
    return dic

In [None]:
nodes_dictionary(G)['CCNC']

In [None]:
"""
Calculates the concordance for an upstream node with its downstream nodes
Probability of getting at least the number of state changes consistent
with the direction
Input:
    - graph
    
Output:
    - p-value for concordance
"""
def calculate_concordance(graph):
    concordance_dic = {}

    #assert 0 <= p and p <= 1, "p must be within [0,1]"

    if hyp_node not in graph.nodes():
        raise ValueError(f"The node {hyp_node} is not in the graph.")

    for hyp_node in graph.nodes():
        # n is number of trials
        n = len(shortest_path(graph, hyp_node).keys())
        # k is number of successful predictions
        k = len(count_concordance(graph, hyp_node)['concordant'])

        bin_coeff = special.binom(n, k)
        concordance = bin_coeff * (0.5 ** k) * (1 - 0.5) ** (n - k)
        concordance_dic[hyp_node] = {}
        concordance_dic[hyp_node]['p_val'] = concordance

    # correction for multiple testing
    reject, pvals_corrected = multitest.multipletests(concordance_dic.values()),alpha=0.05,method='bonferroni')
    corrected_concordance_dic = {}
    for node, pval in zip(graph.nodes(),pvals_corrected):
        concordance_dic[node]['p_val_corrected'] = pval

    return concordance_dic

In [None]:
calculate_concordance(G, 'NOTCH1', 0.5)


In [None]:
"""
Writes the values for nodes, concordant_nodes, non_concordant_nodes, no_change_nodes, p_val, p_val_corrected to a csv file

Input:
- dic

Output:
- csv file
"""
def write_concordance_csv(graph, csv_output):
    
    for node in graph.nodes():
        
        dic[node] = count_concordance(graph, node)
        dic[node]['p_val'] = calculate_concordance(graph)[node]['p_val']
        dic[node]['p_val_corrected'] = calculate_concordance(graph)[node]['p_val_corrected']
        
 
    csv_columns = ['concordant', 'non-concordant', 'no change', 'p_val', 'p_val_corrected']

    try:
        with open(csv_output, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
            writer.writeheader()
            for data in dic:
                writer.writerow(data)
    except IOError:
        print("I/O error")
        
        

In [None]:
write_concordance_csv(G, "/Users/sophiakrix/Desktop/Concordance_test.csv")