<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Find-all-shortest-paths-and-shortest-paths-with-highest-PRS-weight" data-toc-modified-id="Find-all-shortest-paths-and-shortest-paths-with-highest-PRS-weight-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Find all shortest paths and shortest paths with highest PRS weight</a></span><ul class="toc-item"><li><ul class="toc-item"><li><span><a href="#Fig-S5A" data-toc-modified-id="Fig-S5A-1.0.1"><span class="toc-item-num">1.0.1&nbsp;&nbsp;</span>Fig S5A</a></span></li></ul></li></ul></li><li><span><a href="#Calculate-GO-enrichment-of-paths-using-only-the-genes-on-the-path-(excl.-effector-and-sensor)" data-toc-modified-id="Calculate-GO-enrichment-of-paths-using-only-the-genes-on-the-path-(excl.-effector-and-sensor)-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Calculate GO enrichment of paths using only the genes on the path (excl. effector and sensor)</a></span></li></ul></div>

In [2]:
from matplotlib.lines import Line2D
import matplotlib.pyplot as plt
import matplotlib as mpl
import networkx as nx
import numpy as np
import pandas as pd
import pickle
import os
import re
import itertools as itr
import matplotlib as mpl
import seaborn as sns
from enm.Enm import Enm
from enm.utils import *
from tqdm import tqdm

In [3]:
# Load the pickled ENM object produced upstream in the workflow.
# NOTE: pickle.load can execute arbitrary code, so this file must come from a
# trusted source (here: an input declared by the Snakemake workflow).
with open(snakemake.input.pickle_file_name, 'rb') as pickle_handle:
    e_pcc = pickle.load(pickle_handle)

In [4]:
# Node collection of the loaded object and the number of nodes in it.
nodes = e_pcc.nodes
n = len(nodes)

In [5]:
# Effector table; later cells read its `orf_name` and `effector_cluster` columns.
effectors_df = pd.read_csv(snakemake.input.effector_pcc)

In [6]:
# Sensor table; later cells read its `orf_name` and `gid` (cluster id) columns.
sensors_df = pd.read_csv(snakemake.input.sensors_pcc)

# Find all shortest paths and shortest paths with highest PRS weight

In [7]:
def find_all_shortest_paths(sources, targets, g):
    """Return every globally shortest path between two groups of nodes.

    For each (source, target) pair all shortest paths in ``g`` are collected;
    of the pooled paths only those whose length equals the overall minimum
    are returned, in the order in which they were found.

    Parameters
    ----------
    sources, targets : pandas.DataFrame
        Tables with an ``orf_name`` column holding node names of ``g``.
    g : networkx graph
        Graph that is searched with ``nx.all_shortest_paths``.

    Returns
    -------
    list of list
        The shortest paths (each a list of node names).  Empty when
        ``sources`` or ``targets`` has no rows.

    Notes
    -----
    Exceptions raised by ``nx.all_shortest_paths`` (e.g. for unreachable
    targets) are not caught here.
    """
    source_names = sources.orf_name.tolist()
    target_names = targets.orf_name.tolist()

    paths = []
    for s in source_names:
        for t in target_names:
            paths.extend(nx.all_shortest_paths(g, s, t))

    if not paths:
        return []

    # Compute the minimum once instead of once per candidate path.
    min_len = min(len(path) for path in paths)
    return [path for path in paths if len(path) == min_len]

In [8]:
def find_highest_prs_path(sources, targets, g, prs_mat_df):
    """Pick the shortest source-to-target path with the largest PRS weight.

    For every (source, target) pair, all shortest paths in ``g`` are
    enumerated and the one with the highest weight is kept.  A path's weight
    is the sum of the ``prs_mat_df`` entries in the source's row over every
    node on the path (the source itself included).  Among the kept paths,
    only those of minimal length compete, and the heaviest of them is
    returned.  Ties are resolved in favour of the path found first.

    Parameters
    ----------
    sources, targets : pandas.DataFrame
        Tables with an ``orf_name`` column holding node names of ``g``.
    g : networkx graph
        Graph that is searched with ``nx.all_shortest_paths``.
    prs_mat_df : pandas.DataFrame
        Matrix indexed by node name on both axes.

    Returns
    -------
    tuple
        ``(path, counts)`` where ``path`` is a list of node names and
        ``counts`` lists the number of shortest paths found for each
        (source, target) pair, in iteration order.

    Raises
    ------
    ValueError
        If ``sources`` or ``targets`` has no rows, so there is no path to
        choose from.
    """
    best_per_pair = []  # (path, weight) of the heaviest path of each pair
    counts = []
    for s in sources.orf_name.tolist():
        for t in targets.orf_name.tolist():
            candidates = list(nx.all_shortest_paths(g, s, t))
            counts.append(len(candidates))
            weights = [prs_mat_df.loc[s, path].sum() for path in candidates]
            best = int(np.argmax(weights))
            # Keep the weight so it does not have to be recomputed below.
            best_per_pair.append((candidates[best], weights[best]))

    if not best_per_pair:
        raise ValueError(
            "cannot select a path: `sources` or `targets` contains no nodes"
        )

    # Compute the minimum once instead of once per candidate path.
    min_len = min(len(path) for path, _ in best_per_pair)
    shortest = [(path, w) for path, w in best_per_pair if len(path) == min_len]
    best_path, _ = shortest[int(np.argmax([w for _, w in shortest]))]
    return best_path, counts

In [9]:
# One empty dict per effector cluster (0-2); filled per sensor cluster by the
# highest-PRS path search in a later cell.
all_paths = {effector_cluster: {} for effector_cluster in range(3)}
all_path_counts = {effector_cluster: {} for effector_cluster in range(3)}

In [10]:
# One empty dict per effector cluster (0-2), keyed by sensor cluster once filled.
all_shortest_paths = {effector_cluster: {} for effector_cluster in range(3)}

In [11]:
# Enumerate all globally shortest paths for every combination of effector
# cluster (0-2) and sensor cluster (1-9).
sensor_cluster_ids = np.arange(1, 10)
for i in range(3):
    sources = effectors_df[effectors_df.effector_cluster == i]
    for j in sensor_cluster_ids:
        targets = sensors_df[sensors_df.gid == j]
        all_shortest_paths[i][j] = find_all_shortest_paths(
            sources, targets, e_pcc.graph_gc
        )

In [12]:
# 3 effector clusters x 10 columns. Only columns 1-9 are filled by the loop
# that follows; column 0 stays zero and is sliced off before plotting.
all_shortest_paths_counts = np.zeros((3,10))

In [13]:
# Store the number of shortest paths found for each cluster pair.
for i, j in itr.product(range(3), range(1, 10)):
    all_shortest_paths_counts[i, j] = len(all_shortest_paths[i][j])

In [14]:
# Print floats in fixed-point rather than scientific notation, then show the table.
np.set_printoptions(suppress=True)
all_shortest_paths_counts

Total number of shortest paths between each effector cluster and each sensor cluster

### Fig S5A

In [15]:
# Heatmap of shortest-path counts per cluster pair. Column 0 of the count
# array is dropped because sensor clusters are numbered 1-9.
path_count_table = pd.DataFrame(
    all_shortest_paths_counts[:, 1:],
    index=['EC1', 'EC2', 'EC3'],
    columns=[f"SC{k}" for k in range(1, 10)],
)
fig, ax = plt.subplots(figsize=(7, 2.5))
sns.heatmap(
    path_count_table,
    annot=True,
    ax=ax,
    fmt='.0f',
    linewidths=.5,
    cmap='Blues',
    cbar_kws={"orientation": "horizontal"},
)
plt.yticks(rotation=0)
if snakemake.params.save:
    fig.savefig(
        'reports/figures/paper_figures_supp/figs5_a.png',
        dpi=150,
        bbox_inches='tight',
    )

In [16]:
# Total number of shortest paths summed over all effector/sensor cluster pairs.
all_shortest_paths_counts.sum()

# Calculate GO enrichment of paths using only the genes on the path (excl. effector and sensor)

In [17]:
%%capture
# Build the GO enrichment objects from the workflow's input files.
# %%capture suppresses whatever this cell prints.
gaf = snakemake.input['gaf']
obo = snakemake.input['obo']
background_file = snakemake.input['background_file']
sgd_info = snakemake.input['sgd_info']
# NOTE(review): pickle_file is assigned here but not used in this cell.
pickle_file = snakemake.input['pickle_file_name']
# create_goea is presumably provided by the `enm.utils` star import; the third
# returned value is discarded.
goea, geneid2name, _ = create_goea(gaf = gaf, obo_fname=obo, 
                                background=background_file, sgd_info_tab = sgd_info)

In [21]:
# For every effector cluster (0-2) / sensor cluster (1-9) pair, keep the
# highest-PRS shortest path and the per-gene-pair shortest-path counts.
counts = []
for i in range(3):
    sources = effectors_df[effectors_df.effector_cluster == i]
    for j in np.arange(1, 10):
        targets = sensors_df[sensors_df.gid == j]
        all_paths[i][j], all_path_counts[i][j] = find_highest_prs_path(
            sources, targets, e_pcc.graph_gc, e_pcc.prs_mat_df
        )

In [22]:
# Run the GO enrichment query on the interior genes of each selected path,
# i.e. the path without its first (effector) and last (sensor) node.
goa_go_dict = {effector_cluster: {} for effector_cluster in range(3)}
for i in range(3):
    for j in np.arange(1, 10):
        interior_genes = all_paths[i][j][1:-1]
        on_path = e_pcc.df.orf_name.isin(interior_genes)
        goa_go_dict[i][j] = query_goatools(e_pcc.df.loc[on_path], goea, geneid2name)

There are 12 effector–sensor paths for which the GO enrichment query returned no result (counted in the cell below).

In [23]:
# Number of cluster pairs whose GO query result is None.
sum(
    go_result is None
    for per_sensor in goa_go_dict.values()
    for go_result in per_sensor.values()
)

In [111]:
# Hex colour codes: seaborn's 'Set3' palette for sensor clusters and a fixed
# list for effector clusters.
sensor_colors = list(map(mpl.colors.to_hex, sns.color_palette('Set3')))
effector_colors = [
    "#A65628",
    "#F781BF",
    "#999999",
    'blue',
    'yellow',
    'red',
]


In [175]:
# Lookup table: orf_name -> 'Systematic gene name'.
name_dict = (
    e_pcc.df
    .set_index('orf_name')['Systematic gene name']
    .to_dict()
)

In [166]:
# Display the selected highest-PRS path for every effector/sensor cluster pair.
all_paths

In [159]:
# Human-readable cluster labels: effector clusters are 0-based in the data
# and shown as EC1, EC2, ...
effectors_df['cluster'] = [
    f"EC{cluster_idx + 1}"
    for cluster_idx in effectors_df['effector_cluster'].tolist()
]
effectors_df['is_effector'] = True

In [140]:
# Human-readable cluster labels: the sensor cluster id is used as-is (SC<gid>).
sensors_df['cluster'] = [
    f"SC{cluster_id}"
    for cluster_id in sensors_df['gid'].tolist()
]
sensors_df['is_sensor'] = True

In [141]:
# Stack the effector and sensor tables and write them out without the index.
pd.concat(
    [effectors_df, sensors_df]
).to_csv(
    snakemake.output.combined_data_for_colors,
    index=False,
)

In [133]:
node_list = [*effectors_df.loc[effectors_df.effector_cluster==0].orf_name.tolist(), 
             *sensors_df.loc[sensors_df.gid!=0].orf_name.tolist(),
             *[item for sublist in [v for key,v in all_paths[0].items()] for item in sublist]]

node_sub = nx.induced_subgraph(e_pcc.graph_gc, node_list)

nx.draw(node_sub)
nx.write_edgelist(nx.induced_subgraph(e_pcc.graph_gc,node_list),snakemake.output.ec1, delimiter=',',data=False)

In [134]:
node_list = [*effectors_df.loc[effectors_df.effector_cluster==1].orf_name.tolist(), 
             *sensors_df.loc[sensors_df.gid!=0].orf_name.tolist(),
             *[item for sublist in [v for key,v in all_paths[1].items()] for item in sublist]]

node_sub = nx.induced_subgraph(e_pcc.graph_gc, node_list)

nx.draw(node_sub)
nx.write_edgelist(nx.induced_subgraph(e_pcc.graph_gc,node_list),snakemake.output.ec2, delimiter=',',data=False)

In [135]:
node_list = [*effectors_df.loc[effectors_df.effector_cluster==2].orf_name.tolist(), 
             *sensors_df.loc[sensors_df.gid!=0].orf_name.tolist(),
             *[item for sublist in [v for key,v in all_paths[2].items()] for item in sublist]]

node_sub = nx.induced_subgraph(e_pcc.graph_gc, node_list)

nx.draw(node_sub)
nx.write_edgelist(nx.induced_subgraph(e_pcc.graph_gc,node_list),snakemake.output.ec3, delimiter=',',data=False)

In [107]:
# View of each node's 'pos' attribute on the graph (presumably layout
# coordinates -- confirm against where the attribute is set).
pos = e_pcc.graph_gc.nodes('pos')

In [160]:
# Map cluster labels to colours for the legend.
# NOTE(review): labels are sorted as strings and the first sorted sensor label
# is skipped by the [1:10] slice -- presumably that is 'SC0'; confirm that the
# `gid` values are exactly 0-9, otherwise labels and colours will misalign.
sensor_cluster_labels = sensors_df.cluster.sort_values().unique()
sensor_color_map = dict(zip(sensor_cluster_labels[1:10], sensor_colors[1:10]))

effector_cluster_labels = effectors_df.cluster.sort_values().unique()
effector_color_map = dict(zip(effector_cluster_labels, effector_colors[:3]))

In [619]:
def _cluster_legend_handles(color_map, marker):
    """Return one marker-only legend handle per (label, colour) entry."""
    return [
        Line2D(
            [0], [0],
            marker=marker,
            color='black',
            label=label,
            markerfacecolor=color,
            markersize=10,
            linestyle="None",
        )
        for label, color in color_map.items()
    ]


# Sensors are drawn as triangles, effectors as squares.
sensor_legend_elements = _cluster_legend_handles(sensor_color_map, '^')
effector_legend_elements = _cluster_legend_handles(effector_color_map, 's')

In [620]:
# Handles for the two non-cluster legend entries: plain genes (filled circle)
# and edges (thick semi-transparent line with a zero-size marker).
other_genes_handle = Line2D(
    [0], [0],
    marker='o',
    color='black',
    label='Other Genes',
    markerfacecolor='black',
    markersize=10,
    linestyle="None",
)
pcc_edge_handle = Line2D(
    [0], [0],
    marker='o',
    color='black',
    label= 'PCC ≥ 0.2',
    markerfacecolor='black',
    markersize=0,
    linestyle="-",
    alpha=0.5,
    lw=5,
)

# Full legend: effector clusters, sensor clusters, then the generic entries.
legend_elements = (
    effector_legend_elements
    + sensor_legend_elements
    + [other_genes_handle, pcc_edge_handle]
)

In [627]:
# Legend-only figure: the axes are switched off so only the legend is drawn.
fig = plt.figure()
# NOTE(review): figlegend is created but not used anywhere else in this cell.
figlegend = plt.figure(figsize=(3,1))
ax = fig.add_subplot(111)
#lines = ax.plot(range(10), plt.randn(10), range(10), plt.randn(10))
ax.axis('off')
# Two-column, frameless legend centred inside the full axes area.
lgd = ax.legend(handles=legend_elements,
                handletextpad=0.1, 
                labelspacing=0.4, 
                borderpad=0,
                columnspacing=0.4,
                fontsize=16, 
                ncol=2,
                frameon=False, 
                loc = 'center',
                bbox_to_anchor=(0., 0., 1, 1))
# Saving the legend is currently disabled (commented out below).
#if snakemake.params['save']:
#fig.savefig(f'../reports/figures/paper_figures_supp/figs6_legend.pdf', dpi=150, pad_inches=0)#, bbox_inches='tight')