In [None]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib as mpl
import matplotlib.pyplot as plt
from networkx.drawing.nx_agraph import graphviz_layout

In [13]:
# Add truth indicator to edges
# Add the number of intermediates for indirect transmissions


In [14]:
# Number of decimal places used when rounding the values shown in edge labels.
DECIMAL_PRECISION = 2
# Edges are kept for plotting only if their probability is strictly greater than this.
PROBABILITY_THRESHOLD = 0.2
# Path stem (no extension) of the analyser output; '.png' is appended to it
# when the rendered graph is saved.
FILE_NAME = "/Users/jugne/Documents/TnT-material/hiv/inf/final/full_tiedParams_heated_sameNe/combined/env_pol_saConstrained_s1_tiedParams_heated_sameNe_combined_tr_analyser"
# Tab-separated analyser table. Derived from FILE_NAME rather than repeating the
# literal, so the input table and the output image always share the same stem.
FILE_PATH = FILE_NAME + ".txt"



In [15]:
# Load the tab-separated analyser table into a DataFrame.
tnt_analyser = pd.read_csv(filepath_or_buffer=FILE_PATH, sep="\t")
# sim_truth_file = /Users/jugne/Documents/Source/TnT/scripts/trees_and_pars.txt
# tnt_analyser.head()

In [16]:
# Host list: the sorted unique prefixes of the column names, where a prefix is
# everything before the first underscore of a column name.
column_prefixes = [column.partition('_')[0] for column in tnt_analyser.columns]
hosts = np.unique(column_prefixes)

In [17]:
# Every probability below is, per column, the fraction of rows whose value
# satisfies the stated condition.
n_rows = tnt_analyser.shape[0]

# prob of direct transmission: value equal to 1
prob_direct = tnt_analyser.eq(1).sum() / n_rows

# prob of indirect or direct transmission: value of at least 1
prob_indirectAndDirect = tnt_analyser.ge(1).sum() / n_rows

# prob of indirect transmission: value greater than 1
prob_indirect = tnt_analyser.gt(1).sum() / n_rows

# prob of no transmission: value equal to 0
prob_noTr = tnt_analyser.eq(0).sum() / n_rows

# count intermediate unobserved transmissions: (value - 1) where the value is
# greater than 1, NaN everywhere else
n_unobserved_transmissions = tnt_analyser.where(tnt_analyser.gt(1)) - 1

In [18]:
def _edge_and_node_tables(pair_names, probability, mean_unobserved=None):
    """Build the edge and node tables for one transmission category.

    Parameters
    ----------
    pair_names : iterable of str
        Column names of the form '<from>_<to>'. Each name is split at its
        first underscore; a name without an underscore raises IndexError.
    probability : pandas.Series
        Probability of each pair in ``pair_names``, in the same order. Its
        index becomes the index of the edge table.
    mean_unobserved : array-like, optional
        When given, stored as the 'mean_unobserved' column of the edge table.

    Returns
    -------
    tuple
        ``(sources, targets, edges, nodes)``: the two endpoint lists, the edge
        DataFrame ('from', 'to', 'probability', optionally 'mean_unobserved',
        and 'label') and the node DataFrame with a single 'id' column.
    """
    sources = [name.split('_', 1)[0] for name in pair_names]
    targets = [name.split('_', 1)[1] for name in pair_names]

    edge_columns = {'from': sources, 'to': targets, 'probability': probability}
    if mean_unobserved is not None:
        edge_columns['mean_unobserved'] = mean_unobserved
    edges = pd.DataFrame(edge_columns)

    # add edge labels, set precision
    edges['label'] = edges['probability'].round(DECIMAL_PRECISION).astype(str)

    # Node ids are the edge endpoints themselves, so every id matches a value
    # found in the 'from' or 'to' column of the edge table.
    nodes = pd.DataFrame({'id': np.unique(sources + targets)})
    return sources, targets, edges, nodes


##########################################
####### indirect dataframes ##############
##########################################
names_indirect = tnt_analyser.columns[prob_indirect > 0]
from_indirect, to_indirect, edges_indirect, nodes_indirect = _edge_and_node_tables(
    names_indirect,
    prob_indirect[prob_indirect > 0],
    mean_unobserved=n_unobserved_transmissions.loc[:, names_indirect].mean(),
)

###################################
######## direct dataframes ########
###################################
names_direct = tnt_analyser.columns[prob_direct > 0]
from_direct, to_direct, edges_direct, nodes_direct = _edge_and_node_tables(
    names_direct,
    prob_direct[prob_direct > 0],
)

###############################################################
######### direct and indirect transmission dataframes #########
###############################################################
names_indirectAndDirect = tnt_analyser.columns[prob_indirectAndDirect > 0]
(from_indirectAndDirect,
 to_indirectAndDirect,
 edges_indirectAndDirect,
 nodes_indirectAndDirect) = _edge_and_node_tables(
    names_indirectAndDirect,
    prob_indirectAndDirect[prob_indirectAndDirect > 0],
    # Columns with no value above 1 have a NaN mean; nan_to_num turns it into 0.
    mean_unobserved=np.nan_to_num(
        n_unobserved_transmissions.loc[:, names_indirectAndDirect].mean()
    ),
)

# calculate root probability: 1 minus the summed probability of all edges
# pointing at the host, rounded to 3 decimals
edge_targets = edges_indirectAndDirect['to']
root = [
    1 - edges_indirectAndDirect.loc[edge_targets == h, 'probability'].sum()
    for h in nodes_indirectAndDirect['id']
]
nodes_indirectAndDirect['root_probability'] = np.around(root, decimals=3)

In [19]:
# Keep only the edges whose probability is strictly above PROBABILITY_THRESHOLD.
above_threshold = edges_indirectAndDirect['probability'].gt(PROBABILITY_THRESHOLD)
edges_indirectAndDirect_trh = edges_indirectAndDirect.loc[above_threshold]

In [20]:
# Build a directed graph from the thresholded edge table. Every edge carries its
# 'probability' and 'mean_unobserved' values as edge attributes.
G = nx.from_pandas_edgelist(edges_indirectAndDirect_trh, 'from', 'to',
                            ['probability', 'mean_unobserved'],
                            create_using=nx.DiGraph())
# colors = [nodes_indirectAndDirect[nodes_indirectAndDirect['id']==i]['root_probability'].item() for i in list(G.nodes)]
# col = ["None" if x <=0 else x for x in [np.around((i * 10)-1) for i in colors]]

# Node colors by root probability.
# plt.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
# Matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap("Greens")
# Map each host id to the hex color of its root probability. The mapping is built
# in one pass rather than rewriting a dict while iterating over it.
node_col = {
    host: mpl.colors.rgb2hex(cmap(root_probability))
    for host, root_probability in zip(nodes_indirectAndDirect.id,
                                      nodes_indirectAndDirect.root_probability)
}

nx.set_node_attributes(G, node_col, "fillcolor")

edge_probability = nx.get_edge_attributes(G, 'probability')
edge_mean_unobserved = nx.get_edge_attributes(G, 'mean_unobserved')

# set edge width by transmission probability
nx.set_edge_attributes(G, edge_probability, "penwidth")

# merge transmission probability and mean intermediate transmissions for edge labels;
# each label reads '<rounded probability>, <rounded mean_unobserved>'
dct_rounded_probs = {
    edge: round(value, DECIMAL_PRECISION) for edge, value in edge_probability.items()
}
dct_rounded_unobserved_count = {
    edge: round(value, DECIMAL_PRECISION) for edge, value in edge_mean_unobserved.items()
}
dct_label = {
    edge: str(dct_rounded_probs[edge]) + ', ' + str(dct_rounded_unobserved_count[edge])
    for edge in edge_probability
}

nx.set_edge_attributes(G, dct_label, "label")

# fig, ax = plt.subplots(figsize=(15,10))
# # Make the graph
# nx.draw(G,ax=ax, with_labels=True, 
#         pos=graphviz_layout(G,prog="dot", args='-Grankdir=LR'),#pos=nx.spiral_layout(G, equidistant=True, resolution=0.8),#pos=nx.spring_layout(G, k=20/np.sqrt(G.order())),
#         node_color=[nodes_indirectAndDirect[nodes_indirectAndDirect['id']==i]['root_probability'] for i in list(G.nodes)],
#         cmap=plt.get_cmap("Greens"), node_size=1500, node_shape='H', alpha=0.6, arrows=True, connectionstyle='Angle3')



# plt.title("Directed")
# plt.show()

In [21]:
# Convert the networkx graph to a Graphviz AGraph and render it to
# '<FILE_NAME>.png' with the 'dot' program.
A = nx.nx_agraph.to_agraph(G)
A.layout(prog='dot')
# NOTE(review): draw() below is also given prog='dot', which presumably runs its
# own layout pass; confirm whether the explicit layout() call is still needed.
png_path = FILE_NAME + '.png'
dot_args = '-Grankdir=LR -Gsplines=true -Goverlap="false" -Nshape=hexagon -Nstyle=filled'
A.draw(png_path, args=dot_args, prog='dot')

# A.draw('test3.png',args='-Gnodesep=0.01 -Gfont_size=1', prog='dot' ) 
# plt.show() 

In [84]:
# from jaal import Jaal
# Jaal(edges_indirectAndDirect, nodes_indirectAndDirect).plot(directed=True)

In [22]:
# Show the thresholded edge table that the graph was built from.
edges_indirectAndDirect_trh

Unnamed: 0,from,to,probability,mean_unobserved,label
A_B,A,B,0.4449,0.0,0.44
B_C,B,C,0.484814,0.0,0.48
B_H,B,H,0.809596,0.0,0.81
C_D,C,D,0.372107,0.0,0.37
C_E,C,E,0.831434,0.0,0.83
C_L,C,L,0.690301,0.0,0.69
D_C,D,C,0.608451,0.0,0.61
D_I,D,I,0.453901,1.0,0.45
E_C,E,C,0.300492,0.0,0.3
E_K,E,K,0.986489,1.0,0.99
