# Benchmarking Layouts + Correlation diagrams 
--------

In [None]:
# import
from multidimvis_main import *

### Model Networks

In [None]:
# --------------------------
# CHOOSE ONE OF THE FOLLOWING
# --------------------------
# Exactly one assignment to `G` should be active; every later cell works on
# whatever graph `G` holds. Uncomment the desired model network and comment
# out the others.

# ZACHARYS KARATE NETWORK
#G = nx.karate_club_graph()

# TREE GRAPH
# NOTE: `r` is reassigned further down (restart probability in the matrices
# cell), so this cell has to be re-run before rebuilding the tree.
n = 1000 # number of nodes
r = 100 # branching factor
G = nx.full_rary_tree(r, n)

# STAR GRAPH
#n = 1000
#G = nx.star_graph(n)

# CYCLE GRAPH
#n = 1000
#G = nx.cycle_graph(n)

# DRAW spring layout of graph
#nx.draw(G)

In [None]:
# Basic size statistics of the selected network.
num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()

# Density of an undirected simple graph is 2E / (N * (N - 1)), reported in percent.
# The parentheses matter: `N*N-1` would divide by N^2 - 1 instead.
num_ordered_pairs = num_nodes * (num_nodes - 1)
# A graph with fewer than two nodes has no possible edge; report 0% instead of dividing by zero.
density_percent = 200. * num_edges / num_ordered_pairs if num_ordered_pairs else 0.

print('Number of nodes i.e. genes: %s' % num_nodes)
print('Number of edges: %s' % num_edges)
print('Network density: %.1f%%' % density_percent)

+ Feature Collection 

In [None]:
# Per-node features. Each d_* dict maps node -> value rounded to 4 decimals and
# is ordered from the highest to the lowest value (the colouring cell relies on
# the values only, the ordering is kept for inspection).

# Degree, normalised by the largest degree in the graph.
degs = dict(G.degree())
# Computed once: looking up the maximum inside the loop made this step quadratic.
max_deg = max(degs.values(), default=0)
d_deghubs = {
    node: round(float(de / max_deg), 4)
    for node, de in sorted(degs.items(), key=lambda item: item[1], reverse=True)
}

# Closeness centrality.
closeness = nx.closeness_centrality(G)
d_clos = {
    node: round(cl, 4)
    for node, cl in sorted(closeness.items(), key=lambda item: item[1], reverse=True)
}

# Betweenness centrality.
betweens = nx.betweenness_centrality(G)
d_betw = {
    node: round(be, 4)
    for node, be in sorted(betweens.items(), key=lambda item: item[1], reverse=True)
}

# Eigenvector centrality (currently disabled).
#eigen = nx.eigenvector_centrality(G)
#d_eigen = {}
#for node, eig in sorted(eigen.items(), key = lambda x: x[1], reverse = 1):
#     d_eigen[node] = round(eig,4)

# Combined hub score from the project helper, fed with the raw (unrounded) features.
d_hubscored = calc_hub_score(G, degs, closeness, betweens, 3)

In [None]:
# Bring every feature dict into the same (ascending node ID) order so that the
# values can be combined per node.

def _by_node_id(feature):
    """Return a copy of a {node: value} dict ordered by ascending node ID."""
    return dict(sorted(feature.items(), key=lambda item: item[0]))


d_deghubs_sorted = _by_node_id(d_deghubs)
d_clos_sorted = _by_node_id(d_clos)
d_betw_sorted = _by_node_id(d_betw)
#d_eigen_sorted = _by_node_id(d_eigen)
#d_hubscored_sorted = _by_node_id(d_hubscored)

# feature collection: node -> (degree, closeness, betweenness)
feature_dict = {
    node: (
        d_deghubs_sorted[node],
        d_clos_sorted[node],
        d_betw_sorted[node],
        # d_eigen_sorted[node],
        # d_hubscored_sorted[node],
    )
    for node in d_deghubs_sorted
}

# IMPORTANT :
# re-order the features to follow the node order of the graph itself
feature_dict_sorted = {node: feature_dict[node] for node in G.nodes()}

feature_columns = [
    'degs',
    'clos',
    'betw',
    #'eigen',
    # 'hubs',
]
feature_df = pd.DataFrame.from_dict(feature_dict_sorted, orient='index', columns=feature_columns)

# Same content as a list of [node, (features...)] pairs.
l_features = [[node, values] for node, values in feature_dict_sorted.items()]

In [None]:
# Preview the first rows of the per-node feature table (index = node ID).
feature_df.head()

+ Matrices

In [None]:
# ---------------------------------------
# Calculations
# ---------------------------------------

# Matrix index -> node ID, using ascending node IDs.
# NOTE(review): the adjacency matrix below follows G.nodes() order instead; both
# orders coincide only if G.nodes() is already sorted - confirm for custom graphs.
d_idx_entz = dict(enumerate(sorted(G.nodes())))
node_to_idx = {node: idx for idx, node in d_idx_entz.items()}
n_nodes = len(d_idx_entz)

# Shortest-path-length matrix, filled from one BFS per source node.
# Unreachable pairs stay at inf so the matrix is always square; previously such a
# pair was skipped, which produced a ragged row and made np.vstack fail.
Mspl = np.full((n_nodes, n_nodes), np.inf)
for source, lengths in nx.all_pairs_shortest_path_length(G):
    row = node_to_idx[source]
    for target, spl in lengths.items():
        Mspl[row, node_to_idx[target]] = spl

n_unreachable = int(np.isinf(Mspl).sum())
if n_unreachable:
    print('no path for %d node pairs (distance set to inf)' % n_unreachable)

# Restart probability
# NOTE: this reuses the name `r` that the graph-selection cell uses for the branching factor.
r = .8 # originally 0.8

# Degree of Freedom
alpha = 1.0 # indicating "randomness" - originally 1.


# -----------------------
# All Matrices
# -----------------------

# adjacency matrix
A = nx.adjacency_matrix(G)
DM_adj = A.toarray()

# SPL MATRIX
DM_spl = Mspl

# invert Markov (random walk): each row is scaled by its maximum and flipped,
# so the largest entry of a row becomes 0
DM_m = rnd_walk_matrix2(A, r, alpha, len(G.nodes()))
DM_m_mod = np.array([(1-(x/max(x))) for x in DM_m])

# -log(Markov) matrix, again on rows scaled by their maximum
# (an entry of 0 in DM_m maps to inf here)
min_log = lambda t: -np.log(t)
DM_mlog = np.array([min_log(x/max(x)) for x in DM_m])

# scipy "pdist" for distance: pairwise distances between the node feature vectors
DM_cos = distance.squareform(distance.pdist(feature_df, 'cosine'))
DM_eucl = distance.squareform(distance.pdist(feature_df, 'euclidean'))
DM_sqeucl = distance.squareform(distance.pdist(feature_df, 'sqeuclidean'))
DM_corr = distance.squareform(distance.pdist(feature_df, 'correlation'))

+ Coloring

In [None]:
# Define colour parameter: {node: value}; every distinct value gets its own shade.
d_to_be_coloured = d_clos


# Colouring: one palette entry per distinct value
colour_groups = set(d_to_be_coloured.values())
colour_count = len(colour_groups)
palette = sns.color_palette('Reds', colour_count)

# value -> list of nodes carrying that value
# (built in one pass; the loop variable no longer overwrites the node count `n`)
d_colourgroups = {}
for node, value in d_to_be_coloured.items():
    d_colourgroups.setdefault(value, []).append(node)

d_colourgroups_sorted = {key: d_colourgroups[key] for key in sorted(d_colourgroups)}

# value -> colour: ascending values are paired with the palette entries in order,
# so the biggest value receives the last palette colour
d_val_col = dict(zip(d_colourgroups_sorted, palette))

# node -> colour, via a direct lookup instead of scanning all values per node
d_node_colour = {node: d_val_col[value] for node, value in d_to_be_coloured.items()}

# SORT dict based on G.nodes
d_node_colour_sorted = {node: d_node_colour[node] for node in G.nodes()}

l_col = list(d_node_colour_sorted.values())

# NODE COLOURING based on Closeness Centrality
colours = l_col
edge_color = 'lightgrey'

#sns.palplot(colours)

### Layouts | 2D Portraits

In [None]:
# t-SNE settings passed to every embed_tsne_2D call in the embedding cell below.
# They are reported further down as Perplexity / Early Exaggeration /
# Learning rate / Iterations.
prplxty = 50 # range: 5-50 / default = 30.
density = 1  # default 12.
l_rate = 200 # default 200.
steps = 250 # min 250

In [None]:
%%time 

# EMBEDDING

def _tsne_layout_2D(matrix):
    """Embed `matrix` with the shared 2D t-SNE settings.

    Returns the raw embedding together with the node -> position mapping
    produced by get_posG for graph G.
    """
    embedding = embed_tsne_2D(matrix, prplxty, density, l_rate, steps)
    return embedding, get_posG(G, embedding)


# ---------------------------------------
# Spring
# ---------------------------------------
forceatlas2 = ForceAtlas2()
#posG_spring = forceatlas2.forceatlas2_networkx_layout(G, pos=None, iterations=200)
posG_spring = nx.spring_layout(G)


# ---------------------------------------
# Matrices
# ---------------------------------------

# Adjacency Matrix
embed_adj, posG_adj = _tsne_layout_2D(DM_adj)

# SPL Matrix
embed_spl, posG_spl = _tsne_layout_2D(DM_spl)

# Markov Matrix
embed_m, posG_m = _tsne_layout_2D(DM_m_mod)

# Markov -log Matrix
embed_mlog, posG_mlog = _tsne_layout_2D(DM_mlog)


# Distance Matrices based on pdist
# cosine
embed_cos, posG_cos = _tsne_layout_2D(DM_cos)

# euclidean
embed_eucl, posG_eucl = _tsne_layout_2D(DM_eucl)

# squared euclidean
embed_sqeucl, posG_sqeucl = _tsne_layout_2D(DM_sqeucl)

# Correlation Pairwise Matrix
embed_corr, posG_corr = _tsne_layout_2D(DM_corr)

In [None]:
# Shared settings for the 2D plots below.

# Label used in printed summaries and in every output file name.
organism = 'model_networks'

node_factor = 3
size = draw_node_degree(G, node_factor) # node size based on degree
#size = 15

opacity_nodes = 0.5
edge_color = 'lightgrey'
opacity_edges = 0.25
# Passed as `edgecolors` to draw_networkx_nodes in the plotting cells.
node_edge_col = None

#### 2D SPRING

In [None]:
# Draw the spring reference layout and store it as a PNG.
plt.figure(figsize=(18, 18))
plt.title('Organic spring', size=16)
plt.xlabel('x - axis', fontsize=16)
plt.ylabel('y - axis', fontsize=16)

spring_node_style = dict(
    edgecolors=node_edge_col,
    linewidths=0.5,
    node_color=colours,
    node_size=size,
)
spring_edge_style = dict(width=1., edge_color=edge_color, alpha=opacity_edges)

nx.draw_networkx_nodes(G, posG_spring, **spring_node_style)
nx.draw_networkx_edges(G, pos=posG_spring, **spring_edge_style)
#nx.draw_networkx_labels(G, pos = posG_spring, font_size = fontsize_labels, font_color = 'black')

print('Network: ', organism)
print('Number of Nodes:', len(G.nodes()))

# The target directory has to exist already.
plt.savefig('output_plots/Organic_spring_layout2D_' + organism + '.png')
plt.show()

#### 2D PORTRAITS

In [None]:
def _draw_tsne_panel(position, title, posG, node_alpha):
    """Draw graph G with layout `posG` into subplot `position` of the current figure.

    position   -- three-digit matplotlib subplot code (e.g. 441)
    title      -- panel title
    posG       -- node -> (x, y) positions
    node_alpha -- node opacity; None draws the nodes fully opaque
    """
    plt.subplot(position)
    plt.title(title, size=16)
    plt.xlabel('tsne comp1')
    plt.ylabel('tsne comp2')

    nx.draw_networkx_nodes(G, posG, edgecolors=node_edge_col, linewidths=0.5,
                           node_color=colours, node_size=size, alpha=node_alpha)
    # All panels share `edge_color`; the Markov panel used to hard-code 'lightgrey'.
    nx.draw_networkx_edges(G, pos=posG, width=1., edge_color=edge_color, alpha=opacity_edges)
    #nx.draw_networkx_labels(G, pos = posG, font_size = fontsize_labels, font_color = 'black')
    plt.tick_params(left=True, bottom=True, labelleft=True, labelbottom=True)


print('Network: ', organism)
print('Number of nodes:', len(G.nodes()))
print('')
print('Perplexity:', prplxty)
print('Early Exaggeration:', density)
print('Learning rate:', l_rate)
print('Iterations:', steps)


# Figure 1: layouts from the graph-derived matrices.
# The panels occupy the first row of a 4x4 grid, as in the original layout.
plt.figure(figsize=(35, 35))

matrix_panels = [
    (441, 'Adjacency + t-SNE', posG_adj, opacity_nodes),
    (442, 'Shortest Path length + t-SNE', posG_spl, opacity_nodes),
    (443, 'Markov + t-SNE', posG_m, opacity_nodes),
    (444, '-log Markov + t-SNE', posG_mlog, opacity_nodes),
]
for position, title, posG, node_alpha in matrix_panels:
    _draw_tsne_panel(position, title, posG, node_alpha)

plt.savefig('output_plots/matrices_4x4_01'+ organism +'.png')
plt.show()


# Figure 2: layouts from the feature-based pdist matrices (4 plots).
plt.figure(figsize=(35, 35))

pdist_panels = [
    (441, 'PDist Cosine + t-SNE', posG_cos, opacity_nodes),
    (442, 'PDist Euclidean + t-SNE', posG_eucl, opacity_nodes),
    (443, 'PDist Squared euclidean + t-SNE', posG_sqeucl, opacity_nodes),
    # The correlation panel is drawn with opaque nodes, as before.
    (444, 'Correlation matrix + t-SNE', posG_corr, None),
]
for position, title, posG, node_alpha in pdist_panels:
    _draw_tsne_panel(position, title, posG, node_alpha)

plt.savefig('output_plots/matrices_4x4_2'+ organism +'.png')
plt.show()

### Layouts | 3D Portraits

In [None]:
# Node sizes for the 3D plots.
# NOTE: this overwrites the `node_factor` used for the 2D plots; the landscape
# section further down reads `node_factor` again.
node_factor = 30 # node size factor
size3d = draw_node_degree_3D(G, node_factor) # node size based on degree
#size3d = 5

In [None]:
# t-SNE settings passed to every embed_tsne_3D call below; printed later as
# Perplexity / Early Exaggeration / Learning rate / Iterations.
prplxty3d = 50 # range: 5-50 / default = 30.#
density3d = 1 # default 12.
l_rate3d = 200 # default 200.
steps3d = 250 #min 250

# Replaces the [node, features] list built in the feature-collection cell;
# the 3D node traces below are created with l_features = None.
l_features = None 

In [None]:
# EMBEDDING

def _tsne_traces_3D(matrix):
    """Embed `matrix` with the shared 3D t-SNE settings and build the plot traces.

    Returns (positions, edge trace, node trace) for graph G.
    """
    positions = embed_tsne_3D(G, matrix, prplxty3d, density3d, l_rate3d, steps3d)
    edge_trace = get_trace_edges(G, positions, edge_color)
    node_trace = get_trace_nodes(G, positions, l_features, colours, size3d)
    return positions, edge_trace, node_trace


# Adjacency Matrix
posG3d_adj, adj_edges, adj_nodes = _tsne_traces_3D(DM_adj)
data_adj = [adj_edges, adj_nodes]

# SPL Matrix
posG3d_spl, spl_edges, spl_nodes = _tsne_traces_3D(DM_spl)
data_spl = [spl_edges, spl_nodes]

# Markov Matrix
posG3d_m, m_edges, m_nodes = _tsne_traces_3D(DM_m_mod)
data_m = [m_edges, m_nodes]

# Markov -log Matrix
posG3d_mlog, mlog_edges, mlog_nodes = _tsne_traces_3D(DM_mlog)
data_mlog = [mlog_edges, mlog_nodes]

# cosine
posG3d_cos, cos_edges, cos_nodes = _tsne_traces_3D(DM_cos)
data_cos = [cos_edges, cos_nodes]

# euclidean
posG3d_eucl, eucl_edges, eucl_nodes = _tsne_traces_3D(DM_eucl)
data_eucl = [eucl_edges, eucl_nodes]

# squared euclidean
posG3d_sqeucl, sqeucl_edges, sqeucl_nodes = _tsne_traces_3D(DM_sqeucl)
data_sqeucl = [sqeucl_edges, sqeucl_nodes]

# Correlation Pairwise Matrix
posG3d_corr, corr_edges, corr_nodes = _tsne_traces_3D(DM_corr)
data_corr = [corr_edges, corr_nodes]

In [None]:
def _scatter3d_row(titles, trace_groups):
    """Build a 1x4 figure of 3D scatter subplots.

    titles       -- the four subplot titles
    trace_groups -- four lists of traces, one list per column (left to right)
    """
    figure = make_subplots(rows=1, cols=4,
                           specs=[4 * [{'type': 'scatter3d'}]],
                           print_grid=False, subplot_titles=titles)
    for column, traces in enumerate(trace_groups, start=1):
        for trace in traces:
            figure.add_trace(trace, row=1, col=column)
    figure.update_layout(template='none', showlegend=False, width=2000, height=800)
    return figure


def _print_tsne3d_settings():
    """Print the 3D t-SNE settings used for the embeddings."""
    print('Perplexity:', prplxty3d)
    print('Early Exaggeration:', density3d)
    print('Learning rate:', l_rate3d)
    print('Iterations:', steps3d)


# Figure 1: graph-derived matrices
_print_tsne3d_settings()
fig1 = _scatter3d_row(
    ('Adjacency + t-SNE',
     'SPL + t-SNE',
     'Markov + t-SNE',
     'Markov -log + t-SNE'),
    [data_adj, data_spl, data_m, data_mlog],
)
py.iplot(fig1)

plotly.offline.plot(fig1, filename = 'output_plots/3Dmatrices_1_'+organism+'.html', auto_open=False)


# Figure 2: feature-based pdist matrices
_print_tsne3d_settings()
fig2 = _scatter3d_row(
    ('Cosine + t-SNE',
     'Euclidean + t-SNE',
     'Squared Euclidean + t-SNE',
     'Correlation + t-SNE'),
    [data_cos, data_eucl, data_sqeucl, data_corr],
)
py.iplot(fig2)

plotly.offline.plot(fig2, filename = 'output_plots/3Dmatrices_2_'+organism+'.html', auto_open=False)

### Layouts | 3D Spheres

In [None]:
# tSNE optimization 
# Settings passed to every embed_tsne_sphere call in the next cell
# (the perplexity-based settings of the previous sections are not used there).

momentum = 0.1 # default 0.1
final_momentum = 0.2
max_iter = 100
learning_rate = 200
min_gain = 0.01

In [None]:
def _sphere_embedding(matrix):
    """Embed `matrix` for graph G on the sphere with the shared optimisation settings."""
    return embed_tsne_sphere(G, matrix, momentum, final_momentum, learning_rate, min_gain, max_iter)


# Graph-derived matrices
embedded_adj = _sphere_embedding(DM_adj)
embedded_spl = _sphere_embedding(DM_spl)
# Use the inverted, row-scaled Markov matrix like the 2D and 3D Markov layouts do;
# the raw random-walk matrix DM_m was passed here before.
embedded_m = _sphere_embedding(DM_m_mod)
embedded_mlog = _sphere_embedding(DM_mlog)

# Feature-based pdist matrices
embedded_cos = _sphere_embedding(DM_cos)
embedded_eucl = _sphere_embedding(DM_eucl)
embedded_sqeucl = _sphere_embedding(DM_sqeucl)
embedded_corr = _sphere_embedding(DM_corr)

In [None]:
# choose radius parameter for layout

# dict param should be : 
# {NODE ID: VALUE}
dict_param = d_clos  # example


# node -> radius, derived from the chosen parameter by the project helper
d_node_rad = get_radii_spheres(dict_param)
# the distinct radii, used to build the background sphere traces
l_rad = list(set(d_node_rad.values()))
spheres_back = get_sphere_background(l_rad)

In [None]:
# fix edge_trace with radius factor

def _sphere_node_trace(embedded):
    """Node trace for a sphere embedding, using the shared radii, colours and sizes."""
    return get_tsne_sphere_trace_nodes(embedded, d_node_rad, colours, size3d)


# Graph-derived matrices
trace_nodes_adj = _sphere_node_trace(embedded_adj)
# The edge trace is built but not plotted yet (see the note at the top of the cell).
trace_edges_adj = get_trace_edges(G, embedded_adj, edge_color)
data_adj = [trace_nodes_adj]#, trace_edges_adj]

trace_nodes_spl = _sphere_node_trace(embedded_spl)
data_spl = [trace_nodes_spl]

trace_nodes_m = _sphere_node_trace(embedded_m)
data_m = [trace_nodes_m]

trace_nodes_mlog = _sphere_node_trace(embedded_mlog)
data_mlog = [trace_nodes_mlog]

# Feature-based pdist matrices
trace_nodes_cos = _sphere_node_trace(embedded_cos)
data_cos = [trace_nodes_cos]

trace_nodes_eucl = _sphere_node_trace(embedded_eucl)
data_eucl = [trace_nodes_eucl]

trace_nodes_sqeucl = _sphere_node_trace(embedded_sqeucl)
data_sqeucl = [trace_nodes_sqeucl]

trace_nodes_corr = _sphere_node_trace(embedded_corr)
data_corr = [trace_nodes_corr]

In [None]:
def _print_sphere_settings():
    """Print the optimisation settings that were used for the sphere embeddings."""
    print('Momentum:', momentum)
    print('Final Momentum:', final_momentum)
    print('Max.Iterations:', max_iter)
    print('Learning rate:', learning_rate)
    print('Min.Gain:', min_gain)


def _sphere_row(titles, trace_groups):
    """Build a 1x4 figure of 3D scatter subplots with the background spheres in every panel.

    titles       -- the four subplot titles
    trace_groups -- four lists of node traces, one list per column (left to right)
    """
    figure = make_subplots(rows=1, cols=4,
                           specs=[4 * [{'type': 'scatter3d'}]],
                           print_grid=False, subplot_titles=titles)
    for column, traces in enumerate(trace_groups, start=1):
        # node traces first, then the background spheres of that panel
        for trace in traces:
            figure.add_trace(trace, row=1, col=column)
        for sphere in spheres_back:
            figure.add_trace(sphere, row=1, col=column)
    figure.update_layout(template='none', showlegend=False, width=2000, height=800)
    return figure


# Figure 1: graph-derived matrices
_print_sphere_settings()
fig_s1 = _sphere_row(
    ('Adjacency + t-SNE',
     'SPL + t-SNE',
     'Markov + t-SNE',
     'Markov -log + t-SNE'),
    [data_adj, data_spl, data_m, data_mlog],
)
py.iplot(fig_s1)

plotly.offline.plot(fig_s1, filename = 'output_plots/3Dmatrices_spheres1_'+organism+'.html', auto_open=False)


# Figure 2: feature-based pdist matrices
# Report the sphere settings here as well; the perplexity-based 3D settings that
# were printed before are not used by the sphere embeddings.
_print_sphere_settings()
fig_s2 = _sphere_row(
    ('Cosine + t-SNE',
     'Euclidean + t-SNE',
     'Squared Euclidean + t-SNE',
     'Correlation + t-SNE'),
    [data_cos, data_eucl, data_sqeucl, data_corr],
)
py.iplot(fig_s2)

plotly.offline.plot(fig_s2, filename = 'output_plots/3Dmatrices_spheres2_'+organism+'.html', auto_open=False)

# THE LANDSCAPES | 2D t-SNE + Z 

In [None]:
# Per-node values handed to the landscape trace helpers as `z_list`.
# NOTE: `node_factor` still holds the value set in the 3D portraits section.
z_list = draw_node_degree(G, node_factor) # node size based on degree


#node_factor = 10 # node size factor
#size3d = draw_node_degree_3D(G, node_factor) # node size based on degree
# Fixed node size; replaces the degree-based `size3d` of the previous sections.
size3d = 5

In [None]:
# Settings printed above the landscape figure. In the cells visible here the
# landscapes reuse the 2D layouts (posG_*), so no new t-SNE run reads these values.
prplxty3d = 50. # range: 5-50 / default = 30.
density3d = 1. # default 12.
l_rate3d = 200 # default 200.
steps3d = 250 #min 250

In [None]:
# EMBEDDING
# The landscapes reuse the 2D layouts (posG_*) computed earlier.

def _landscape_traces(posG):
    """Build the landscape coordinates and traces for one 2D layout.

    Returns ((x, y, z), (node trace, z trace, edge trace)). The node and edge
    traces are always built from the coordinates of the same layout.
    """
    x, y, z = get_coords_landscape(posG)
    node_trace, z_trace = get_trace_nodes_Z(x, y, z, z_list)
    edge_trace = get_trace_edges_Z(x, y, z, z_list)
    return (x, y, z), (node_trace, z_trace, edge_trace)


# Graph-derived matrices
(adj_x, adj_y, adj_z), (adj_trace, adj_trace_z, adj_edges) = _landscape_traces(posG_adj)
data_adj = [adj_trace, adj_trace_z, adj_edges]

(spl_x, _spl_y, spl_z), (spl_trace, spl_trace_z, spl_edges) = _landscape_traces(posG_spl)
data_spl = [spl_trace, spl_trace_z, spl_edges]

(m_x, m_y, m_z), (m_trace, m_trace_z, m_edges) = _landscape_traces(posG_m)
data_m = [m_trace, m_trace_z, m_edges]

(mlog_x, mlog_y, mlog_z), (mlog_trace, mlog_trace_z, mlog_edges) = _landscape_traces(posG_mlog)
data_mlog = [mlog_trace, mlog_trace_z, mlog_edges]

# Feature-based pdist matrices
(cos_x, cos_y, cos_z), (cos_trace, cos_trace_z, cos_edges) = _landscape_traces(posG_cos)
data_cos = [cos_trace, cos_trace_z, cos_edges]

(eucl_x, eucl_y, eucl_z), (eucl_trace, eucl_trace_z, eucl_edges) = _landscape_traces(posG_eucl)
data_eucl = [eucl_trace, eucl_trace_z, eucl_edges]

# The squared-euclidean edges now use their own z coordinates
# (the euclidean `eucl_z` was passed here before).
(sqeucl_x, sqeucl_y, sqeucl_z), (sqeucl_trace, sqeucl_trace_z, sqeucl_edges) = _landscape_traces(posG_sqeucl)
data_sqeucl = [sqeucl_trace, sqeucl_trace_z, sqeucl_edges]

(corr_x, corr_y, corr_z), (corr_trace, corr_trace_z, corr_edges) = _landscape_traces(posG_corr)
data_corr = [corr_trace, corr_trace_z, corr_edges]

In [None]:
print('Perplexity:', prplxty3d)
print('Early Exaggeration:', density3d)
print('Learning rate:', l_rate3d)
print('Iterations:', steps3d)


# 2x4 grid: graph-derived matrices in the first row, pdist matrices in the second.
fig_1 = make_subplots(rows = 2, cols = 4,
                    specs=2*[4 * [{'type': 'scatter3d'}]],
                    print_grid=False, subplot_titles=('Adjacency + t-SNE',
                                                     'SPL + t-SNE',
                                                     'Markov + t-SNE',
                                                     'Markov -log + t-SNE',
                                                     'Cosine + t-SNE',
                                                     'Euclidean + t-SNE',
                                                     'Squared Euclidean + t-SNE',
                                                     'Correlation + t-SNE')
                    )

# Panels in row-major order, matching the subplot titles above.
landscape_panels = [data_adj, data_spl, data_m, data_mlog,
                    data_cos, data_eucl, data_sqeucl, data_corr]
for panel_idx, panel_traces in enumerate(landscape_panels):
    panel_row, panel_col = divmod(panel_idx, 4)
    for trace in panel_traces:
        fig_1.add_trace(trace, row = panel_row + 1, col = panel_col + 1)

fig_1.update_layout(template='none', showlegend = False, width = 1400, height = 1400)
py.iplot(fig_1)

# Written once, after the figure is complete. The half-filled figure used to be
# exported to this same file first and was then overwritten.
plotly.offline.plot(fig_1, filename = 'output_plots/Landscape_2' + organism + '.html', auto_open=False)

_________

# EVALUATION OF 2D layout compared to Spring
### CALCULATE DISTANCES OF NODES IN LAYOUTS
+ Plots to test "Original Distances" against embedded distances
+ X-axis = distances of layout, Y-axis = actual Distance Matrix Distances

In [None]:
# Distances between nodes in the embedded spring layout (reference)
dist_spring = calc_dist_from_layout(posG_spring)

# Distances between nodes in the embedded t-SNE layouts of the graph-derived matrices
dist_adj = calc_dist_from_layout(posG_adj)
dist_spl = calc_dist_from_layout(posG_spl)
dist_m = calc_dist_from_layout(posG_m)
dist_mlog = calc_dist_from_layout(posG_mlog)

# ... and of the feature-based pdist matrices
dist_cos = calc_dist_from_layout(posG_cos)
dist_eucl = calc_dist_from_layout(posG_eucl)
dist_sqeucl = calc_dist_from_layout(posG_sqeucl)
dist_corr = calc_dist_from_layout(posG_corr)

In [None]:
def _row_traces(layout_dist, dist_matrix, name, colour, n_rows):
    """One trace per row index: layout_dist[i] against dist_matrix[i], for i < n_rows."""
    return [get_trace(layout_dist[i], dist_matrix[i], name, colour) for i in range(n_rows)]


n_spring = len(dist_spring)

# Every data_* list holds [spring reference traces, traces of the matrix' own layout].

# ADJ
spring_adjtrace = _row_traces(dist_spring, DM_adj, "Spring", "darkgrey", n_spring)
adj_trace = _row_traces(dist_adj, DM_adj, "Aij", "coral", len(dist_adj))
data_adj = [spring_adjtrace, adj_trace]

# SPL
spring_spltrace = _row_traces(dist_spring, DM_spl, "Spring", "darkgrey", n_spring)
spl_trace = _row_traces(dist_spl, DM_spl, "SPL", "darkorange", n_spring)
data_spl = [spring_spltrace, spl_trace]

# MARKOV
spring_mtrace = _row_traces(dist_spring, DM_m_mod, "Spring", "darkgrey", n_spring)
m_trace = _row_traces(dist_m, DM_m_mod, "Markov", "chocolate", n_spring)
data_m = [spring_mtrace, m_trace]

# -log MARKOV
spring_mlogtrace = _row_traces(dist_spring, DM_mlog, "Spring", "darkgrey", n_spring)
mlog_trace = _row_traces(dist_mlog, DM_mlog, "-log Markov", "orangered", n_spring)
data_mlog = [spring_mlogtrace, mlog_trace]

# COSINE
spring_costrace = _row_traces(dist_spring, DM_cos, "Spring", "darkgrey", n_spring)
cos_trace = _row_traces(dist_cos, DM_cos, "Cos", "royalblue", n_spring)
data_cos = [spring_costrace, cos_trace]

# EUCLIDEAN
spring_eucltrace = _row_traces(dist_spring, DM_eucl, "Spring", "darkgrey", n_spring)
eucl_trace = _row_traces(dist_eucl, DM_eucl, "Eucl", "navy", n_spring)
data_eucl = [spring_eucltrace, eucl_trace]

# SQUARED EUCLIDEAN
spring_sqeucltrace = _row_traces(dist_spring, DM_sqeucl, "Spring", "darkgrey", n_spring)
sqeucl_trace = _row_traces(dist_sqeucl, DM_sqeucl, "Sq Eucl", "slategrey", n_spring)
data_sqeucl = [spring_sqeucltrace, sqeucl_trace]

# CORRELATION
spring_corrtrace = _row_traces(dist_spring, DM_corr, "Spring", "darkgrey", n_spring)
corr_trace = _row_traces(dist_corr, DM_corr, "Corr", "cornflowerblue", n_spring)
data_corr = [spring_corrtrace, corr_trace]

In [None]:
# 2x4 grid of scatter plots: embedded layout distances against matrix distances.
fig = make_subplots(rows = 2, cols = 4,
                    specs=2*[4 * [{'type': 'Scatter'}]],
                    print_grid=False,
                    subplot_titles=('Aij',
                                    'SPL',
                                    'Markov',
                                    '-log Markov',
                                   'Cosine',
                                   'Euclidean',
                                   'Squared Euclidean',
                                   'Correlation')
                   )

# Panels in row-major order, matching the subplot titles above.
comparison_panels = (data_adj, data_spl, data_m, data_mlog,
                     data_cos, data_eucl, data_sqeucl, data_corr)
for panel_idx, trace_groups in enumerate(comparison_panels):
    panel_row, panel_col = divmod(panel_idx, 4)
    # each panel holds the spring traces followed by the layout's own traces
    for group in trace_groups:
        for trace in group:
            fig.add_trace(trace, row=panel_row + 1, col=panel_col + 1)

fig.update_xaxes(title_text = "Embedded Distances")
fig.update_yaxes(title_text = "Calculated Distances")

fig.update_layout(
    #xaxis_type="log",
    #yaxis_type="log",
    template='none',
    showlegend = False,
    height=1600,
    width=1800,
)

fig.write_image("output_plots/Diagram_Comparison_2D_DM_Embedded.png")

print('Number of Nodes:', len(G.nodes()))
fig.show()

# Statistical Test for correlation between data sets (DMs to Layouts): 
### Pearson Correlation
+ Pearson product-moment correlation coefficients
+ The correlation coefficient matrix $R$ is obtained from the covariance matrix $C$ as $R_{ij} = C_{ij} / \sqrt{C_{ii} C_{jj}}$
+ Values range from -1 to 1: the closer to 1, the stronger the positive linear correlation; the closer to -1, the stronger the negative linear correlation

In [None]:
# Pearson Correlation coefficient comparison of Spring- to Layouts-coordinates

def _pearson_report(label, layout_dist, dist_matrix):
    """Print the mean Pearson coefficient for one layout and return the coefficient matrix.

    NOTE(review): np.corrcoef(x, y) treats the rows of both inputs as variables and
    returns the coefficients of the combined set, so for 2D inputs the mean also
    covers the layout-vs-layout and matrix-vs-matrix entries, including the
    diagonal of ones - confirm that this is the intended summary.
    """
    coefficients = np.corrcoef(layout_dist, dist_matrix)
    print(label + ': Mean of Pearson Correlation Coefficient: ', round(np.mean(coefficients), 4))
    return coefficients


print('Number of Nodes: ', len(G.nodes()))
print('')

# Aij
pearson_spring_adj = _pearson_report('Spring', dist_spring, DM_adj)
pearson_adj = _pearson_report('Aij', dist_adj, DM_adj)
print('')

# SPL
pearson_spring_spl = _pearson_report('Spring', dist_spring, DM_spl)
pearson_spl = _pearson_report('SPL', dist_spl, DM_spl)
print('')

# Markov
pearson_spring_m = _pearson_report('Spring', dist_spring, DM_m_mod)
pearson_m = _pearson_report('Markov', dist_m, DM_m_mod)
print('')

# -log Markov
pearson_spring_mlog = _pearson_report('Spring', dist_spring, DM_mlog)
pearson_mlog = _pearson_report('-log Markov', dist_mlog, DM_mlog)
print('')

# Cos
pearson_spring_cos = _pearson_report('Spring', dist_spring, DM_cos)
pearson_cos = _pearson_report('Cosine', dist_cos, DM_cos)
print('')

# Eucl
pearson_spring_eucl = _pearson_report('Spring', dist_spring, DM_eucl)
pearson_eucl = _pearson_report('Euclidean', dist_eucl, DM_eucl)
print('')

# SqEucl
pearson_spring_sqeucl = _pearson_report('Spring', dist_spring, DM_sqeucl)
pearson_sqeucl = _pearson_report('Squared Euclidean', dist_sqeucl, DM_sqeucl)
print('')

# Corr
pearson_spring_corr = _pearson_report('Spring', dist_spring, DM_corr)
pearson_corr = _pearson_report('Correlation', dist_corr, DM_corr)

### Spearman Correlation 
+ Summarizes the strength of the monotonic (not necessarily linear) relationship between two data samples (rank correlation)
+ Compared to Pearson: it does not assume a Gaussian distribution and is less sensitive to outliers in the tails of both samples, because each outlier is limited to the value of its rank
+ Value can range from -1 to 1, the closer to 1 the more monotonic relationship between X and Y

In [None]:
from scipy.stats import spearmanr

print('Number of Nodes: ', len(G.nodes()))
print('')

# Spearman rank correlation between each distance matrix (DM_*) and the
# distances measured in a layout (dist_*). For every matrix two values are
# reported: the spring layout (dist_spring) as the baseline, followed by the
# layout that belongs to that matrix. Only the coefficient returned by
# spearmanr is kept; the p-value is discarded.
# NOTE(review): if the inputs are 2-D, spearmanr treats every column of both
# inputs as a variable and returns one square correlation matrix over all of
# them; np.mean then averages every entry of that matrix -- confirm this is
# the intended summary statistic.

# Aij
spearman_spring_adj, _ = spearmanr(DM_adj, dist_spring)
print('Spring: Mean of Spearman Correlation Coefficient: ',round(np.mean(spearman_spring_adj),4))

spearman_adj, _ = spearmanr(DM_adj, dist_adj)
print('Aij: Mean of Spearman Correlation Coefficient: ',round(np.mean(spearman_adj),4))

print('')


# SPL
spearman_spring_spl, _ = spearmanr(DM_spl, dist_spring)
print('Spring: Mean of Spearman Correlation Coefficient: ',round(np.mean(spearman_spring_spl),4))

spearman_spl, _ = spearmanr(DM_spl, dist_spl)
print('SPL: Mean of Spearman Correlation Coefficient: ',round(np.mean(spearman_spl),4))

print('')


# Markov
spearman_spring_m, _ = spearmanr(DM_m_mod, dist_spring)
print('Spring: Mean of Spearman Correlation Coefficient: ',round(np.mean(spearman_spring_m),4))

spearman_m, _ = spearmanr(DM_m_mod, dist_m)
print('Markov: Mean of Spearman Correlation Coefficient: ',round(np.mean(spearman_m),4))

print('')


# -log Markov
spearman_spring_mlog, _ = spearmanr(DM_mlog, dist_spring)
print('Spring: Mean of Spearman Correlation Coefficient: ',round(np.mean(spearman_spring_mlog),4))

spearman_mlog, _ = spearmanr(DM_mlog, dist_mlog)
print('-log Markov: Mean of Spearman Correlation Coefficient: ',round(np.mean(spearman_mlog),4))

print('')


# Cos
spearman_spring_cos, _ = spearmanr(DM_cos, dist_spring)
print('Spring: Mean of Spearman Correlation Coefficient: ',round(np.mean(spearman_spring_cos),4))

spearman_cos, _ = spearmanr(DM_cos, dist_cos)
print('Cosine: Mean of Spearman Correlation Coefficient: ',round(np.mean(spearman_cos),4))

print('')


# Eucl
spearman_spring_eucl, _ = spearmanr(DM_eucl, dist_spring)
print('Spring: Mean of Spearman Correlation Coefficient: ',round(np.mean(spearman_spring_eucl),4))

spearman_eucl, _ = spearmanr(DM_eucl, dist_eucl)
print('Euclidean: Mean of Spearman Correlation Coefficient: ',round(np.mean(spearman_eucl),4))

print('')


# SqEucl
spearman_spring_sqeucl, _ = spearmanr(DM_sqeucl, dist_spring)
print('Spring: Mean of Spearman Correlation Coefficient: ',round(np.mean(spearman_spring_sqeucl),4))

# Compare the squared Euclidean layout against its own matrix (DM_sqeucl);
# this line previously used DM_cos, a copy-paste slip from the Cos section.
spearman_sqeucl, _ = spearmanr(DM_sqeucl, dist_sqeucl)
print('Squared Euclidean: Mean of Spearman Correlation Coefficient: ',round(np.mean(spearman_sqeucl),4))

print('')


# Corr
spearman_spring_corr, _ = spearmanr(DM_corr, dist_spring)
print('Spring: Mean of Spearman Correlation Coefficient: ',round(np.mean(spearman_spring_corr),4))

spearman_corr, _ = spearmanr(DM_corr, dist_corr)
print('Correlation: Mean of Spearman Correlation Coefficient: ',round(np.mean(spearman_corr),4))

### WORK IN PROGRESS Plot - Regression Line 

In [None]:
from sklearn.linear_model import LinearRegression

# Pair the first set of SPL layout distances with the first set of SPL matrix distances.
X, Y = dist_spl[0], DM_spl[0]
df = pd.DataFrame({'X': X, 'Y': Y})

# scikit-learn expects a 2-D feature array, so the X column is stacked
# vertically once and reused for both fitting and predicting.
x_column = np.vstack(df['X'])
reg = LinearRegression()
reg.fit(x_column, Y)
df['bestfit'] = reg.predict(x_column)

# Raw points: embedded (layout) distance on x, matrix distance on y.
scatter_trace = pgo.Scatter(
    name='Embedded Distance vs Matrix Distance',
    x=df['X'],
    y=df['Y'].values,
    mode='markers',
)
# Fitted values drawn as a line over the same x values.
bestfit_trace = pgo.Scatter(
    name='line of best fit',
    x=X,
    y=df['bestfit'],
    mode='lines',
)

fig = pgo.Figure()
fig.add_trace(scatter_trace)
fig.add_trace(bestfit_trace)
fig.update_layout(
    template='none',
    height=600,
    width=800,
    xaxis_title='Embedded Distances',
    yaxis_title='Matrix Distances',
)

print('Linear Regression Line for 1. set of Distances of SPL Matrix and SPL Layout Distances')
fig.show()

In [None]:
# Spring layout of G in three dimensions (dim = 3), run for 200 iterations.
# NOTE(review): no seed is passed, so the coordinates presumably differ
# between runs -- confirm whether reproducibility matters here.
posG_3D = nx.spring_layout(G, dim = 3, iterations=200)
