# Load Modules

In [2]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

from plotly.subplots import make_subplots
from multiprocessing import Pool
from tqdm import tqdm
import networkx as nx

# Use the white-background plotly theme for every figure created below.
pio.templates.default = 'plotly_white'
# Turn off pandas' chained-assignment (SettingWithCopy) warning.
pd.options.mode.chained_assignment = None

# Load Graph

In [65]:
def prepare_graph(fn):
    """
    load a tab-separated edge list as both a DataFrame and a networkx graph

    The file is read twice: once by pandas (skipping the first row) and once
    by networkx.

    Parameters
    ----------
    fn : str or path-like
        Path to a tab-separated file with rows ``source, target, distance``.

    Returns
    -------
    edge_list : pandas.DataFrame
        Rows of the file with columns ``source``, ``target``, ``distance``;
        distances are left exactly as stored in the file.
    G : networkx.Graph
        Graph built from the same file; every edge's ``distance`` attribute
        is replaced by ``int((7 - distance) / 7)``.
    """
    edge_list = pd.read_csv(
        fn, sep="\t", names=['source', 'target', 'distance'], skiprows=1
    )
    G = nx.read_edgelist(
        fn, delimiter="\t", data=[('distance', str)]
    )

    # Presumably the header row is parsed by read_edgelist as an edge between
    # the nodes 'node1' and 'node2'; drop them (and that edge) again.
    # remove_nodes_from skips names that are absent instead of raising.
    G.remove_nodes_from(['node1', 'node2'])

    # edges(data=True) yields the live attribute dicts, so assigning into
    # them updates the graph without rebuilding an edge view per lookup.
    for _, _, attrs in G.edges(data=True):
        # NOTE(review): the outer int() truncates, so every distance from
        # 1 to 7 becomes 0 and only distance 0 becomes 1. If a graded
        # similarity in [0, 1] was intended, drop the outer int() -- confirm.
        attrs['distance'] = int((7 - int(attrs['distance'])) / 7)

    return edge_list, G

# Edge-list file to analyse (relative path).
fn = "../results/aa800_md1_4.tab"
edge_list, G = prepare_graph(fn)
# Show the raw edge table as the cell output.
edge_list

Unnamed: 0,source,target,distance
0,t100330,t219327,1
1,t100388,t10640,1
2,t100388,t160414,1
3,t100388,t199658,1
4,t100388,t74121,1
...,...,...,...
1787,t99891,t83438,0
1788,t99920,t1545,1
1789,t99920,t54019,1
1790,t99920,t76151,1


# Cluster

In [66]:
from sklearn.cluster import Birch, SpectralClustering

# Dense node-by-node adjacency array, rows/columns in G.nodes() order.
# to_numpy_array replaces to_numpy_matrix, which was removed in networkx 3.0
# and returned an np.matrix that recent scikit-learn versions reject.
# NOTE(review): no `weight=` is passed, so networkx reads the 'weight' edge
# attribute; edges here only carry 'distance', so every edge counts as 1.
adjacency_matrix = nx.to_numpy_array(G)
# affinity='precomputed' makes fit_predict treat the input as the affinity
# matrix itself rather than as feature vectors.
b = SpectralClustering(affinity = 'precomputed')
cluster_labels = b.fit_predict(adjacency_matrix)


Graph is not fully connected, spectral embedding may not work as expected.



# Plot Network

## Plotting Functions

In [67]:
def plot_network(graph, highlight_nodes=(0, 1, 2, 3, 13)):
    """
    generic network plotter

    Positions the nodes with ``nx.spring_layout`` and draws them, together
    with the edges, on a 1000x1000 plotly figure.

    Parameters
    ----------
    graph : networkx graph
        Graph to draw; it is not modified.
    highlight_nodes : iterable, optional
        Node identifiers drawn in red; every other node is drawn in black.
        The default keeps the ids that were previously hard-coded.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure with a marker trace for the nodes and a line trace for the
        edges.
    """
    layout = nx.spring_layout(graph)

    # All edges share one line trace: each edge contributes its two
    # endpoints followed by None, which breaks the line between edges.
    edge_x = []
    edge_y = []
    for u, v in graph.edges():
        x0, y0 = layout[u]
        x1, y1 = layout[v]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    node_list = list(graph.nodes())

    # reshape keeps an (n, 2) array even for an empty graph, where
    # np.vstack would raise on an empty list.
    node_positions = np.array(
        [layout[n] for n in node_list]
    ).reshape(-1, 2)

    highlighted = set(highlight_nodes)
    node_colors = [
        'red' if n in highlighted else 'black' for n in node_list
    ]

    node_trace = go.Scatter(
        x=node_positions[:, 0],
        y=node_positions[:, 1],
        mode='markers',
        marker=dict(color=node_colors)
    )

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.25, color='#888'),
        mode='lines'
    )

    fig = go.Figure()
    fig.add_trace(node_trace)
    fig.add_trace(edge_trace)

    fig.update_layout(height=1000, width=1000)

    return fig

def plot_network_distance(G, cluster_labels):
    """
    plot a graph with a distance-weighted layout, nodes colored by cluster

    Positions the nodes with ``nx.spring_layout`` using the ``distance``
    edge attribute as the layout weight, and draws nodes and edges on a
    1000x1000 plotly figure.

    Parameters
    ----------
    G : networkx graph
        Graph to draw; every edge must carry a ``distance`` attribute.
    cluster_labels : sequence
        One label per node, used as marker color and node hover text.
        Assumed to be in ``G.nodes()`` order -- TODO confirm with caller.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure with a marker trace for the nodes and a line trace for the
        edges; hovering an edge endpoint shows that edge's ``distance``.
    """
    layout = nx.spring_layout(G, weight='distance')

    # All edges share one line trace: each edge contributes its two
    # endpoints followed by None, which breaks the line between edges.
    edge_x = []
    edge_y = []
    edge_distance = []
    for u, v, attrs in G.edges(data=True):
        x0, y0 = layout[u]
        x1, y1 = layout[v]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])
        # hovertext is matched to trace points by position, and each edge
        # occupies three points, so its distance is repeated three times.
        # A single entry per edge would label the wrong edges.
        edge_distance.extend([attrs['distance']] * 3)

    node_list = list(G.nodes())

    # reshape keeps an (n, 2) array even for an empty graph, where
    # np.vstack would raise on an empty list.
    node_positions = np.array(
        [layout[n] for n in node_list]
    ).reshape(-1, 2)

    node_trace = go.Scatter(
        x=node_positions[:, 0],
        y=node_positions[:, 1],
        mode='markers',
        marker=dict(color=cluster_labels),
        hovertext=cluster_labels
    )

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#888'),
        hovertext=edge_distance,
        mode='lines'
    )

    fig = go.Figure()
    fig.add_trace(node_trace)
    fig.add_trace(edge_trace)

    fig.update_layout(height=1000, width=1000)

    return fig



## Plot Network w/ Clusters

In [68]:
# Draw the full graph with nodes colored by their spectral-clustering label.
plot_network_distance(G, cluster_labels)

In [69]:
# Enumerate every maximal clique of G and chart how their sizes are distributed.
cliques = list(nx.find_cliques(G))
clique_sizes = list(map(len, cliques))
px.histogram(clique_sizes)

In [70]:
def create_new_g_obj(graph, nodes):
    """
    build a standalone nx.Graph from the subgraph induced by `nodes`

    Only node and edge identities are copied over; the result is a fresh
    graph object rather than a view onto `graph`.
    """
    induced = graph.subgraph(nodes)

    detached = nx.Graph()
    detached.add_nodes_from(induced.nodes())
    detached.add_edges_from(induced.edges())
    return detached

# Greedy clique peeling: repeatedly take the largest maximal clique of the
# remaining graph, store it as its own graph, and delete its nodes, until no
# nodes are left. Works on a copy so G itself stays intact.
graph = G.copy()
subgraphs = []
subgraph_sizes = []
i = 0
while graph.number_of_nodes() > 0:

    graph_size = graph.number_of_nodes()
    print("iter {} : graph size {}".format(i, graph_size))

    # Cliques are re-enumerated every pass because the previous removal
    # changes which cliques are maximal.
    cliques = list(nx.find_cliques(graph))
    clique_sizes = [len(c) for c in cliques]

    # argmax returns the first maximum, so ties go to the clique that
    # find_cliques produced first.
    largest_clique = cliques[np.argmax(clique_sizes)]
    largest_subgraph = create_new_g_obj(graph, largest_clique)
    subgraphs.append(largest_subgraph)
    subgraph_sizes.append(len(largest_clique))

    graph.remove_nodes_from(largest_clique)
    i += 1

iter 0 : graph size 670
iter 1 : graph size 654
iter 2 : graph size 646
iter 3 : graph size 638
iter 4 : graph size 631
iter 5 : graph size 625
iter 6 : graph size 619
iter 7 : graph size 613
iter 8 : graph size 608
iter 9 : graph size 603
iter 10 : graph size 598
iter 11 : graph size 593
iter 12 : graph size 589
iter 13 : graph size 585
iter 14 : graph size 581
iter 15 : graph size 577
iter 16 : graph size 573
iter 17 : graph size 569
iter 18 : graph size 565
iter 19 : graph size 562
iter 20 : graph size 559
iter 21 : graph size 556
iter 22 : graph size 553
iter 23 : graph size 550
iter 24 : graph size 547
iter 25 : graph size 544
iter 26 : graph size 541
iter 27 : graph size 538
iter 28 : graph size 535
iter 29 : graph size 532
iter 30 : graph size 529
iter 31 : graph size 526
iter 32 : graph size 523
iter 33 : graph size 520
iter 34 : graph size 517
iter 35 : graph size 514
iter 36 : graph size 511
iter 37 : graph size 508
iter 38 : graph size 505
iter 39 : graph size 502
iter 40 : 

In [71]:
# Sizes of the peeled cliques, in the order they were extracted.
subgraph_sizes

[16,
 8,
 8,
 7,
 6,
 6,
 6,
 5,
 5,
 5,
 5,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,

In [73]:
# Draw the first clique that was peeled off.
plot_network(subgraphs[0])

In [74]:
def ConcatenateGraphs(subgraphs):
    

[<networkx.classes.graph.Graph at 0x7fabae18eaf0>,
 <networkx.classes.graph.Graph at 0x7fabaea32850>,
 <networkx.classes.graph.Graph at 0x7fabad5fa0d0>,
 <networkx.classes.graph.Graph at 0x7fabad5fa760>,
 <networkx.classes.graph.Graph at 0x7fabae036370>,
 <networkx.classes.graph.Graph at 0x7fabade024c0>,
 <networkx.classes.graph.Graph at 0x7fabadfeb7c0>,
 <networkx.classes.graph.Graph at 0x7fabadfeb4f0>,
 <networkx.classes.graph.Graph at 0x7fabae036520>,
 <networkx.classes.graph.Graph at 0x7fabad5fb880>,
 <networkx.classes.graph.Graph at 0x7fabae18e3d0>,
 <networkx.classes.graph.Graph at 0x7fabae18e430>,
 <networkx.classes.graph.Graph at 0x7fabae18e160>,
 <networkx.classes.graph.Graph at 0x7fabad09b250>,
 <networkx.classes.graph.Graph at 0x7fabae18eca0>,
 <networkx.classes.graph.Graph at 0x7fabae18ec10>,
 <networkx.classes.graph.Graph at 0x7fabae18e490>,
 <networkx.classes.graph.Graph at 0x7fac039664f0>,
 <networkx.classes.graph.Graph at 0x7fabaea3c310>,
 <networkx.classes.graph.Graph 