# Cluster-Based layout
By Keiichiro Ono
June 2, 2020

## What's this?
In many situations, we want to layout nodes in same subgroup, or cluster, as close as possible. This script creates such layout from HiView compatible interaction data.

## Create group assignments

In [1]:
import ndex2.client as nc
import ndex2
import networkx as nx

# Currently, this is designed for 
# Root raw interaction network for Nest
ROOT_UUID = "bf1b0392-1adb-11ea-a741-0660b7976219"

# Largest child of root
SIGNALING = "d1989896-1adb-11ea-a741-0660b7976219"

In [2]:
client = nc.Ndex2("http://test.ndexbio.org")

root_cx = ndex2.create_nice_cx_from_server(server='test.ndexbio.org', uuid=SIGNALING)
root_cx.print_summary()

nodes = root_cx.nodes
nodeAttr = root_cx.nodeAttributes

Name: NeST_final_test_20191209-152700 supporting network for NEST:2
Nodes: 1085
Edges: 14561
Node Attributes: 64015
Edge Attributes: 14561



In [39]:
root_cx.nodes[0]

{'@id': 0, 'n': 'HIST1H4A'}

## Create set of genes for each subsystem with overraps
The dict has name of subsystem as key, and values are set of genes assigned to the system

In [3]:
# This contains membership for each subsystem
subsystems = {}

for key in nodeAttr:
    # All attributes for a gene
    attr = nodeAttr[key]    
    for entry in attr:
        gene_id = entry["po"]
        dataType = entry["d"]
        if dataType == 'boolean':
            name = entry["n"]
            value = entry["v"]
            
            system_members = set()
            if name in subsystems.keys():
                system_members = subsystems[name]
            
            if value == 'true':
                system_members.add(gene_id)

            subsystems[name] = system_members

In [4]:
# Check the result: Smallest subsystem to the largest
sorted_systems = {k: v for k, v in sorted(subsystems.items(), key=lambda item: len(item[1]))}
for key in sorted_systems.keys():
    print(key, len(sorted_systems[key]))

Group:NEST:298 4
Group:NEST:272 4
Group:NEST:287 4
Group:NEST:286 4
Group:NEST:283 4
Group:NEST:337 4
Group:NEST:336 4
Group:NEST:310 4
Group:NEST:300 4
Group:NEST:294 4
Group:NEST:302 4
Group:NEST:291 4
Group:NEST:282 4
Group:NEST:309 4
Group:NEST:299 4
Group:NEST:343 4
Group:NEST:235 5
Group:NEST:240 5
Group:NEST:225 5
Group:NEST:224 5
Group:NEST:244 5
Group:NEST:221 5
Group:NEST:256 5
Group:NEST:250 5
Group:NEST:249 5
Group:NEST:209 6
Group:NEST:205 6
Group:NEST:211 6
Group:NEST:204 6
Group:NEST:202 6
Group:NEST:210 6
Group:NEST:181 7
Group:NEST:191 7
Group:NEST:179 7
Group:NEST:173 8
Group:NEST:176 8
Group:NEST:174 8
Group:NEST:160 9
Group:NEST:150 10
Group:NEST:153 10
Group:NEST:149 10
Group:NEST:146 10
Group:NEST:125 12
Group:NEST:126 12
Group:NEST:128 12
Group:NEST:119 13
Group:NEST:117 13
Group:NEST:124 13
Group:NEST:111 14
Group:NEST:109 15
Group:NEST:104 16
Group:NEST:103 16
Group:NEST:100 18
Group:NEST:95 19
Group:NEST:90 21
Group:NEST:80 28
Group:NEST:76 30
Group:NEST:56 47

## Count number of genes in each system

In [5]:
clusterMap = {}
gene_counts = {}

for key in nodeAttr:
    attr = nodeAttr[key]    
    for entry in attr:
        pointer = entry["po"]
        if pointer in clusterMap.keys():
            members = clusterMap[pointer]
        else:
            members = []

        dataType = entry["d"]
        if dataType == 'boolean':
            name = entry["n"]
            value = entry["v"]
            
            count = 0
            if name in gene_counts.keys():
                count = gene_counts[name]
            
            if value == 'true':
                members.append(name)
                count += 1
            
            gene_counts[name] = count

        clusterMap[pointer] = members
    
print(len(clusterMap), len(gene_counts))

1085 59


In [6]:
# Sort small to large
ranking = {k: v for k, v in sorted(gene_counts.items(), key=lambda item: item[1])}

In [7]:
print(ranking)

{'Group:NEST:298': 4, 'Group:NEST:272': 4, 'Group:NEST:287': 4, 'Group:NEST:286': 4, 'Group:NEST:283': 4, 'Group:NEST:337': 4, 'Group:NEST:336': 4, 'Group:NEST:310': 4, 'Group:NEST:300': 4, 'Group:NEST:294': 4, 'Group:NEST:302': 4, 'Group:NEST:291': 4, 'Group:NEST:282': 4, 'Group:NEST:309': 4, 'Group:NEST:299': 4, 'Group:NEST:343': 4, 'Group:NEST:235': 5, 'Group:NEST:240': 5, 'Group:NEST:225': 5, 'Group:NEST:224': 5, 'Group:NEST:244': 5, 'Group:NEST:221': 5, 'Group:NEST:256': 5, 'Group:NEST:250': 5, 'Group:NEST:249': 5, 'Group:NEST:209': 6, 'Group:NEST:205': 6, 'Group:NEST:211': 6, 'Group:NEST:204': 6, 'Group:NEST:202': 6, 'Group:NEST:210': 6, 'Group:NEST:181': 7, 'Group:NEST:191': 7, 'Group:NEST:179': 7, 'Group:NEST:173': 8, 'Group:NEST:176': 8, 'Group:NEST:174': 8, 'Group:NEST:160': 9, 'Group:NEST:150': 10, 'Group:NEST:153': 10, 'Group:NEST:149': 10, 'Group:NEST:146': 10, 'Group:NEST:125': 12, 'Group:NEST:126': 12, 'Group:NEST:128': 12, 'Group:NEST:119': 13, 'Group:NEST:117': 13, 'Gr

In [8]:
# Revered keys
rank_list = list(ranking.keys())
rank_list.reverse()

# Largest to smallest
rank_list

['Group:NEST:37',
 'Group:NEST:56',
 'Group:NEST:76',
 'Group:NEST:80',
 'Group:NEST:90',
 'Group:NEST:95',
 'Group:NEST:100',
 'Group:NEST:103',
 'Group:NEST:104',
 'Group:NEST:109',
 'Group:NEST:111',
 'Group:NEST:124',
 'Group:NEST:117',
 'Group:NEST:119',
 'Group:NEST:128',
 'Group:NEST:126',
 'Group:NEST:125',
 'Group:NEST:146',
 'Group:NEST:149',
 'Group:NEST:153',
 'Group:NEST:150',
 'Group:NEST:160',
 'Group:NEST:174',
 'Group:NEST:176',
 'Group:NEST:173',
 'Group:NEST:179',
 'Group:NEST:191',
 'Group:NEST:181',
 'Group:NEST:210',
 'Group:NEST:202',
 'Group:NEST:204',
 'Group:NEST:211',
 'Group:NEST:205',
 'Group:NEST:209',
 'Group:NEST:249',
 'Group:NEST:250',
 'Group:NEST:256',
 'Group:NEST:221',
 'Group:NEST:244',
 'Group:NEST:224',
 'Group:NEST:225',
 'Group:NEST:240',
 'Group:NEST:235',
 'Group:NEST:343',
 'Group:NEST:299',
 'Group:NEST:309',
 'Group:NEST:282',
 'Group:NEST:291',
 'Group:NEST:302',
 'Group:NEST:294',
 'Group:NEST:300',
 'Group:NEST:310',
 'Group:NEST:336',

In [9]:
# For simple large to small assignment
def findTopGene(genes, ranking):
    top = None
    final_member = None
    for gene in genes:
        member_count = ranking[gene]
        if top is None:
            top = member_count
            final_member = gene
        elif top > member_count:
            top = member_count
            final_member = gene
    return final_member

assignedMember = {}
for key in clusterMap.keys():
    genes = clusterMap[key]
    top = findTopGene(genes, gene_counts)
    if top is not None:
        assignedMember[key] = top

In [10]:
# Assign subsystem name for each gene
for key in assignedMember.keys():
#     root_cx.add_node_attribute(property_of=key, name='membership', values=clusterMap[key])
    root_cx.add_node_attribute(property_of=key, name='layoutMember', values=assignedMember[key])

## Export to GraphML
For loading into graph-tool

In [11]:
nx_graph = root_cx.to_networkx()
# Remove None value from edge attr
for (e1, e2, d) in nx_graph.edges(data=True):
    del d['interaction']
    
GRAPHML_IN = 'temp-prepared.graphml'
nx.write_graphml(nx_graph, GRAPHML_IN)

# Actual layout using graph-tool

In [12]:
from graph_tool.all import *
nest = load_graph(GRAPHML_IN)
nest

<Graph object, undirected, with 1085 vertices and 14561 edges, 61 internal vertex properties, 12 internal edge properties, at 0x7f633cffb940>

In [13]:
members = nest.vertex_properties["layoutMember"]

value_set = set()
for member in members:
    value_set.add(member)

v2idx = {}
idx = 1
for entry in value_set:
    v2idx[entry] = idx
    idx += 1

# Group names to integers
print(v2idx)

{'': 1, 'Group:NEST:225': 2, 'Group:NEST:204': 3, 'Group:NEST:128': 4, 'Group:NEST:95': 5, 'Group:NEST:111': 6, 'Group:NEST:294': 7, 'Group:NEST:244': 8, 'Group:NEST:146': 9, 'Group:NEST:119': 10, 'Group:NEST:283': 11, 'Group:NEST:209': 12, 'Group:NEST:256': 13, 'Group:NEST:56': 14, 'Group:NEST:205': 15, 'Group:NEST:124': 16, 'Group:NEST:337': 17, 'Group:NEST:160': 18, 'Group:NEST:202': 19, 'Group:NEST:126': 20, 'Group:NEST:149': 21, 'Group:NEST:179': 22, 'Group:NEST:191': 23, 'Group:NEST:240': 24, 'Group:NEST:181': 25, 'Group:NEST:150': 26, 'Group:NEST:310': 27, 'Group:NEST:117': 28, 'Group:NEST:272': 29, 'Group:NEST:291': 30, 'Group:NEST:302': 31, 'Group:NEST:287': 32, 'Group:NEST:153': 33, 'Group:NEST:80': 34, 'Group:NEST:224': 35, 'Group:NEST:176': 36, 'Group:NEST:336': 37, 'Group:NEST:173': 38, 'Group:NEST:103': 39, 'Group:NEST:300': 40, 'Group:NEST:90': 41, 'Group:NEST:343': 42, 'Group:NEST:309': 43, 'Group:NEST:211': 44, 'Group:NEST:125': 45, 'Group:NEST:109': 46, 'Group:NEST:23

In [24]:
# New attribute to show membership as an integer
groups = nest.new_vertex_property("int32_t")
nodes = nest.vertices()
for node in nodes:
    groups[node] = v2idx[members[node]]
    
# Name it "layoutIndex"
nest.vp.layoutIndex = groups

# Edge scores to be used for layout
score = nest.edge_properties["Score"]
new_score = nest.new_edge_property("double")
edges = nest.edges()
for e in edges:
    score_str = score[e]
    score_dbl = float(score_str)
    new_score[e] = score_dbl

nest.ep.Score = new_score


In [29]:
# pos1 = sfdp_layout(nest, groups=groups, vweight=groups, gamma=0.01, mu=1000, eweight=score, C=0.5, multilevel=True)

# Dense cluster
#pos1 = sfdp_layout(nest, groups=groups, vweight=groups, gamma=0.01, mu=10000, eweight=score, C=0.5, multilevel=True)

# Clusters are closer to each other
#pos1 = sfdp_layout(nest, groups=groups, vweight=groups, gamma=10000, mu=10000, eweight=score, C=0.5, multilevel=True)

# Final for root
#pos1 = sfdp_layout(nest, groups=groups, vweight=groups, gamma=10000, mu=10000, eweight=score, C=0.5)

# For sig
# pos1 = sfdp_layout(nest, groups=groups, vweight=groups, gamma=10000, mu=10000, eweight=score, C=0.5)
pos1 = sfdp_layout(nest, groups=groups, vweight=groups, gamma=10000, mu=10000, eweight=new_score, C=0.5)



# graph_draw(nest, pos=pos1, vertex_fill_color=groups, edge_color=[0.7, 0.7,0.7, 0.3], output="graph-draw-sfdp-all.pdf")

# Assign positions to the original network

In [47]:
gene_names = nest.vp._graphml_vertex_id
all_nodes = nest.vertices()

position_map = {}
for n in all_nodes:
    name = gene_names[n]
    position = pos1[n]
    position_map[name] = position

In [66]:
original_layout = root_cx.opaqueAspects["cartesianLayout"]
original_nodes = root_cx.nodes

id2gene = {}
for key in original_nodes:
    node = original_nodes[key]
    id2gene[node['@id']] = node["n"]

In [85]:
final_positions = []
SCALING = 300

for layout in original_layout:
    node_id = layout['node']
    gene = id2gene[node_id]
    new_position = position_map[gene]
    x = new_position[0] * SCALING
    y = new_position[1] * SCALING
    final_pos = {'node': node_id, 'x': x, 'y': y}
    final_positions.append(final_pos)
    

root_cx.opaqueAspects["cartesianLayout"] = final_positions
original_layout[0]

{'node': 0, 'x': 2368.223388671875, 'y': 1981.9049072265625}

In [86]:
import json

final_cx = root_cx.to_cx()
out_file_name = 'sig_final.cx'

with open(out_file_name, 'w') as outfile:
    json.dump(final_cx, outfile)

Generating CX
