# Create group assignments

In [1]:
import ndex2.client as nc
import io
import json
import ndex2

# UUID of the root raw interaction network for NeST; this is the network loaded from the NDEx test server below
ROOT_UUID = "bf1b0392-1adb-11ea-a741-0660b7976219"

In [2]:
# Client for the NDEx test server (not referenced by any later cell shown in this notebook)
client = nc.Ndex2("http://test.ndexbio.org")

# Load the root network identified by ROOT_UUID from the same test server and print its summary
root_cx = ndex2.create_nice_cx_from_server(server='test.ndexbio.org', uuid=ROOT_UUID)
root_cx.print_summary()

# Shortcuts to the network's nodes and node attributes, used by the cells below
nodes = root_cx.nodes
nodeAttr = root_cx.nodeAttributes

Name: NeST_final_test_20191209-152700 supporting network for NEST
Nodes: 5452
Edges: 336377
Node Attributes: 234436
Edge Attributes: 336377



## Create set of genes for each subsystem with overlaps
The dict has the name of each subsystem as its key, and each value is the set of genes assigned to that subsystem

In [3]:
# Membership table: subsystem name -> set of ids of the genes flagged as members
subsystems = {}

for node_key in nodeAttr:
    # Walk every attribute entry stored under this key
    for entry in nodeAttr[node_key]:
        gene_id, data_type = entry["po"], entry["d"]
        # Only attributes typed as boolean are treated as membership flags
        if data_type != 'boolean':
            continue

        system_name, flag = entry["n"], entry["v"]

        # Register the subsystem even when the flag is not 'true', so a
        # subsystem without any member still shows up with an empty set
        members = subsystems.setdefault(system_name, set())
        if flag == 'true':
            members.add(gene_id)

In [5]:
# Sanity check: list the subsystems from the smallest to the largest
sorted_systems = dict(sorted(subsystems.items(), key=lambda pair: len(pair[1])))
for system_name, members in sorted_systems.items():
    print(system_name, len(members))

Group:NEST:277 2
Group:NEST:362 3
Group:NEST:325 4
Group:NEST:238 4
Group:NEST:239 4
Group:NEST:260 5
Group:NEST:237 5
Group:NEST:217 6
Group:NEST:145 10
Group:NEST:116 13
Group:NEST:108 15
Group:NEST:97 18
Group:NEST:66 18
Group:NEST:43 51
Group:NEST:35 86
Group:NEST:32 119
Group:NEST:18 128
Group:NEST:33 138
Group:NEST:16 153
Group:NEST:28 153
Group:NEST:29 220
Group:NEST:19 229
Group:NEST:26 243
Group:NEST:23 252
Group:NEST:25 263
Group:NEST:24 264
Group:NEST:12 378
Group:NEST:20 407
Group:NEST:14 435
Group:NEST:17 509
Group:NEST:13 545
Group:NEST:15 560
Group:NEST:7 571
Group:NEST:8 611
Group:NEST:11 622
Group:NEST:4 625
Group:NEST:5 685
Group:NEST:9 715
Group:NEST:10 794
Group:NEST:1 828
Group:NEST:3 957
Group:NEST:6 966
Group:NEST:2 1098


## Count number of genes in each system

In [None]:
# clusterMap = {}
# gene_counts = {}

# for key in nodeAttr:
#     attr = nodeAttr[key]    
#     for entry in attr:
#         pointer = entry["po"]
#         if pointer in clusterMap.keys():
#             members = clusterMap[pointer]
#         else:
#             members = []

#         dataType = entry["d"]
#         if dataType == 'boolean':
#             name = entry["n"]
#             value = entry["v"]
            
#             count = 0
#             if name in gene_counts.keys():
#                 count = gene_counts[name]
            
#             if value == 'true':
#                 members.append(name)
#                 count += 1
            
#             gene_counts[name] = count

#         clusterMap[pointer] = members
    
# print(len(clusterMap), len(gene_counts))
# gene_counts

In [None]:
# Sort small to large
# ranking = {k: v for k, v in sorted(gene_counts.items(), key=lambda item: item[1])}

In [None]:
# print(ranking)

In [6]:
# Reversed keys: subsystem names in the opposite order of sorted_systems
rank_list = list(sorted_systems.keys())[::-1]
rank_list

['Group:NEST:2',
 'Group:NEST:6',
 'Group:NEST:3',
 'Group:NEST:1',
 'Group:NEST:10',
 'Group:NEST:9',
 'Group:NEST:5',
 'Group:NEST:4',
 'Group:NEST:11',
 'Group:NEST:8',
 'Group:NEST:7',
 'Group:NEST:15',
 'Group:NEST:13',
 'Group:NEST:17',
 'Group:NEST:14',
 'Group:NEST:20',
 'Group:NEST:12',
 'Group:NEST:24',
 'Group:NEST:25',
 'Group:NEST:23',
 'Group:NEST:26',
 'Group:NEST:19',
 'Group:NEST:29',
 'Group:NEST:28',
 'Group:NEST:16',
 'Group:NEST:33',
 'Group:NEST:18',
 'Group:NEST:32',
 'Group:NEST:35',
 'Group:NEST:43',
 'Group:NEST:66',
 'Group:NEST:97',
 'Group:NEST:108',
 'Group:NEST:116',
 'Group:NEST:145',
 'Group:NEST:217',
 'Group:NEST:237',
 'Group:NEST:260',
 'Group:NEST:239',
 'Group:NEST:238',
 'Group:NEST:325',
 'Group:NEST:362',
 'Group:NEST:277']

In [8]:
# Give every gene a single subsystem by walking rank_list in order, so that
# subsystems earlier in the list claim their genes first (no overlap)
assignedMember = {}
# Ids of all genes that have not been claimed by a subsystem yet
all_genes = {nodes[node_key]["@id"] for node_key in nodes}

for system_name in rank_list:
    for gene in sorted_systems[system_name]:
        # Skip genes that are already assigned to another subsystem
        if gene not in all_genes:
            continue
        assignedMember[gene] = system_name
        all_genes.remove(gene)

# Total number of subsystems that received at least one gene
len(set(assignedMember.values()))

28

In [None]:
# # For simple large to small assignment
# def findTopGene(genes, ranking):
#     top = None
#     final_member = None
#     for gene in genes:
#         member_count = ranking[gene]
#         if top is None:
#             top = member_count
#             final_member = gene
#         elif top > member_count:
#             top = member_count
#             final_member = gene
#     return final_member

# assignedMember = {}
# for key in clusterMap.keys():
#     genes = clusterMap[key]
#     top = findTopGene(genes, gene_counts)
#     assignedMember[key] = top



In [9]:
# Store on each gene node the name of the subsystem it was assigned to
for gene_id, system_name in assignedMember.items():
    root_cx.add_node_attribute(property_of=gene_id, name='layoutMember', values=system_name)

In [10]:
# Serialize the annotated network to CX and save it as a JSON file.
# (The former leading `root_cx.nodeAttributes[0]` lookup was dropped: it was
# not the last expression of the cell, so its value was never displayed, and
# it hard-coded node id 0.)
new_cx = root_cx.to_cx()

out_file_name = 'root_with_groups.cx'

with open(out_file_name, 'w') as outfile:
    json.dump(new_cx, outfile)

Generating CX


## Export to GraphML

In [11]:
import networkx as nx

In [20]:
# Build a networkx graph from the annotated network via to_networkx()
nx_graph = root_cx.to_networkx()
# nx.get_node_attributes(nx_graph, 'layoutMember')

# NOTE(review): the GraphML write below is commented out, so this cell only
# builds nx_graph and no file is exported — confirm whether that is intended
# nx.write_graphml_lxml(nx_graph, "nest-grouped.graphml")