In [1]:
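# Reference: McDaid, Greene & Hurley, "Normalized Mutual Information to evaluate overlapping community finding algorithms" - https://arxiv.org/pdf/1110.2515.pdf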

In [1]:
import subprocess
import os
import pickle

In [1]:
# Output path prefixes. saveFile1 is prepended verbatim to the GML file name when the graph is exported.
saveFile1 = ''
saveFile2 = ''  # not referenced in the visible cells

# Compare covers using NMI

In [2]:
def parseAnswer(answer):
    """Split tab-separated output lines into rows with a cleaned-up label.

    Parameters
    ----------
    answer : iterable of str
        Output lines, one string per line.

    Returns
    -------
    list of list of str
        One row per line that has at least two tab-separated fields. Colons
        and spaces are removed from the first field of every row; the
        remaining fields are kept untouched.
    """
    rows = []
    for raw in answer:
        fields = raw.split('\t')
        if len(fields) < 2:
            # Lines without a tab carry no "label<TAB>value" pair.
            continue
        fields[0] = fields[0].replace(':', '').replace(' ', '')
        rows.append(fields)
    return rows

In [3]:
def compareCovers(cover1, cover2, folderNMI, NMI_type='NMI<Max>'):
    """Run the external ``onmi`` tool on two cover files and return one score.

    Parameters
    ----------
    cover1, cover2 : str
        Paths of the two cover files passed to ``onmi``.
    folderNMI : str
        Prefix concatenated verbatim with ``'onmi'`` to form the executable
        path, so it must end with a path separator (or be empty to resolve
        ``onmi`` through PATH).
    NMI_type : str
        Measure to return: ``'NMI<Max>'`` (default), ``'lfkNMI'`` or
        ``'NMI<Sum>'``.

    Returns
    -------
    str or list
        The requested score as the text printed by the tool. For an unknown
        ``NMI_type`` a message is printed and the full list of parsed rows is
        returned instead.

    Raises
    ------
    FileNotFoundError
        If the ``onmi`` executable cannot be found.
    IndexError
        If the tool printed fewer result rows than expected (typically because
        it failed); the message includes the captured output.
    """
    # Pass an argument list instead of a shell string, so paths containing
    # spaces or shell metacharacters reach the tool unchanged.
    p = subprocess.run([folderNMI + 'onmi', cover1, cover2],
                       stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    # stderr is merged into stdout; tolerate non-ASCII bytes in error messages.
    output = p.stdout.decode('ascii', errors='replace')
    parsedAnswer = parseAnswer(output.split('\n'))

    # Position of each measure among the parsed "label<TAB>value" rows.
    positions = {'NMI<Max>': 0, 'lfkNMI': 1, 'NMI<Sum>': 2}
    if NMI_type not in positions:
        print('Wrong NMI_type!\n')
        return parsedAnswer

    index = positions[NMI_type]
    if index >= len(parsedAnswer):
        raise IndexError('onmi did not report ' + NMI_type + '; output was:\n' + output)
    return parsedAnswer[index][1]

In [6]:
folderNMI = '' # prefix of the onmi executable; joined directly with 'onmi', so include the trailing separator
c1 = '' # path of the first cover file
c2 = '' # path of the second cover file

In [None]:
# Preview of the command line that is handed to the onmi tool.
f'{folderNMI}onmi {c1} {c2}'

In [8]:
# compareCovers needs the onmi folder prefix as its third positional argument.
compareCovers(c1, c2, folderNMI)

'0.0591832'

# Compare Clusters

In [5]:
def openCover(file):
    """Load and return the pickled cover stored at path ``file``.

    NOTE: unpickling can execute arbitrary code; only open trusted files.
    """
    with open(file, mode='rb') as handle:
        return pickle.load(handle)

In [6]:
def openGraph(file):
    """Read the pickled graph object stored at path ``file`` and return it.

    NOTE: unpickling can execute arbitrary code; only open trusted files.
    """
    with open(file, mode='rb') as source:
        loaded = pickle.load(source)
        return loaded

In [7]:
def calcDegreeCentrality(g):
    """Map every node of ``g`` to ``len(g[node])``.

    ``g`` must be iterable over its nodes and indexable by node; the value
    stored for a node is the size of whatever ``g[node]`` returns.
    """
    return {node: len(g[node]) for node in g}

In [8]:
def calcNodesCentrality(g1,g2):
    """Combine the per-node neighbour counts of two graphs into one mapping.

    A node found in both graphs gets the mean of ``len(g1[node])`` and
    ``len(g2[node])`` (a float); a node found in only one graph keeps that
    graph's count unchanged (an int).
    """
    centrality = {node: len(g1[node]) for node in g1}
    for node in g2:
        count2 = len(g2[node])
        if node in centrality:
            centrality[node] = (len(g1[node]) + count2) / 2
        else:
            centrality[node] = count2
    return centrality

In [9]:
def clusterCentrality(cluster, g, nodeCentrality):
    """Return the sum of ``nodeCentrality[n]`` over the members of ``cluster``.

    ``g`` is accepted for signature compatibility but is not used.
    """
    return sum(nodeCentrality[member] for member in cluster)

In [10]:
def calcClustersCentralities(cover, g, nodeCentrality):
    """Map each cluster's position in ``cover`` to its total centrality.

    The total for a cluster is whatever ``clusterCentrality`` returns for it.
    """
    return {
        position: clusterCentrality(members, g, nodeCentrality)
        for position, members in enumerate(cover)
    }

In [11]:
def comunitySimilarity(c1,c2, n1,n2,nodeCentrality, clustersCentralities1, clustersCentralities2):
    """Centrality-weighted overlap between two clusters.

    Parameters
    ----------
    c1, c2 : iterable of hashable
        Members of the two clusters.
    n1, n2 : hashable
        Keys of ``c1`` / ``c2`` in ``clustersCentralities1`` /
        ``clustersCentralities2``.
    nodeCentrality : mapping
        Centrality of each node; read only for nodes present in both clusters.
    clustersCentralities1, clustersCentralities2 : mapping
        Total centrality of every cluster of the respective cover.

    Returns
    -------
    list
        ``[shared / larger_total, shared / smaller_total]`` where ``shared`` is
        the summed centrality of the common nodes. A ratio whose denominator
        is zero is reported as 0 instead of raising ZeroDivisionError.
    """
    # Set lookup keeps the intersection linear even when c2 is a list.
    members2 = c2 if isinstance(c2, (set, frozenset)) else set(c2)

    similarity = 0
    for n in c1:
        if n in members2:
            similarity += nodeCentrality[n]

    total1 = clustersCentralities1[n1]
    total2 = clustersCentralities2[n2]
    larger = max(total1, total2)
    smaller = min(total1, total2)
    # A cluster whose nodes all have zero centrality has a zero total; treat
    # the undefined ratio as "no similarity".
    return [similarity / larger if larger else 0,
            similarity / smaller if smaller else 0]

In [12]:
def bestComunitySimilarity(comunity, cover1, nodeCentrality, clustersCentralities1, clustersCentralities2, comunityIndex=None):
    """Find the cluster of ``cover1`` that is most similar to ``comunity``.

    Parameters
    ----------
    comunity : iterable of hashable
        Members of the cluster being matched.
    cover1 : sequence of clusters
        Candidate clusters; their positions are the keys used in
        ``clustersCentralities1``.
    nodeCentrality : mapping
        Centrality of each node.
    clustersCentralities1 : mapping
        Total centrality of each cluster of ``cover1``, keyed by position.
    clustersCentralities2 : mapping
        Total centrality of the clusters of the cover ``comunity`` belongs to.
        Only consulted when ``comunityIndex`` is given.
    comunityIndex : hashable, optional
        Key of ``comunity`` in ``clustersCentralities2``. When omitted, the
        total centrality of ``comunity`` is computed from ``nodeCentrality``.

    Returns
    -------
    tuple
        ``(best_score, best_position)``, where the score is the first value
        returned by ``comunitySimilarity`` (overlap divided by the larger
        cluster total). ``(-1, -1)`` when ``cover1`` is empty.
    """
    if comunityIndex is None:
        # No index supplied: derive the community's total centrality directly.
        comunityIndex = 0
        clustersCentralities2 = {0: sum(nodeCentrality[n] for n in comunity)}

    higherSimilarity = -1
    nCluster = -1
    for i, c in enumerate(cover1):
        # comunitySimilarity needs both cluster keys and returns a pair of
        # scores; rank candidates on the first one.
        similarity = comunitySimilarity(c, comunity, i, comunityIndex, nodeCentrality,
                                        clustersCentralities1, clustersCentralities2)[0]
        if similarity > higherSimilarity:
            higherSimilarity = similarity
            nCluster = i

    return higherSimilarity, nCluster

In [13]:
def coverSimilarities(cover1, cover2, nodeCentrality, clustersCentralities1, clustersCentralities2, sizeThreshold=10):
    """Build the similarity matrix between the clusters of two covers.

    Row ``i`` / column ``j`` holds the pair returned by ``comunitySimilarity``
    for cluster ``i`` of ``cover1`` and cluster ``j`` of ``cover2``. Whenever
    either cluster has fewer than ``sizeThreshold`` members the entry is
    ``[0, 0]`` and no similarity is computed.
    """
    matrix = []
    for n1, cluster1 in enumerate(cover1):
        if len(cluster1) < sizeThreshold:
            # Row cluster too small: fill the whole row with zero pairs.
            matrix.append([[0, 0] for _ in cover2])
            continue

        row = []
        for n2, cluster2 in enumerate(cover2):
            if len(cluster2) < sizeThreshold:
                row.append([0, 0])
            else:
                row.append(comunitySimilarity(cluster1, cluster2, n1, n2, nodeCentrality,
                                              clustersCentralities1, clustersCentralities2))
        matrix.append(row)

    return matrix

In [14]:
def compareCovers(all_similarities, threshold):
    """List the cluster pairs whose first similarity score reaches ``threshold``.

    ``all_similarities`` is a matrix (list of rows) of score pairs. The result
    contains one ``[row_index, column_index, score]`` entry, in row-major
    order, for every cell whose score at index 0 is ``>= threshold``.

    NOTE: this definition rebinds the name ``compareCovers``, which an earlier
    cell uses for the NMI-based comparison; after this cell runs, the earlier
    function is no longer reachable under that name.
    """
    return [
        [row_index, col_index, scores[0]]
        for row_index, row in enumerate(all_similarities)
        for col_index, scores in enumerate(row)
        if scores[0] >= threshold
    ]

In [29]:
cover1 = openCover('') #cover1 pickle file
cover2 = openCover('') #cover2 pickle file
g1 = openGraph('') #networkx graph1 pickle file
g2 = openGraph('') #networkx graph2 pickle file

In [30]:
# Per-node centrality combined over both graphs, then the per-cluster totals of each cover.
nodeCentrality = calcNodesCentrality(g1,g2)
c1Centralities = calcClustersCentralities(cover1, g1, nodeCentrality)
c2Centralities = calcClustersCentralities(cover2, g2, nodeCentrality)

In [31]:
# Similarity matrix: one row per cover1 cluster, one column per cover2 cluster.
all_similarities = coverSimilarities(cover1, cover2, nodeCentrality, c1Centralities, c2Centralities, sizeThreshold=10)

In [32]:
# Keep the cluster pairs whose first similarity score (index 0) is at least the threshold.
similarity_threshold = 0.4
similar_clusters = compareCovers(all_similarities, similarity_threshold)

In [33]:
# Number of cluster pairs at or above the threshold.
len(similar_clusters)

13

### Similar Clusters

In [35]:
# Each entry is [cover1 cluster index, cover2 cluster index, similarity score].
similar_clusters

[[0, 0, 0.5857117427164303],
 [4, 1, 0.4581497797356828],
 [10, 10, 0.41357234314980795],
 [11, 3, 0.4150513112884835],
 [11, 6, 0.5568585643212509],
 [11, 8, 0.4012760241773002],
 [12, 4, 0.6664350243224462],
 [14, 9, 0.41645244215938304],
 [24, 32, 0.8148148148148148],
 [26, 33, 0.45012165450121655],
 [28, 16, 0.42718446601941745],
 [28, 37, 0.49299719887955185],
 [33, 38, 0.5144927536231884]]

### Cluster y from cover 1

In [17]:
# Members of cluster `y` of cover 1 as [peso, node] rows, largest 'peso' first.
lista = []
y = 26

for n in cover1[y]:
    lista.append([g1.nodes()[n]['peso'], n])

# In this two-column layout 'peso' sits at index 0 (index 1 is the node id),
# so sort on index 0 to order by 'peso' like the other listing cells do.
sorted(lista, key = lambda x: x[0], reverse=True)

NameError: name 'cover1' is not defined

### Cluster y from cover 2

In [197]:
# Members of cluster `y` of cover 2 as [label, peso, node] rows, sorted by 'peso' (index 1), largest first.
# NOTE(review): dictionaryCodeMerged2 is not defined in any visible cell - confirm where it is loaded.
lista = []
y = 8

for n in cover2[y]:
    lista.append([dictionaryCodeMerged2[n], g2.nodes()[n]['peso'], n])
    
sorted(lista, key = lambda x: x[1], reverse=True)

[['Features', 6380, 'bn:00017761n'],
 ['trained', 5235, 'bn:00086736v'],
 ['learned', 4888, 'bn:00082281v'],
 ['propose', 4325, 'bn:00082417v'],
 ['number', 3792, 'bn:00058285n'],
 ['compare', 2328, 'bn:00085450v'],
 ['select', 2327, 'bn:00084931v'],
 ['observe', 2295, 'bn:00086708v'],
 ['performs', 2279, 'bn:00087107v'],
 ['obtaining', 2225, 'bn:00091124v'],
 ['following', 2181, 'bn:00088421v'],
 ['computing', 2150, 'bn:00084373v'],
 ['provide', 2090, 'bn:00088643v'],
 ['consider', 2088, 'bn:00085647v'],
 ['need', 1920, 'bn:00082822v'],
 ['achieve', 1763, 'bn:00082226v'],
 ['find', 1717, 'bn:00084231v'],
 ['modes', 1506, 'bn:00033729n'],
 ['improved', 1352, 'bn:00082573v'],
 ['studied', 1144, 'bn:00082596v'],
 ['generated', 1089, 'bn:00084080v'],
 ['similar', 1087, 'bn:00110647a'],
 ['builds', 1038, 'bn:00084198v'],
 ['possible', 919, 'bn:00108829a'],
 ['determine', 871, 'bn:00082811v'],
 ['simple', 744, 'bn:00110649a'],
 ['activities', 636, 'bn:00001172n'],
 ['effective', 498, 'bn:00

### Nodes shared by cluster i of cover 1 and cluster j of cover 2

In [94]:
# Nodes present in both cluster i of cover 1 and cluster j of cover 2,
# as [label, peso in g1, peso in g2] rows sorted by the g1 'peso', largest first.
# NOTE(review): dictionaryCodeMerged2 is not defined in any visible cell - confirm where it is loaded.
i = 12
j = 19
lista_iguais = []
for n in cover1[i]:
    if n in cover2[j]:
        lista_iguais.append([dictionaryCodeMerged2[n], g1.nodes()[n]['peso'], g2.nodes()[n]['peso']])
        
sorted(lista_iguais, key = lambda x: x[1], reverse=True)

[['high', 813, 863],
 ['affinity', 79, 84],
 ['specificity', 76, 133],
 ['throughput', 31, 49],
 ['Salinity', 23, 45],
 ['sensitivity', 22, 30]]

# View in Cytoscape

In [37]:
import networkx as nx
from py2cytoscape import cyrest
from py2cytoscape import util as cy 
from py2cytoscape.data.cyrest_client import CyRestClient
from IPython.display import Image

/home/mauro/.local/lib/python3.7/site-packages/python_igraph-0.7.1.post6-py3.7-linux-x86_64.egg/igraph/_igraph.cpython-37m-x86_64-linux-gnu.so: undefined symbol: igraph_layout_davidson_harel
py2cytoscape: Error importing igraph. You won't be able to import from igraph.


In [392]:
# Clusters fed to the graph-building cells: two from cover 1 and one from cover 2.
# NOTE: c1 and c2 rebind the names that earlier held the cover file paths for onmi.
c1 = cover1[14]
c2 = cover1[19]
c3 = cover2[5]

In [365]:
# Comparison of two clusters from different covers, merged into one graph.
# Node attribute 'clusters' is 1 for nodes only in c1, 2 for nodes only in c2, 3 for nodes in both.
# NOTE(review): this cell reads g1 data for c1 and g2 data for c2, i.e. it expects c2 to come from
# cover 2, whereas the preceding assignment cell takes c2 from cover1 - confirm which is intended.
new_graph = nx.Graph()
for n in c1:
    if n in c2:
        # Shared node: its 'peso' is the sum of the values from both graphs.
        new_graph.add_node(n, peso=g1.nodes()[n]['peso']+g2.nodes()[n]['peso'], clusters=3, dicionario=dictionaryCodeMerged1[n])
    else:
        new_graph.add_node(n, peso=g1.nodes()[n]['peso'], clusters=1, dicionario=dictionaryCodeMerged1[n])
for n in c2:
    if n not in c1:
        new_graph.add_node(n, peso=g2.nodes()[n]['peso'], clusters=2, dicionario=dictionaryCodeMerged2[n])

# Copy the g1 edges whose two endpoints were kept; add the g2 weight when the edge is also in g2.
for e in g1.edges():
    if new_graph.has_node(e[0]) and new_graph.has_node(e[1]):
        if e in g2.edges():
            new_graph.add_edge(e[0], e[1], weight= g1[e[0]][e[1]]['weight'] + g2[e[0]][e[1]]['weight'])
        else:
            new_graph.add_edge(e[0], e[1], weight= g1[e[0]][e[1]]['weight'])
        
# Add the remaining edges that are in g2 but not in g1.
for e in g2.edges():
    if new_graph.has_node(e[0]) and new_graph.has_node(e[1]):
        if e not in g1.edges(): 
            new_graph.add_edge(e[0], e[1], weight= g2[e[0]][e[1]]['weight'])
   

In [393]:
# Evolution of two clusters into one cluster.
# Every member of c1, c2 and c3 becomes its own node keyed by a running integer, so a code that
# appears in several clusters gets one node per cluster. 'clusters' is 1, 2 or 3 for c1, c2, c3.
# c1 and c2 read 'peso' and labels from g1 / dictionaryCodeMerged1; c3 reads g2 / dictionaryCodeMerged2.
new_graph = nx.Graph()
count = 0 
# code -> node id in new_graph, one mapping per cluster
dicionario1={}
dicionario2={}
dicionario3={}
for n in c1:
    new_graph.add_node(count, code=n, peso=g1.nodes()[n]['peso'], clusters=1, dicionario=dictionaryCodeMerged1[n])
    dicionario1[n] = count
    count += 1
for n in c2:
    new_graph.add_node(count, code=n, peso=g1.nodes()[n]['peso'], clusters=2, dicionario=dictionaryCodeMerged1[n])
    dicionario2[n] = count
    count += 1
for n in c3:
    new_graph.add_node(count, code=n, peso=g2.nodes()[n]['peso'], clusters=3, dicionario=dictionaryCodeMerged2[n])
    dicionario3[n] = count
    count += 1
    
# Link the copies of the same code across clusters (c1-c2, c1-c3, c2-c3).
for n in c1:
    if n in c2:
        new_graph.add_edge(dicionario1[n], dicionario2[n])
    if n in c3:
        new_graph.add_edge(dicionario1[n], dicionario3[n])
for n in c2:
    if n in c3:
        new_graph.add_edge(dicionario2[n], dicionario3[n])

In [394]:
# Export the graph as GML; saveFile1 is used as a plain string prefix of the file name.
#nx.write_gml(new_graph, saveFile1 + 'cls_12_19.gml')
nx.write_gml(new_graph, saveFile1 +'cls_14,19_5.gml')

In [395]:
# Two py2cytoscape handles: `cytoscape` is used for the group commands, `cyjs` for creating the network.
# NOTE(review): presumably both need a running Cytoscape instance with CyREST - confirm.
cytoscape=cyrest.cyclient()
cyjs = CyRestClient()

In [396]:
# Create a Cytoscape network from the networkx graph built above.
plot = cyjs.network.create_from_networkx(new_graph)

In [397]:
# Create one Cytoscape group per 'clusters' value (1, 2, 3).
for i in range(1, 4):
    # Comma-separated "name:<node>" selectors for the nodes tagged with cluster i.
    group = ','.join(
        'name:' + str(n)
        for n, attrs in new_graph.nodes(data=True)
        if attrs['clusters'] == i
    )
    cytoscape.group.create(nodeList=group, groupName='group'+str(i))


In [66]:
# Look up the label stored for a single node code.
dictionaryCodeMerged2['bn:00107646a']

'null'

In [64]:
# Print the 'clusters' tag of every node, in node order, to check the grouping.
for n in new_graph.nodes():
    print( new_graph.nodes()[n]['clusters'])

1
3
1
3
3
1
3
1
3
1
1
1
3
1
1
2
2
2
2
2
2
2
2
