In [22]:
import networkx as nx

import numpy as np
import pandas as pd
import csv

import pyvis.network

## Read in Edgelist

In [2]:
yeast_file = "yeast_network.txt"

G_exp = nx.read_edgelist(yeast_file,comments="#",nodetype=str)

## Read in CSV

In [3]:
clusters = []
csv_reader = csv.reader(open("dpclus_complexes.csv"))
for cluster in csv_reader:
    clusters.append(cluster)

In [4]:
ire1 = "YHR079C"
kar2 = "YJL034W"
yos9 = "YDR057W"
hac1 = "YFL031W"

## Create Cluster Graph

In [5]:
def cluster_weight(cluster1, cluster2, G):
    # Number of connected pairs / number of pairs in total
    total_num = len(cluster1) * len(cluster2)
    connected_num = 0.0
    for node1 in cluster1:
        for node2 in cluster2:
            if (node1, node2) in G.edges:
                connected_num += 1
    return connected_num / total_num

In [6]:
cluster_graph = nx.Graph()

In [7]:
cluster_weighted_edges = []

# Assume clusters are sorted
for i in range(len(clusters)):
    for j in range(i+1, len(clusters)):
        cluster_weighted_edges.append((i, j, cluster_weight(clusters[i], clusters[j], G_exp)))

In [8]:
cluster_graph.add_weighted_edges_from(cluster_weighted_edges)

## Find IRE1 Cluster

In [32]:
ire_clusters = []

for i in range(len(clusters)):
    if ire1 in clusters[i]:
        ire_clusters.append(i)

ire_weights = []
ire_cluster = ire_clusters[0]
print("IRE1 Cluster:", ire_clusters[0])

for i in range(len(clusters)):
    if i == ire_cluster:
        continue
    else:
        ire_weight = cluster_graph[i][ire_cluster]['weight']
        ire_weights.append((i, ire_weight))
        
ire_weights.sort(key=lambda x: x[1], reverse=True)
top_5_ire_indexes = [tup[0] for tup in ire_weights[:5]]
top_5_ire_clusters = [clusters[i] for i in top_5_ire_indexes]
print(top_5_hac_indexes)

IRE1 Cluster: 73
[70, 73, 67, 81, 164]


## HAC1 Clusters

In [33]:
hac_clusters = []

for i in range(len(clusters)):
    if hac1 in clusters[i]:
        hac_clusters.append(i)
        
hac_cluster = hac_clusters[0]
print("HAC1 Cluster:", hac_cluster)

hac_weights = []

for i in range(len(clusters)):
    if i == hac_cluster:
        continue
    else:
        hac_weight = cluster_graph[i][hac_cluster]['weight']
        hac_weights.append((i, hac_weight))
        
hac_weights.sort(key=lambda x: x[1], reverse=True)

top_5_hac_indexes = [tup[0] for tup in hac_weights[:5]]
print(top_5_hac_indexes)

HAC1 Cluster: 26
[70, 73, 67, 81, 164]


## KAR2 Clusters

In [34]:
kar_clusters = []

for i in range(len(clusters)):
    if kar2 in clusters[i]:
        kar_clusters.append(i)

kar_cluster = hac_clusters[0]
print("KAR2 Cluster:", kar_cluster)
kar_weights = []

for i in range(len(clusters)):
    if i == kar_cluster:
        continue
    else:
        kar_weight = cluster_graph[i][kar_cluster]['weight']
        kar_weights.append((i, kar_weight))
        
kar_weights.sort(key=lambda x: x[1], reverse=True)

top_5_kar_indexes = [tup[0] for tup in kar_weights[:5]]
print(top_5_kar_indexes)
top_5_kar_clusters = [clusters[i] for i in top_5_kar_indexes]

KAR2 Cluster: 26
[70, 73, 67, 81, 164]


In [35]:
top_5_overall = list(set(top_5_ire_indexes + top_5_hac_indexes + top_5_kar_indexes))
print(top_5_overall)

[67, 164, 70, 73, 81, 26, 61]


In [37]:
# Closest cluster to both relevant clusters
print(",".join(clusters[70]))

YHR030C,YDL095W,YBR015C,YAL023C,YJL062W,YGL027C,YGR166W,YBR229C,YML019W,YCR017C


## Draw Cluster Graph

In [38]:
cluster_network = pyvis.network.Network(notebook = True, bgcolor="#222222", font_color="white")

# Set up nodes and edges

cluster_network.inherit_edge_colors(False)

for node in top_5_overall:
    cluster_network.add_node(node, label=str(node))
    if (node == 73):
        cluster_network.get_node(73)['label'] = "IRE1"
    if (node == 26):
        cluster_network.get_node(26)['label'] = "HAC1/KAR2"
    if (node == 70):
        cluster_network.get_node(70)['label'] = "Nearest cluster"

# Add edges
for i in range(len(top_5_overall)):
    for j in range(i):
        n1 = top_5_overall[i]
        n2 = top_5_overall[j]
        cluster_network.add_edge(top_5_overall[i], top_5_overall[j], value=cluster_graph[n1][n2]['weight'], title = str(round(cluster_graph[n1][n2]['weight'], 2)), color='#9EC3F7')


# Highlight specific nodes

cluster_network.get_node(70)['color'] = "#07f507" # Desired cluster

cluster_network.get_node(73)['color'] = "#e33a02" # IRE1
cluster_network.get_node(26)['color'] = "#f2f25c" # HAC1 + KAR2

# Physics parameters
cluster_network.barnes_hut(gravity = -4000)
cluster_network.show("cluster.html", local=False, notebook=True)


cluster.html
