In [1]:
import networkx as nx

import numpy as np
import pandas as pd
import csv

import pyvis.network

## Read in Edgelist

In [2]:
yeast_file = "yeast_network.txt"

G_exp = nx.read_edgelist(yeast_file,comments="#",nodetype=str)

## Read in CSV

In [3]:
clusters = []
csv_reader = csv.reader(open("dpclus_complexes.csv"))
for cluster in csv_reader:
    clusters.append(cluster)

In [4]:
clusters

[['YDR500C',
  'YGR148C',
  'YIL052C',
  'YOL120C',
  'YLR167W',
  'YKL180W',
  'YDR447C',
  'YPL143W',
  'YNL002C',
  'YLR325C',
  'YPR132W',
  'YHR141C',
  'YBL087C',
  'YBL027W',
  'YKR057W',
  'YOR298C-A',
  'YHR203C',
  'YOL121C',
  'YLR029C',
  'YFR031C-A',
  'YOR167C',
  'YGL135W',
  'YHL033C',
  'YML063W',
  'YLR264W',
  'YLR150W',
  'YOL040C',
  'YOR096W',
  'YPL249C-A',
  'YPR102C',
  'YMR242C',
  'YGL076C',
  'YMR121C',
  'YBR181C',
  'YNL067W',
  'YOR312C',
  'YNL255C',
  'YLR048W',
  'YJR145C',
  'YJL136C',
  'YJL189W',
  'YML024W',
  'YLR388W',
  'YGR034W',
  'YIL148W',
  'YPL131W',
  'YGL031C',
  'YBR048W',
  'YNL001W',
  'YLR344W',
  'YPL198W',
  'YDR064W',
  'YLR287C-A',
  'YLR185W',
  'YNL301C',
  'YBR189W',
  'YGL030W',
  'YLR249W',
  'YBL072C',
  'YDL191W',
  'YGL173C',
  'YDR012W',
  'YER131W',
  'YBL092W',
  'YIR010W',
  'YOL127W',
  'YDR450W',
  'YNL178W',
  'YDL136W',
  'YDL184C',
  'YER074W',
  'YJR094W-A',
  'YHR010W',
  'YIL069C',
  'YPL090C',
  'YCR031C',
  

In [5]:
ire1 = "YHR079C"
kar2 = "YJL034W"
yos9 = "YDR057W"
hac1 = "YFL031W"

## Create Cluster Graph

In [6]:
def cluster_weight(cluster1, cluster2, G):
    # Number of connected pairs / number of pairs in total
    total_num = len(cluster1) * len(cluster2)
    connected_num = 0.0
    for node1 in cluster1:
        for node2 in cluster2:
            if (node1, node2) in G.edges:
                connected_num += 1
    return connected_num / total_num

In [7]:
cluster_weight(clusters[0], clusters[3], G_exp)

0.0244173140954495

In [8]:
cluster_graph = nx.Graph()

In [9]:
cluster_weighted_edges = []

# Assume clusters are sorted
for i in range(len(clusters)):
    for j in range(i+1, len(clusters)):
        cluster_weighted_edges.append((i, j, cluster_weight(clusters[i], clusters[j], G_exp)))

In [10]:
cluster_graph.add_weighted_edges_from(cluster_weighted_edges)

## Find IRE1 Cluster

In [11]:
ire_clusters = []

for i in range(len(clusters)):
    if ire1 in clusters[i]:
        ire_clusters.append(i)
        
print(ire_clusters)

[73]


In [12]:
ire_weights = []
ire_cluster = ire_clusters[0]

for i in range(len(clusters)):
    if i == ire_cluster:
        continue
    else:
        ire_weight = cluster_graph[i][ire_cluster]['weight']
        ire_weights.append((i, ire_weight))

In [13]:
ire_weights.sort(key=lambda x: x[1], reverse=True)

In [14]:
top_5_ire_indexes = [tup[0] for tup in ire_weights[:5]]

In [15]:
top_5_ire_indexes

[70, 164, 67, 26, 61]

In [16]:
top_5_ire_clusters = [clusters[i] for i in top_5_ire_indexes]

In [17]:
for cluster in top_5_ire_clusters:
    print(','.join(cluster))

YHR030C,YDL095W,YBR015C,YAL023C,YJL062W,YGL027C,YGR166W,YBR229C,YML019W,YCR017C
YMR238W,YGR229C,YMR307W,YKL190W,YDL006W,YNL322C,YDR182W
YAL058W,YDR414C,YMR200W,YBR015C,YHR101C,YJR075W,YJL034W,YMR214W,YBR229C,YOR336W
YOR085W,YOR002W,YGL226C-A,YFL031W,YOR067C,YOR103C,YPL227C,YGR227W,YJL002C,YMR149W,YGL022W,YEL002C,YBL082C,YNR030W,YML019W,YNL219C
YLR212C,YOR008C,YDR293C,YBL032W,YHR030C,YBR160W,YBL105C,YNL298W,YNL188W,YJL095W,YER111C


## HAC1 Clusters

In [18]:
hac_clusters = []

for i in range(len(clusters)):
    if hac1 in clusters[i]:
        hac_clusters.append(i)
        
hac_cluster = hac_clusters[0]
print(hac_cluster)

hac_weights = []

for i in range(len(clusters)):
    if i == hac_cluster:
        continue
    else:
        hac_weight = cluster_graph[i][hac_cluster]['weight']
        hac_weights.append((i, hac_weight))
        
hac_weights.sort(key=lambda x: x[1], reverse=True)

top_5_hac_indexes = [tup[0] for tup in hac_weights[:5]]
print(top_5_hac_indexes)
top_5_hac_clusters = [clusters[i] for i in top_5_hac_indexes]
for cluster in top_5_hac_clusters:
    print(','.join(cluster))

26
[70, 73, 67, 81, 164]
YHR030C,YDL095W,YBR015C,YAL023C,YJL062W,YGL027C,YGR166W,YBR229C,YML019W,YCR017C
YFL025C,YHR030C,YML008C,YFL031W,YMR307W,YJR073C,YGR167W,YGL084C,YCR044C,YHR079C
YAL058W,YDR414C,YMR200W,YBR015C,YHR101C,YJR075W,YJL034W,YMR214W,YBR229C,YOR336W
YLR378C,YOR254C,YPL094C,YML055W,YER087C-B,YEL031W,YLR292C,YJR117W,YBR171W
YMR238W,YGR229C,YMR307W,YKL190W,YDL006W,YNL322C,YDR182W


## KAR2 Clusters

In [19]:
kar_clusters = []

for i in range(len(clusters)):
    if kar2 in clusters[i]:
        kar_clusters.append(i)

kar_cluster = hac_clusters[0]
print(kar_cluster)
kar_weights = []

for i in range(len(clusters)):
    if i == kar_cluster:
        continue
    else:
        kar_weight = cluster_graph[i][kar_cluster]['weight']
        kar_weights.append((i, kar_weight))
        
kar_weights.sort(key=lambda x: x[1], reverse=True)

top_5_kar_indexes = [tup[0] for tup in kar_weights[:5]]
print(top_5_kar_indexes)
top_5_kar_clusters = [clusters[i] for i in top_5_kar_indexes]
for cluster in top_5_kar_clusters:
    print(','.join(cluster))

26
[70, 73, 67, 81, 164]
YHR030C,YDL095W,YBR015C,YAL023C,YJL062W,YGL027C,YGR166W,YBR229C,YML019W,YCR017C
YFL025C,YHR030C,YML008C,YFL031W,YMR307W,YJR073C,YGR167W,YGL084C,YCR044C,YHR079C
YAL058W,YDR414C,YMR200W,YBR015C,YHR101C,YJR075W,YJL034W,YMR214W,YBR229C,YOR336W
YLR378C,YOR254C,YPL094C,YML055W,YER087C-B,YEL031W,YLR292C,YJR117W,YBR171W
YMR238W,YGR229C,YMR307W,YKL190W,YDL006W,YNL322C,YDR182W


In [20]:
top_5_overall = list(set(top_5_ire_indexes + top_5_hac_indexes + top_5_kar_indexes))

In [21]:
clusters[70]

['YHR030C',
 'YDL095W',
 'YBR015C',
 'YAL023C',
 'YJL062W',
 'YGL027C',
 'YGR166W',
 'YBR229C',
 'YML019W',
 'YCR017C']

## Draw Cluster Graph

In [23]:
cluster_network = pyvis.network.Network(notebook = True, bgcolor="#222222", font_color="white")

# Set up nodes and edges

cluster_network.inherit_edge_colors(False)

for node in top_5_overall:
    cluster_network.add_node(node, label=str(node))
    if (node == 73):
        cluster_network.get_node(73)['label'] = "IRE1"
    if (node == 26):
        cluster_network.get_node(26)['label'] = "HAC1/KAR2"
    if (node == 70):
        cluster_network.get_node(70)['label'] = "Nearest cluster"

# Add edges
for i in range(len(top_5_overall)):
    for j in range(i):
        n1 = top_5_overall[i]
        n2 = top_5_overall[j]
        cluster_network.add_edge(top_5_overall[i], top_5_overall[j], value=cluster_graph[n1][n2]['weight'], title = str(round(cluster_graph[n1][n2]['weight'], 2)), color='#9EC3F7')


# Highlight specific nodes

cluster_network.get_node(70)['color'] = "#07f507" # Desired cluster

cluster_network.get_node(73)['color'] = "#e33a02" # IRE1
cluster_network.get_node(26)['color'] = "#f2f25c" # HAC1 + KAR2

# Physics parameters
cluster_network.barnes_hut(gravity = -4000)
cluster_network.show("cluster.html", local=False, notebook=True)


cluster.html
