In [11]:
import pandas as pd
from graph_tool.all import *
from collections import defaultdict
import numpy as np
import matplotlib as plt

# --- Inputs and graph scaffolding ---

# Per-gene history of block labels; the community-detection loop appends one
# label per run to each gene's list.
gene_block_dict = {}

location = "/mnt/c/Users/sauba/Desktop/20.RERConverge_trimmed/3.New_colias/61.New_alignment_colias/12.Rho_networks/2.Diapausing"
file_name = "Filtered_set_core_set"

# Read the pairwise table and keep only rows whose correlation is below 1.0.
# The lower cut-off (0.4) is deliberately NOT applied here; it is enforced
# later, at the moment edges are added to the graph.
csv_path = f"{location}/{file_name}.csv"
df = pd.read_csv(csv_path)
df = df.loc[df["Correlation"] < 1.0]

# Undirected graph that will hold one vertex per distinct annotation label.
g = Graph(directed=False)

# Vertex-level properties.
name_prop = g.new_vertex_property("string")
vsize = g.new_vertex_property("float")
# NOTE(review): the graph has no vertices at this point, so this assignment
# writes into an empty array view — confirm the sizes are (re)assigned once
# the vertices exist before relying on `vsize` for drawing.
vsize.a = 1

# Edge-level properties: raw correlation, drawing width, and text label.
weight = g.new_edge_property("float")
pen_width = g.new_edge_property("float")
edge_label = g.new_edge_property("string")

# Correlations in [min_corr, max_corr] are mapped linearly onto pen widths
# in [min_w, max_w].
min_w = 1.0
max_w = 5.0
min_corr, max_corr = 0.4, 0.99

# Cache: annotation label -> graph vertex (filled lazily by get_vertex).
node_map = {}
def get_vertex(name):
    """Return the graph vertex registered under ``name``, creating it on first use.

    A newly created vertex is added to the module-level graph ``g``, cached in
    ``node_map`` and labelled with ``name`` in ``name_prop``; later calls with
    the same ``name`` return that same vertex.
    """
    try:
        return node_map[name]
    except KeyError:
        vertex = g.add_vertex()
        node_map[name] = vertex
        name_prop[vertex] = name
        return vertex

# Unordered (label, label) pairs that have already been processed.
added_edges = set()

# Build the graph: one vertex per annotation label, one edge per distinct
# unordered pair whose correlation lies strictly between min_corr and 1.0.
#
# NOTE(review): every missing annotation is relabelled "nan", so all
# unannotated genes end up sharing ONE vertex named "nan" — confirm this merge
# is intended.
# NOTE(review): a pair is marked as processed on its FIRST occurrence, even if
# that row is below the threshold; a later row for the same pair with a higher
# correlation is skipped. This first-wins rule is kept unchanged here.
for source, target, corr in zip(df["Annotation_gene1"],
                                df["Annotation_gene2"],
                                df["Correlation"]):
    # pd.isna catches NaN/None explicitly; other values are normalised to str so
    # a numeric-looking annotation is kept as its own label instead of being
    # silently turned into "nan", and so the pair below is always sortable.
    source = "nan" if pd.isna(source) else str(source)
    target = "nan" if pd.isna(target) else str(target)

    # Sorting makes (A, B) and (B, A) collapse onto the same key.
    key = tuple(sorted((source, target)))
    if key in added_edges:
        continue
    added_edges.add(key)

    # Vertices are created even when no edge is added, so weakly correlated
    # genes still appear in the graph as isolated nodes.
    v1 = get_vertex(source)
    v2 = get_vertex(target)

    # Use min_corr (not a second literal) so the edge threshold and the
    # pen-width scaling below can never drift apart.
    if min_corr < corr < 1.0:
        corr = float(corr)
        e = g.add_edge(v1, v2)
        weight[e] = corr
        # Linear map of [min_corr, max_corr] onto [min_w, max_w].
        scaled = min_w + (corr - min_corr) / (max_corr - min_corr) * (max_w - min_w)
        pen_width[e] = scaled
        edge_label[e] = f"{corr:.2f}"

# Vertex sizes have to be assigned AFTER the vertices exist: the earlier
# `vsize.a = 1` ran on an empty graph, where the array view had no elements
# to fill.
vsize.a = 1


# deg = g.degree_property_map("total")
# max_deg = max(deg.a) if max(deg.a) > 0 else 1
# color = g.new_vertex_property("vector<float>")
# for v in g.vertices():
#     d = deg[v]
#     color[v] = [d / max_deg, 0.2, 1.0 - d / max_deg, 1.0]  # RGBA
#     # color[v] = [x * 0.1 for x in range(0, 10)]

# --- Degree-based vertex colours ---
deg = g.degree_property_map("total")

# Largest degree in the graph, floored at 1 so it is always a usable
# denominator. The explicit length check covers an empty graph, where taking
# the maximum of an empty array would raise.
degree_values = deg.a
max_deg = max(int(degree_values.max()), 1) if len(degree_values) > 0 else 1
color = g.new_vertex_property("vector<float>")

# `plt` is the top-level matplotlib package in this file (it is imported as
# `import matplotlib as plt`), so `plt.colormaps` is matplotlib's colormap
# registry. Other registered names ('viridis', 'plasma', 'magma', ...) can be
# substituted here.
cmap = plt.colormaps['Spectral_r']

# Loop-invariant normaliser; strictly positive because max_deg >= 1.
max_log_deg = np.log1p(max_deg)

list_of_degree = []      # degree of every vertex, in vertex order
degree_list_plus = []    # (gene name, degree) pairs, in vertex order
for v in g.vertices():
    d = deg[v]
    list_of_degree.append(d)
    degree_list_plus.append((name_prop[v], d))

    # Log-scale the degree into [0, 1] (log1p(x) == log(1 + x)) so a few hubs
    # do not squash every other vertex to one end of the colormap.
    # Linear alternative: normalized_degree = d / max_deg
    normalized_degree = np.log1p(d) / max_log_deg

    # The colormap returns an RGBA tuple, which is stored directly.
    color[v] = cmap(normalized_degree)

# assert False


# --- COMMUNITY DETECTION ---
from graph_tool.inference import minimize_blockmodel_dl
import matplotlib.cm as cm
import numpy as np

# Fit the stochastic block model 100 times and record, for every gene, the
# block label it received in each run.
for run_idx in range(100):
    print("Run ",run_idx, end = " ")
    state = minimize_blockmodel_dl(g)
    blocks = state.get_blocks()

    # Summary of this run's partition: raw label array and distinct-label count.
    block_ids = blocks.a
    num_blocks = len(np.unique(block_ids))

    # Group gene names by the block they were assigned to in this run.
    block_dict = defaultdict(list)
    for vertex in g.vertices():
        block_dict[blocks[vertex]].append(name_prop[vertex])

    # Append this run's label to each gene's history, walking block by block
    # so genes are first inserted into gene_block_dict in block-grouped order.
    for block_label, members in block_dict.items():
        for member in members:
            gene_block_dict.setdefault(member, []).append(block_label)

# --- CLUSTER-AWARE LAYOUT ---
# SFDP layout that pulls communities apart. `blocks` here is whatever the final
# iteration of the loop above left behind, i.e. only the LAST run's partition
# drives the layout.
pos = sfdp_layout(g, groups=blocks)

# Draw graph with community-based colors
# file_name = "rho_set_non_dia"
# graph_draw(g,
#            pos=pos,  # <-- use the new layout
#            vertex_text=name_prop,
#            vertex_fill_color=color,
#            vertex_font_size=10,
#            vertex_size=vsize,
#            edge_pen_width=pen_width,
#            edge_text=edge_label,
#            edge_font_size=8,
#            # output_size=(800, 800),
#            output_size=(2000, 2000),
#            output=f"{location}/{file_name}.png") 
# output = "Gene"
# for i in range(100):
#     output += f",Run_{i+1}"
# for key, value in gene_block_dict.items():
#     output += f"\n{key},{','.join(value)}"
    
# with open(f"/mnt/c/Users/sauba/Desktop/20.RERConverge_trimmed/3.New_colias/61.New_alignment_colias/19. Graph tool/SBM_100_runs_cluster.csv", 'w') as out_file:
#     out_file.write(output)
# output = ''
# with open(f"{location}/{file_name}_degree.csv", 'w') as out_file:
#     for degree_num in degree_list_plus:
#         output +=  str(degree_num[0])+","+ str(degree_num[1])+ "\n"
#     # output = "\n".join(list_of_degree)
#     out_file.write(output)

# import matplotlib.pyplot as plt
# plt.hist(list_of_degree, bins=max(list_of_degree), edgecolor='black')

# # Add labels
# plt.title('Histogram')
# plt.xlabel('Degree')
# plt.ylabel('Count')

# # Show plot
# plt.savefig(f"{location}/{file_name}_degree_histogram_output.pdf", format='pdf', bbox_inches='tight')
# plt.close()

# print(degree_list_plus)

Run  0 Run  1 Run  2 Run  3 Run  4 Run  5 Run  6 Run  7 Run  8 Run  9 Run  10 Run  11 Run  12 Run  13 Run  14 Run  15 Run  16 Run  17 Run  18 Run  19 Run  20 Run  21 Run  22 Run  23 Run  24 Run  25 Run  26 Run  27 Run  28 Run  29 Run  30 Run  31 Run  32 Run  33 Run  34 Run  35 Run  36 Run  37 Run  38 Run  39 Run  40 Run  41 Run  42 Run  43 Run  44 Run  45 Run  46 Run  47 Run  48 Run  49 Run  50 Run  51 Run  52 Run  53 Run  54 Run  55 Run  56 Run  57 Run  58 Run  59 Run  60 Run  61 Run  62 Run  63 Run  64 Run  65 Run  66 Run  67 Run  68 Run  69 Run  70 Run  71 Run  72 Run  73 Run  74 Run  75 Run  76 Run  77 Run  78 Run  79 Run  80 Run  81 Run  82 Run  83 Run  84 Run  85 Run  86 Run  87 Run  88 Run  89 Run  90 Run  91 Run  92 Run  93 Run  94 Run  95 Run  96 Run  97 Run  98 Run  99 

In [12]:
# Export the per-run block labels: one row per gene, one column per run.
import csv  # local import: stdlib only, used solely by this export cell

output_path = "/mnt/c/Users/sauba/Desktop/20.RERConverge_trimmed/3.New_colias/61.New_alignment_colias/19.Graph tool/SBM_100_runs_cluster.csv"

# Size the header from the recorded data instead of repeating the run count,
# so it stays correct if the number of runs in the loop is changed.
n_runs = max((len(labels) for labels in gene_block_dict.values()), default=0)

# csv.writer quotes fields when needed, so gene annotations that contain commas
# or quotes no longer spill into neighbouring columns. newline="" together with
# lineterminator="\n" keeps plain "\n" row endings.
with open(output_path, "w", newline="") as out_file:
    writer = csv.writer(out_file, lineterminator="\n")
    writer.writerow(["Gene"] + [f"Run_{run + 1}" for run in range(n_runs)])
    for gene, labels in gene_block_dict.items():
        writer.writerow([gene, *labels])