In [79]:
import pandas as pd
import networkx as nx
from typing import List
from pyvis.network import Network
from collections import Counter

In [80]:
file_name = "ld_clump_assoc.txt"  # LD-clump association file (tab-separated)
data = pd.read_csv(file_name, sep="\t")

# Split every LD clump identifier into its components in one vectorised pass.
# 1:798400_A_G -> chr : 1 , pos : 798400 , A1 : A , A2 : G
#
# The pattern captures the text before the first ':' (chr) and then the first
# three '_'-separated fields that follow it (pos, A1, A2).
ld_clump_pattern = r'^([^:]*):([^_:]*)_([^_:]*)_([^_:]*)'
ld_columns = ['LD chr', 'LD pos', 'LD A1', 'LD A2']

ld_parts = data['LD clump'].str.extract(ld_clump_pattern)
ld_parts.columns = ld_columns

# Missing or malformed identifiers come back as NaN in every captured column.
# Stop here with a readable message instead of failing later in the network code.
malformed = ld_parts['LD chr'].isna()
if malformed.any():
    examples = data.loc[malformed, 'LD clump'].head().tolist()
    raise ValueError(
        f"{int(malformed.sum())} 'LD clump' value(s) do not look like "
        f"'chr:pos_A1_A2', e.g. {examples}"
    )

for column in ld_columns:
    data[column] = ld_parts[column]

# Keep only the CpGs with the most rows (i.e. the most SNP connections) so the
# plotted network stays readable.
top_n = 5
cpg_ids = [cpg for cpg, _ in Counter(data['CpG'].values).most_common(top_n)]

# .copy() makes the filtered frame independent of the full table, so later
# cells can add or modify columns without a SettingWithCopyWarning.
data = data[data['CpG'].isin(cpg_ids)].copy()


In [81]:
# Network elements (nodes, edges)

# NODES
# Series.unique() returns an array, not a list; materialise plain Python lists
# so the List[str] annotations below are accurate.

cpgs: List[str] = list(data['CpG'].unique())       # cpg nodes
snps: List[str] = list(data['Top SNP'].unique())   # snp nodes
lds: List[str] = list(data['LD clump'].unique())   # ld clump nodes


# networkx node format: (node_id, attribute_dict); the colour encodes the node type

nodes_cpg = [(cpg, {'color': 'blue'}) for cpg in cpgs]
nodes_snp = [(snp, {'color': 'green'}) for snp in snps]
nodes_LD = [(ld, {'color': 'yellow'}) for ld in lds]

# EDGES
# One edge per row of `data`, so a pair that occurs in several rows is listed
# several times.

cpg_snp_edges = list(zip(data['CpG'], data['Top SNP']))       # cpg -> snp
snp_ld_edges = list(zip(data['Top SNP'], data['LD clump']))   # snp -> ld clump

# Link records ({'source': snp, 'target': ld clump, 'value': 1}), built with a
# non-mutating chain instead of editing a derived frame in place.

snp_snp_link_data = (
    data[['Top SNP', 'LD clump']]
    .rename(columns={'Top SNP': 'source', 'LD clump': 'target'})
    .assign(value=1)
)
snp_snp_links = snp_snp_link_data.to_dict(orient='records')


In [82]:

# Build the networkx directed multigraph: typed/coloured nodes first, then the
# two edge families (CpG -> SNP in black, SNP -> LD clump in red).
# A MultiDiGraph keeps every edge tuple it is given, so repeated pairs become
# parallel edges.
cpgNet = nx.MultiDiGraph()
cpgNet.add_nodes_from(nodes_cpg)
cpgNet.add_nodes_from(nodes_snp)
cpgNet.add_nodes_from(nodes_LD)
cpgNet.add_edges_from(cpg_snp_edges, color='black')
cpgNet.add_edges_from(snp_ld_edges, color='red')


# pyvis drawer for the interactive HTML view.
# notebook=True puts the drawer in notebook mode, which show() relies on when it
# is called from a notebook cell; without it recent pyvis releases fail inside
# show(). The call stays the last expression of the cell so whatever it returns
# is displayed as the cell output.
# NOTE(review): the drawer is left undirected, as before — pass directed=True
# as well if edge arrows should be shown; confirm the intended look.
net_gen = Network(notebook=True)
net_gen.from_nx(cpgNet)
net_gen.show('example.html')


