In [7]:
import networkx as nx
import numpy as np
import pandas as pd
import json
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import igraph as ig
import pygraphviz
import cairo
from pyvis import network as net
from netgraph import Graph
from netgraph import InteractiveGraph

# Define directories
mainDir = r"/Users/rudi/Box Sync/GlycoAI/" #change this line to your folder structure
dataDir = mainDir + 'geneNetGlyco/data/mutual_info/'
figureDir = mainDir + 'geneNetGlyco/figures/'
glycoOntoDir = mainDir + 'Data/GlycoEnzOnto/paper/'

# Load Glycopathway dictionary (keyed by pathway name; later cells iterate
# each entry as a collection of gene names)
GlycoEnzOntoFile = glycoOntoDir + 'GlycoPathShort.json'
with open(GlycoEnzOntoFile, 'r') as fh:
    glycoSet = json.load(fh)

# Pathway names are reused as output file names further down, so the two
# names containing '/' are re-keyed with '-' instead.
for old_name, new_name in (('core1/2', 'core1-2'), ('core3/4', 'core3-4')):
    glycoSet[new_name] = glycoSet.pop(old_name)

In [None]:
#User-defined parameters
#Cutoff: percentile cutoff to clip the TF-gene scores
cutoff = 0.90 # NOTE(review): not referenced anywhere in this cell

#Number of edges shown in the graph
nedge = 200 #total edges shown (only used by the commented-out heuristic below)
min_edge = 5 #min edges per genes

#Min and Max edge weights
minW = 0.5 #min weight
maxW = 2 #max weight

#Load data
fname = ''.join([dataDir, 'MI_All.txt']) #this is using "all" cells
df = pd.read_csv(fname, sep='\t')


def _ring_radii(n_genes, spacing, scale):
    """Return (gene_radius, tf_radius) for the two concentric node rings.

    The gene ring grows with the number of genes (`spacing` units of arc per
    gene, never below 750 before scaling). The TF ring is at least 1.9x the
    gene ring and never below 1500 before scaling.
    """
    gene_radius = scale*max(750, spacing/(2*np.pi/n_genes))
    # Compare both candidates in scaled units. The previous form,
    # scale*max(1500, 1.9*gene_radius), mixed an unscaled 1500 with an
    # already-scaled gene radius, so the 1.9x branch could never win and
    # large pathways ended up with the gene ring outside the TF ring.
    tf_radius = max(scale*1500, 1.9*gene_radius)
    return gene_radius, tf_radius


def _ring_layout(nodes, radius):
    """Return {node: [x, y]} with `nodes` evenly spaced on a circle of `radius` around the origin."""
    step = 2*np.pi/len(nodes) if nodes else 0.0
    return {node: [radius*np.cos(step*idx), radius*np.sin(step*idx)] for idx, node in enumerate(nodes)}


for glycopath, pathway_genes in glycoSet.items():
    #Trim TF-gene data to the glycopathway genes up to the selected number of edges
    #node_edge = round(nedge/len(glycoSet[glycopath]))
    #node_edge = max(node_edge, min_edge)
    node_edge = min_edge

    # Top `node_edge` rows per gene, highest Score first; concatenated once
    # instead of growing a frame inside the loop.
    top_rows = [
        df[df['Gene'] == gene].sort_values('Score', ascending=False).head(node_edge)
        for gene in pathway_genes
    ]
    dfDraw = pd.concat(top_rows, ignore_index=True) if top_rows else df.iloc[0:0].copy()
    if dfDraw.empty:
        # Nothing to draw; continuing would divide by len(gene_nodes) == 0 below.
        print(f"Skipping {glycopath}: no TF-gene scores found for its genes")
        continue

    #Use networkx to build the graph

    # Min-max rescale Score into [minW, maxW].
    # NOTE(review): a recorded output later in this notebook shows a Score of
    # 1.797693e+308, which looks like a float-max sentinel; one such row would
    # push every other weight down to ~minW. Confirm whether it should be filtered.
    maxScore = dfDraw['Score'].max()
    minScore = dfDraw['Score'].min()
    score_range = maxScore - minScore
    if score_range > 0:
        dfDraw['weight'] = (dfDraw['Score']-minScore)/score_range*(maxW-minW)+minW
    else:
        # All scores identical (or not comparable): avoid 0/0 -> NaN weights.
        dfDraw['weight'] = (minW+maxW)/2

    edges = dfDraw[['TF', 'Gene', 'weight']].rename(columns={'TF': 'source', 'Gene': 'target'})
    G = nx.from_pandas_edgelist(edges, edge_attr=True, create_using=nx.DiGraph())

    #Label the nodes: anything listed in the pathway is a glycogene, the rest are TFs
    glycogenes = set(pathway_genes)
    for node in G.nodes:
        if node in glycogenes:
            G.nodes[node].update(group=1, level=0, title='Glycogene')
        else:
            G.nodes[node].update(group=2, level=1, title='TF')

    gene_nodes = [n for n, v in G.nodes(data=True) if v['title'] == 'Glycogene']
    tf_nodes = [n for n, v in G.nodes(data=True) if v['title'] == 'TF']

    #Set position of nodes (genes on the inner ring, TFs on the outer ring)
    scale = 0.002
    gene_radius, tf_radius = _ring_radii(len(gene_nodes), spacing=200, scale=scale)

    position = {**_ring_layout(gene_nodes, gene_radius), **_ring_layout(tf_nodes, tf_radius)}
    node_color = {**dict.fromkeys(gene_nodes, "pink"), **dict.fromkeys(tf_nodes, "lightblue")}

    #Draw graph using netgraph package
    fig=plt.figure()
    Graph(G, node_layout=position, node_size=20, node_labels=True, node_color=node_color, node_label_fontdict=dict(size=5), edge_width=3, arrows=True)
    plt.savefig(''.join([figureDir, 'PNG/', glycopath, '.png']), dpi=600)
    # NOTE(review): this label is added after savefig, so it appears in the
    # inline figure but not in the saved PNG; move it above savefig if the
    # PNG is meant to carry the pathway name.
    plt.text(-len(glycopath)/2*scale*80,0,glycopath, fontsize=12)

    #Create graph using pyvis
    #Compute positions
    scale = 0.25
    gene_radius, tf_radius = _ring_radii(len(gene_nodes), spacing=150, scale=scale)

    g = net.Network('600px', '600px', directed=False, notebook=True)
    node_size = 50 #size of nodes

    node_dict = {} # node name -> integer pyvis id

    #Add nodes: genes get ids 0..len(gene_nodes)-1, TFs continue from there
    for node_id, (node, (x, y)) in enumerate(_ring_layout(gene_nodes, gene_radius).items()):
        g.add_nodes([node_id], title=['Glycogene'], value=[node_size], label=[node], color=['pink'], x=[x], y=[y])
        node_dict[node] = node_id

    for node_id, (node, (x, y)) in enumerate(_ring_layout(tf_nodes, tf_radius).items(), start=len(gene_nodes)):
        g.add_nodes([node_id], title=['TF'], value=[node_size], label=[node], color=['lightblue'], x=[x], y=[y])
        node_dict[node] = node_id

    #Add edges (width is Score relative to the largest Score, offset by minW)
    for _, row in dfDraw.iterrows():
        width = row["Score"]/maxScore*(maxW-minW)+minW
        g.add_edge(node_dict[row["TF"]], node_dict[row["Gene"]], width=width, arrowStrikethrough=False, color="darkslategrey")

    # Positions are fixed above, so switch physics off for every node.
    for n in g.nodes:
        n.update({'physics': False})
    g.show(''.join([figureDir,'HTML/', glycopath, '.html']))

In [None]:
#Create graph using pyvis
# NOTE: this cell does not build its own inputs. It reads gene_nodes, tf_nodes,
# dfDraw and glycopath as left behind by an earlier cell, and writes to the
# same HTML path that cell uses for that pathway.
#Compute positions
scale = 0.25
gene_radius = scale*max(750, 150/(2*np.pi/len(gene_nodes)))
# Compare both candidates in scaled units. scale*max(1500, 1.9*gene_radius)
# mixed an unscaled 1500 with the already-scaled gene radius, so the 1.9x
# branch never applied and a large gene ring could end up outside the TF ring.
tf_radius = max(scale*1500, 1.9*gene_radius)

g = net.Network('300px', '300px', directed=False, notebook=True)
node_size = 50 #size of nodes

node_dict = {} # node name -> integer pyvis id

#Add nodes: genes on the inner ring with ids 0..len(gene_nodes)-1
gene_step = 2*np.pi/len(gene_nodes)
for gene_id, node in enumerate(gene_nodes):
    x = gene_radius*np.cos(gene_step*gene_id)
    y = gene_radius*np.sin(gene_step*gene_id)
    g.add_nodes([gene_id], title=['Glycogene'], value=[node_size], label=[node], color=['pink'], x=[x], y=[y])
    node_dict[node] = gene_id

# TFs on the outer ring; ids continue after the gene ids via an explicit
# offset rather than the index leaked from the loop above.
tf_offset = len(gene_nodes)
for j, node in enumerate(tf_nodes):
    x = tf_radius*np.cos(2*np.pi/len(tf_nodes)*j)
    y = tf_radius*np.sin(2*np.pi/len(tf_nodes)*j)
    g.add_nodes([tf_offset+j], title=['TF'], value=[node_size], label=[node], color=['lightblue'], x=[x], y=[y])
    node_dict[node] = tf_offset+j

#Add edges
minW = 0.5 #min edge thickness
maxW = 4 #max edge thickness
maxScore = dfDraw['Score'].max()

# Width is Score relative to the largest Score, offset by minW.
for _, row in dfDraw.iterrows():
    tf_id = node_dict[row["TF"]]
    gene_id = node_dict[row["Gene"]]
    width = row["Score"]/maxScore*(maxW-minW)+minW
    g.add_edge(tf_id, gene_id, width=width, arrowStrikethrough=False, color="darkslategrey")

# Positions are fixed above, so switch physics off for every node.
for n in g.nodes:
    n.update({'physics': False})
g.show(''.join([figureDir,'HTML/', glycopath, '.html']))

In [16]:
#Top TFs
#User-defined parameters
#Cutoff: percentile cutoff to clip the TF-gene scores
cutoff = 0.90 # NOTE(review): not referenced anywhere in this cell

#Number of edges shown in the graph
nedge = 200 #total edges shown (only used by the commented-out heuristic below)
min_edge = 100 #min edges per genes


#Load data
fname = ''.join([dataDir, 'MI_All.txt']) #this is using "all" cells
df = pd.read_csv(fname, sep='\t')

#for glycopath, value in glycoSet.items():
glycopath = 'Dolichol'

#Trim TF-gene data to the glycopathway genes up to the selected number of edges
#node_edge = round(nedge/len(glycoSet[glycopath]))
#node_edge = max(node_edge, min_edge)
node_edge = min_edge

# Top `node_edge` rows per gene, highest Score first. The pieces are collected
# and concatenated once, instead of repeatedly concatenating onto an initially
# empty DataFrame (quadratic, and deprecated behaviour in recent pandas).
# NOTE(review): the recorded output of this table shows a Score of
# 1.797693e+308 (PRDM9 / ALG1L2), which looks like a float-max sentinel rather
# than a real score; such rows sort to the top. Confirm whether to filter them.
top_rows = [
    df[df['Gene'] == gene].sort_values('Score', ascending=False).head(node_edge)
    for gene in glycoSet[glycopath]
]
# An empty gene list yields an empty frame that still has df's columns.
dfDraw = pd.concat(top_rows, ignore_index=True) if top_rows else df.iloc[0:0].copy()
    


In [17]:
# Show the per-gene top-TF table assembled in the previous cell.
dfDraw

Unnamed: 0,TF,Gene,Score
0,PRDM9,ALG1L2,1.797693e+308
1,KMT2A,ALG1L2,2.030257e-04
2,JUN,ALG1L2,2.023507e-04
3,STAT3,ALG1L2,1.974711e-04
4,H2AZ1,ALG1L2,1.915384e-04
...,...,...,...
2495,MXD4,ALG3,1.324520e-03
2496,ETS1,ALG3,1.307868e-03
2497,HDAC1,ALG3,1.306133e-03
2498,ZNF511,ALG3,1.303521e-03


In [18]:
# Number of selected rows per TF, most frequent first (presumably one row per
# TF-gene pair, i.e. the number of genes each TF is a top hit for — TODO confirm).
dfDraw['TF'].value_counts()

STAT3     25
YY1       25
EP300     25
SMC3      25
NR3C1     25
          ..
CNOT3      1
CDK9       1
NFKB2      1
BCL11B     1
GTF2A2     1
Name: TF, Length: 329, dtype: int64

In [14]:
# Number of distinct genes present in the table (missing values, if any, count as one).
dfDraw['Gene'].nunique(dropna=False)

25