In [114]:
import dgl
import numpy as np
import pandas as pd
import networkx as nx
from dgl.data.utils import save_graphs

In [42]:
# Node tables: read without a header row, so the single column is addressed as 0.
genes = pd.read_csv('../data/clean/genes.nodes', header=None)
diseases = pd.read_csv('../data/clean/diseases.nodes', header=None)

# Edge tables read without a header row (columns are addressed by position).
gene_gene = pd.read_csv('../data/clean/gene-gene-association.edges', header=None)
gene_disease = pd.read_csv('../data/clean/gene-disease-association.edges', header=None)

# Edge tables whose first row is used as the header (columns addressed by name).
drug_gene = pd.read_csv('../data/clean/drug-gene-all.edges')
drug_disease = pd.read_csv('../data/clean/drug-disease-treat.edges')

### Map gene names to numbers

In [43]:
# Gene name -> integer node id, where the id is the row position in the genes table.
gene_num_dict = {gene: idx for idx, gene in enumerate(genes[0])}

In [44]:
# Attach the integer gene-node id of every drug target.
drug_gene['gene_num'] = drug_gene['gene_names'].map(gene_num_dict)
# Series.map leaves NaN for names that are absent from gene_num_dict, which
# would silently turn the id column into floats; fail early instead.
assert drug_gene['gene_num'].notna().all(), \
    'drug_gene references gene names that are not in gene_num_dict'
drug_gene.head()

Unnamed: 0,compound_id,gene_names,mode,gene_num
0,4768,LTB4R,binding,4032
1,5709,CCKBR,binding,1152
2,21800,PPARA,binding,5453
3,3878,TMPRSS7,inhibition,7201
4,5361092,OPRK1,binding,4914


In [46]:
# Translate both endpoints of every gene-gene interaction into gene-node ids.
# The table was read without a header, so columns 0 and 1 hold the two gene names.
gene_gene['gene1_num'] = gene_gene[0].map(gene_num_dict)
gene_gene['gene2_num'] = gene_gene[1].map(gene_num_dict)
# Series.map leaves NaN for names that are absent from gene_num_dict, which
# would corrupt the edge lists built from these columns; fail early instead.
assert gene_gene[['gene1_num', 'gene2_num']].notna().all().all(), \
    'gene_gene references gene names that are not in gene_num_dict'
gene_gene.head()

Unnamed: 0,0,1,2,gene1_num,gene2_num
0,ARF5,AP4S1,binding,661,618
1,ARF5,AP4S1,reaction,661,618
2,ARF5,YKT6,binding,661,7747
3,ARF5,YKT6,catalysis,661,7747
4,ARF5,YKT6,reaction,661,7747


### Map diseases to numbers

In [48]:
# Disease identifier -> integer node id, where the id is the row position in the diseases table.
diseases_num_dict = {disease: idx for idx, disease in enumerate(diseases[0])}

In [51]:
# Translate both endpoints of every gene-disease association into node ids.
# The table was read without a header: column 0 is the gene name and column 1
# the disease identifier.
# 'gene_num' must be created here: the edge list built further down reads
# gene_disease['gene_num'], and no other cell defines it, so a fresh
# "Restart & Run All" would otherwise fail with a KeyError.
gene_disease['gene_num'] = gene_disease[0].map(gene_num_dict)
gene_disease['disease_num'] = gene_disease[1].map(diseases_num_dict)
# Series.map leaves NaN for keys that are absent from the dictionary, which
# would corrupt the edge list built from these columns; fail early instead.
assert gene_disease[['gene_num', 'disease_num']].notna().all().all(), \
    'gene_disease references genes or diseases that are not in the node dictionaries'
gene_disease.head()

Unnamed: 0,0,1,gene_num,disease_num
0,A1BG,C0036341,0,523
1,NAT1,C0001973,4545,14
2,NAT1,C0005684,4545,74
3,NAT1,C0033578,4545,474
4,NAT1,C0376358,4545,982


In [54]:
# Attach the integer disease-node id of every treated disease.
drug_disease['disease_num'] = drug_disease['diseases'].map(diseases_num_dict)
# Series.map leaves NaN for identifiers that are absent from diseases_num_dict,
# which would silently turn the id column into floats; fail early instead.
assert drug_disease['disease_num'].notna().all(), \
    'drug_disease references diseases that are not in diseases_num_dict'
drug_disease.head()

Unnamed: 0,compound_id,diseases,disease_num
0,224004,C0006142,84
1,2333,C0018099,233
2,2333,C0740394,1091
3,15671,C0020538,276
4,115358,C0024530,357


### Construct graph using dgl

In [87]:
# Accumulates (source type, relation, destination type) -> list of (src, dst) id pairs.
graph_dict = dict()

In [88]:
# Pair up the two id columns row by row: one (gene id, disease id) tuple per association.
gene_disease_list = list(zip(gene_disease['gene_num'], gene_disease['disease_num']))
gene_disease_list[:10]

[(0, 523),
 (4545, 14),
 (4545, 74),
 (4545, 474),
 (4545, 982),
 (4546, 74),
 (4546, 84),
 (4546, 474),
 (4546, 962),
 (4546, 982)]

In [89]:
# Register the gene -> disease association edges under their canonical edge type.
graph_dict[('gene', 'associate', 'disease')] = gene_disease_list

#### gene-gene interaction

In [90]:
# Pair up the two id columns row by row: one (gene id, gene id) tuple per interaction row.
gene_gene_list = list(zip(gene_gene['gene1_num'], gene_gene['gene2_num']))
gene_gene_list[:10]

[(661, 618),
 (661, 618),
 (661, 7747),
 (661, 7747),
 (661, 7747),
 (661, 7626),
 (661, 7626),
 (661, 659),
 (661, 659),
 (661, 659)]

In [91]:
# Distinct interaction labels found in column 2, in order of first appearance.
gene_interaction_types = pd.unique(gene_gene[2])
gene_interaction_types

array(['binding', 'reaction', 'catalysis', 'activation', 'inhibition',
       'ptmod', 'expression'], dtype=object)

Subset gene_gene according to interaction type

In [92]:
# One ('gene', <interaction>, 'gene') edge list per interaction label.
for interaction in gene_interaction_types:
    # Boolean mask selecting the rows that carry this interaction label.
    has_label = gene_gene[2] == interaction
    src_ids = gene_gene.loc[has_label, 'gene1_num']
    dst_ids = gene_gene.loc[has_label, 'gene2_num']
    graph_dict[('gene', interaction, 'gene')] = list(zip(src_ids, dst_ids))

In [93]:
# Show the (source type, relation, destination type) keys registered so far.
graph_dict.keys()

dict_keys([('gene', 'associate', 'disease'), ('gene', 'binding', 'gene'), ('gene', 'reaction', 'gene'), ('gene', 'catalysis', 'gene'), ('gene', 'activation', 'gene'), ('gene', 'inhibition', 'gene'), ('gene', 'ptmod', 'gene'), ('gene', 'expression', 'gene')])

In [94]:
# Build the heterograph from the canonical-edge-type -> edge-list mapping.
# Node counts are passed explicitly: when they are omitted, DGL sets each
# count to (largest id appearing in the edges) + 1, so rows at the end of the
# genes / diseases tables that have no edge would be left out of the graph.
# Ids were assigned from row positions, so every id is below the row count.
g = dgl.heterograph(
    graph_dict,
    num_nodes_dict={'gene': genes.shape[0], 'disease': diseases.shape[0]},
)

In [118]:
# Display the graph summary: node counts per type, edge counts per relation, metagraph.
g

Graph(num_nodes={'disease': 1246, 'gene': 8321},
      num_edges={('gene', 'associate', 'disease'): 11413, ('gene', 'binding', 'gene'): 212652, ('gene', 'reaction', 'gene'): 224514, ('gene', 'catalysis', 'gene'): 207576, ('gene', 'activation', 'gene'): 67330, ('gene', 'inhibition', 'gene'): 22984, ('gene', 'ptmod', 'gene'): 25042, ('gene', 'expression', 'gene'): 14193},
      metagraph=[('gene', 'disease'), ('gene', 'gene'), ('gene', 'gene'), ('gene', 'gene'), ('gene', 'gene'), ('gene', 'gene'), ('gene', 'gene'), ('gene', 'gene')])