In [5]:
from __future__ import division
from __future__ import print_function
from operator import itemgetter
from itertools import combinations
import os
import time
import tensorflow as tf
import numpy as np
import pandas as pd
import networkx as nx
import scipy.sparse as sp
from sklearn import metrics
import matplotlib.pyplot as plt

In [6]:
from decagon.deep.optimizer import DecagonOptimizer
from decagon.deep.model import DecagonModel
from decagon.deep.minibatch import EdgeMinibatchIterator
from decagon.utility import rank_metrics, preprocessing

In [8]:
# Load the gene-gene edge list; the columns 'Gene 1' and 'Gene 2' hold the two
# endpoint gene IDs of each row.
gene_net = pd.read_csv("data/bio-decagon-ppi.csv")
print(gene_net.head())

# Add every row as an undirected edge in a single call instead of a per-row
# Python loop with label-based Series lookups.
G = nx.Graph()
G.add_edges_from(zip(gene_net['Gene 1'], gene_net['Gene 2']))

# Rows and columns of the adjacency matrix follow the node order of G.
gene_adj = nx.adjacency_matrix(G)

# The gene count has to equal the adjacency dimension (one entry per node of
# G). The previous max() over the per-column unique counts undercounts
# whenever some gene appears in only one of the two columns.
n_genes = gene_adj.shape[0]
gene_degrees = np.array(gene_adj.sum(axis=0)).squeeze()

   Gene 1  Gene 2
0  114787  375519
1  114787  285613
2  114787    7448
3  114787    4914
4  114787   51343


In [9]:
class my_dictionary(dict):
    """A ``dict`` subclass with an explicit ``add(key, value)`` method.

    Accepts the same constructor arguments as ``dict`` (none, a mapping, an
    iterable of pairs, and/or keyword arguments).
    """

    def __init__(self, *args, **kwargs):
        # The earlier body (`self = dict`) only rebound the local name `self`
        # and never initialised the instance; delegate to dict instead.
        dict.__init__(self, *args, **kwargs)

    def add(self, key, value):
        """Store ``value`` under ``key``, overwriting any existing entry."""
        self[key] = value

In [10]:
# Load the drug-target table: column 'STITCH' holds the drug ID and column
# 'Gene' the gene ID of each row.
gene_drug_net = pd.read_csv("data/bio-decagon-targets-all.csv")
print(gene_drug_net.head())

# NOTE(review): this is the larger of the two unique counts (drug IDs vs. gene
# IDs), i.e. an upper bound on the number of drugs in this file rather than the
# drug count itself. The value is kept as-is because other cells size their
# drug matrices with it -- confirm the intended definition.
n_drugs = max(len(pd.unique(gene_drug_net['STITCH'])), len(pd.unique(gene_drug_net['Gene'])))
print(n_drugs)
List_of_nums = list(range(1, n_drugs))

# Map each drug ID to a dense 0-based index in order of first appearance, so
# every index is a valid row/column of an n_drugs-sized matrix.
dict_obj = my_dictionary()
for drug in pd.unique(gene_drug_net['STITCH']):
    dict_obj.add(drug, len(dict_obj))

# Next unused drug index. It is kept in `i` for code that continues the
# numbering; the edge construction below no longer overwrites it.
i = len(dict_obj)

# Gene-drug graph. Drug nodes are labelled with their string STITCH ID so they
# cannot collide with the integer gene IDs (integer drug indices could).
gene_drug_graph = nx.Graph()
gene_drug_graph.add_edges_from(zip(gene_drug_net['Gene'], gene_drug_net['STITCH']))

# Gene x drug biadjacency matrix: rows follow the node order of the gene graph
# G (the same order nx.adjacency_matrix(G) uses), columns are drug indices.
gene_row = {gene: row for row, gene in enumerate(G.nodes())}
targets = gene_drug_net[['Gene', 'STITCH']].drop_duplicates()
target_rows = targets['Gene'].map(gene_row)  # NaN when the gene is not in G
target_cols = targets['STITCH'].map(dict(dict_obj))
in_ppi = target_rows.notna()
n_dropped = int((~in_ppi).sum())
if n_dropped:
    # Such genes have no row in the gene graph, so the pair cannot be placed.
    print("Skipped %d drug-target pairs whose gene is not a node of G" % n_dropped)
rows = target_rows[in_ppi].astype(np.int64).values
cols = target_cols[in_ppi].astype(np.int64).values
gene_drug_adj = sp.csr_matrix(
    (np.ones(len(rows), dtype=np.int64), (rows, cols)),
    shape=(G.number_of_nodes(), n_drugs),
)
drug_gene_adj = gene_drug_adj.transpose(copy=True)


         STITCH  Gene
0  CID000060752  3757
1  CID006918155  2908
2  CID103052762  3359
3  CID023668479  1230
4  CID000028864  1269
7795


In [15]:

import pandas as pd

# Load drug-drug combinations: columns 'STITCH 1' and 'STITCH 2' hold the two
# drug IDs, and 'Polypharmacy Side Effect' the relation type of each row.
drug_drug_net = pd.read_csv("data/bio-decagon-combo.csv")

# Extend the drug -> index mapping with drugs that appear only in this file.
# Numbering continues after the largest index already assigned instead of
# relying on whatever value a loop variable `i` holds from an earlier cell.
next_drug_index = max(dict_obj.values()) + 1 if dict_obj else 0
for column in ('STITCH 1', 'STITCH 2'):
    for drug in pd.unique(drug_drug_net[column]):
        if drug not in dict_obj:
            dict_obj.add(drug, next_drug_index)
            next_drug_index += 1

# Every drug index must address a row/column of an n_drugs x n_drugs matrix.
if next_drug_index > n_drugs:
    raise ValueError(
        "drug indices run up to %d but n_drugs is only %d"
        % (next_drug_index - 1, n_drugs)
    )

# Translate drug IDs to indices once for the whole table.
drug_index = dict(dict_obj)
pairs = pd.DataFrame({
    'effect': drug_drug_net['Polypharmacy Side Effect'],
    'd1': drug_drug_net['STITCH 1'].map(drug_index),
    'd2': drug_drug_net['STITCH 2'].map(drug_index),
})

# One symmetric 0/1 adjacency matrix per side effect, built in a single pass
# over the table. Relations are ordered by sorted side-effect label so the
# relation index is reproducible between runs (set() ordering is not).
n_drug_types = []
drug_drug_adj_list = []
for effect, group in pairs.groupby('effect', sort=True):
    d1 = group['d1'].values
    d2 = group['d2'].values
    rows = np.concatenate([d1, d2])
    cols = np.concatenate([d2, d1])
    adj = sp.csr_matrix((np.ones(len(rows)), (rows, cols)), shape=(n_drugs, n_drugs))
    # The constructor sums repeated (row, col) entries; clamp back to 1.
    adj.data[:] = 1.0
    n_drug_types.append(effect)
    drug_drug_adj_list.append(adj)
n_drugdrug_rel_types = len(n_drug_types)

drug_degrees_list = [np.array(drug_adj.sum(axis=0)).squeeze() for drug_adj in drug_drug_adj_list]

print("Side Effects count: ", n_drugdrug_rel_types)
print(drug_drug_net.head())

KeyboardInterrupt: 

In [8]:
# Undirected drug-drug graph over drug indices, with one edge per row of the
# combination table. The earlier loop ran over range(n_drugdrug_rel_types),
# i.e. the number of side-effect types, so it only visited the first few rows.
drug_drug_graph = nx.Graph()
drug_drug_graph.add_edges_from(
    (dict_obj[drug1], dict_obj[drug2])
    for drug1, drug2 in zip(drug_drug_net['STITCH 1'], drug_drug_net['STITCH 2'])
)

# Adjacency of the drug-drug graph (previously taken from gene_drug_graph).
# Rows/columns follow the graph's node insertion order, not the drug index.
drug_adj = nx.adjacency_matrix(drug_drug_graph)

In [9]:
# Reset both per-relation containers to fresh, independent empty lists.
drug_drug_adj_list, drug_degrees_list = [], []

In [73]:
# Drug x drug product of the drug-gene and gene-drug adjacency matrices.
# `.dot` is a true matrix product for scipy sparse matrices and sparse arrays;
# np.dot on sparse operands falls back to `*`, which is elementwise for sparse
# arrays.
tmp = sp.csr_matrix(drug_gene_adj.dot(gene_drug_adj))
if min(tmp.shape) < n_drugs:
    raise IndexError(
        "product has shape %r but drug indices up to %d are required"
        % (tmp.shape, n_drugs - 1)
    )

# Only pairs d1 < d2 are inspected, and only values >= 4 can match, so the
# nonzero entries of the strict upper triangle are sufficient. Extract them
# once instead of probing every pair again for every relation.
upper = sp.triu(tmp[:n_drugs, :n_drugs], k=1).tocoo()

# Relation i links the drug pairs whose product entry equals i + 4
# (presumably the number of shared target genes -- verify against how
# gene_drug_adj is built).
for i in range(n_drugdrug_rel_types):
    hit = upper.data == i + 4
    d1, d2 = upper.row[hit], upper.col[hit]
    # Mirror each pair so the relation matrix is symmetric.
    mat = sp.csr_matrix(
        (np.ones(2 * len(d1)), (np.concatenate([d1, d2]), np.concatenate([d2, d1]))),
        shape=(n_drugs, n_drugs),
    )
    drug_drug_adj_list.append(mat)
drug_degrees_list = [np.array(drug__adj.sum(axis=0)).squeeze() for drug__adj in drug_drug_adj_list]

In [10]:
# Adjacency matrices keyed by (row node type, column node type); the names of
# the inputs indicate type 0 is genes and type 1 is drugs. Each same-type
# relation is listed together with its transpose.
adj_mats_orig = dict()
adj_mats_orig[(0, 0)] = [gene_adj, gene_adj.transpose(copy=True)]
adj_mats_orig[(0, 1)] = [gene_drug_adj]
adj_mats_orig[(1, 0)] = [drug_gene_adj]
adj_mats_orig[(1, 1)] = drug_drug_adj_list + [m.transpose(copy=True) for m in drug_drug_adj_list]

# Degree vectors per node type, repeated to line up with the doubled lists
# of same-type adjacency matrices above.
degrees = dict()
degrees[0] = [gene_degrees, gene_degrees]
degrees[1] = drug_degrees_list * 2

# featureless (genes): identity features, converted to tuple form
gene_feat = sp.identity(n_genes)
gene_nonzero_feat = gene_feat.shape[0]
gene_num_feat = gene_feat.shape[1]
gene_feat = preprocessing.sparse_to_tuple(gene_feat.tocoo())

# features (drugs): identity features, converted to tuple form
drug_feat = sp.identity(n_drugs)
drug_nonzero_feat = drug_feat.shape[0]
drug_num_feat = drug_feat.shape[1]
drug_feat = preprocessing.sparse_to_tuple(drug_feat.tocoo())

# data representation, keyed by node type
num_feat = {0: gene_num_feat, 1: drug_num_feat}
nonzero_feat = {0: gene_nonzero_feat, 1: drug_nonzero_feat}
feat = {0: gene_feat, 1: drug_feat}

# Shape of every adjacency matrix, per edge type.
edge_type2dim = dict(
    (k, [adj.shape for adj in adjs]) for k, adjs in adj_mats_orig.items()
)

# Decoder name per edge type: 'dedicom' for drug-drug, 'bilinear' otherwise.
edge_type2decoder = dict.fromkeys([(0, 0), (0, 1), (1, 0)], 'bilinear')
edge_type2decoder[(1, 1)] = 'dedicom'

# Number of relations per edge type, and the overall total.
edge_types = dict((k, len(v)) for k, v in adj_mats_orig.items())
num_edge_types = sum(len(v) for v in adj_mats_orig.values())
print("Edge types:", "%d" % num_edge_types)


Edge types: 4


In [None]:
###########################################################
#
# Settings and placeholders
#
###########################################################
tf.compat.v1.disable_eager_execution()
flags = tf.compat.v1.flags
FLAGS = flags.FLAGS
val_test_size = 0.05


def _define_flag_once(definer, name, default, help_text):
    """Register a flag unless one with this name is already registered.

    Makes the cell safe to re-run: defining the same flag a second time is
    rejected by the flags library.
    """
    if name not in FLAGS:
        definer(name, default, help_text)


# FLAGS.batch_size and FLAGS.max_margin are read below, so the flags have to
# be registered before use.
_define_flag_once(flags.DEFINE_integer, 'neg_sample_size', 1, 'Negative sample size.')
_define_flag_once(flags.DEFINE_float, 'learning_rate', 0.001, 'Initial learning rate.')
_define_flag_once(flags.DEFINE_integer, 'epochs', 50, 'Number of epochs to train.')
_define_flag_once(flags.DEFINE_integer, 'hidden1', 64, 'Number of units in hidden layer 1.')
_define_flag_once(flags.DEFINE_integer, 'hidden2', 32, 'Number of units in hidden layer 2.')
_define_flag_once(flags.DEFINE_float, 'weight_decay', 0, 'Weight for L2 loss on embedding matrix.')
_define_flag_once(flags.DEFINE_float, 'dropout', 0.1, 'Dropout rate (1 - keep probability).')
_define_flag_once(flags.DEFINE_float, 'max_margin', 0.1, 'Max margin parameter in hinge loss')
_define_flag_once(flags.DEFINE_integer, 'batch_size', 512, 'minibatch size.')
_define_flag_once(flags.DEFINE_boolean, 'bias', True, 'Bias term.')
# NOTE(review): Jupyter kernels are typically started with `-f <connection
# file>` on the command line; registering `f` keeps flag parsing from
# rejecting it. Harmless outside Jupyter -- confirm it is needed here.
_define_flag_once(flags.DEFINE_string, 'f', '', 'Kernel connection file (Jupyter).')

# Important -- Do not evaluate/print validation performance every iteration as it can take
# substantial amount of time
PRINT_PROGRESS_EVERY = 150


def construct_placeholders(edge_types):
    """Create the graph-mode input placeholders passed to the model/optimizer.

    Args:
        edge_types: dict mapping (row type, column type) to the number of
            relations of that edge type.

    Returns:
        dict from placeholder name to TensorFlow placeholder.

    NOTE(review): the key names follow the reference Decagon training script;
    confirm they match what the installed `decagon` package looks up.
    """
    v1 = tf.compat.v1
    placeholders = {
        'batch': v1.placeholder(tf.int32, name='batch'),
        'batch_edge_type_idx': v1.placeholder(tf.int32, shape=(), name='batch_edge_type_idx'),
        'batch_row_edge_type': v1.placeholder(tf.int32, shape=(), name='batch_row_edge_type'),
        'batch_col_edge_type': v1.placeholder(tf.int32, shape=(), name='batch_col_edge_type'),
        'degrees': v1.placeholder(tf.int32),
        'dropout': v1.placeholder_with_default(0., shape=()),
    }
    # One sparse adjacency placeholder per relation of every edge type.
    placeholders.update({
        'adj_mats_%d,%d,%d' % (i, j, k): v1.sparse_placeholder(tf.float32)
        for i, j in edge_types for k in range(edge_types[i, j])})
    # One sparse feature placeholder per row node type.
    placeholders.update({
        'feat_%d' % i: v1.sparse_placeholder(tf.float32)
        for i, _ in edge_types})
    return placeholders


print("Defining placeholders")
# `placeholders` is consumed by both the model and the optimizer below.
placeholders = construct_placeholders(edge_types)

###########################################################
#
# Create minibatch iterator, model and optimizer
#
###########################################################

print("Create minibatch iterator")
minibatch = EdgeMinibatchIterator(
    adj_mats=adj_mats_orig,
    feat=feat,
    edge_types=edge_types,
    batch_size=FLAGS.batch_size,
    val_test_size=val_test_size
)

print("Create model")
model = DecagonModel(
    placeholders=placeholders,
    num_feat=num_feat,
    nonzero_feat=nonzero_feat,
    edge_types=edge_types,
    decoders=edge_type2decoder,
)

print("Create optimizer")
with tf.name_scope('optimizer'):
    opt = DecagonOptimizer(
        embeddings=model.embeddings,
        latent_inters=model.latent_inters,
        latent_varies=model.latent_varies,
        degrees=degrees,
        edge_types=edge_types,
        edge_type2dim=edge_type2dim,
        placeholders=placeholders,
        batch_size=FLAGS.batch_size,
        margin=FLAGS.max_margin
    )

In [12]:
# Announce session setup and start from an empty feed dictionary.
print("Initialize session")
feed_dict = dict()

Initialize session
