In [1]:
import networkx as nx
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import scipy.sparse as sp
import pandas as pd
import progressbar as pgb
import csv
import random
import os, sys
import shutil

In [9]:
### Hyperparameters of the model.
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 0.004, '')
flags.DEFINE_integer('feature_num', 3, '')
flags.DEFINE_integer('batch_size', 1, '')
flags.DEFINE_integer('max_steps', 1, '')
flags.DEFINE_boolean('dropout', False, '')
flags.DEFINE_integer('eval_every', 5, '')

In [None]:
def load_humanbase_coex_data(file_name):
    """
    """
    target_file = open(file_name, 'r')
    target_raws = target_file.readlines()
    print("Reading coexpression data {}. . . .".format(file_name))
    target_list = list(map(lambda x : x.rstrip().split('\t') , target_raws))
    
    print("Edge building . . . . .")
    coex_graph = nx.Graph()
    bar = pgb.ProgressBar(max_value=len(target_raws))
    prog = 0
    for i in target_list:
        try:
            coex_graph.add_edge(i[0], i[1], weight=float(i[2]))
        except:
            print("{} is not valid float".format(i[2]))
        prog += 1
        bar.update(prog)
    
    return coex_graph


def update_features_coex(file_name, coex_graph):
    """
    """
    genes = pd.read_csv(file_name)
    disease_name = file_name.rsplit('/')[1].split('_')[0]
    
    print("Update features from {}. . .".format(file_name))
    bar = pgb.ProgressBar(max_value=len(coex_graph.nodes))
    prog = 0
    for v in coex_graph.nodes:
        check_in = False
        for i in range(len(genes)):
            if str(v) == str(genes['geneid'].iloc[i]):
                nx.set_node_attributes(coex_graph, {v :{disease_name : float(genes['score'].iloc[i])}})
                check_in = True
                break
        if not check_in:
            nx.set_node_attributes(coex_graph, {v :{disease_name : 0.0}})
            check_in = False
        prog += 1
        bar.update(prog)
    
    return coex_graph


def visualize_network(net, edge_width_scale=1):
    """
    
    """
    pos = nx.spring_layout(net)
    
    weight_list = []
    for node1, node2, data in net.edges(data=True):
        weight_list.append(data['weight'])
    weight_set = set(weight_list)

    for w in weight_set:
        weighted_edges = [(node1, node2) for (node1, node2, edge_attr) in net.edges(data=True) if edge_attr['weight']==w]
        width = edge_width_scale*w*len(net)/sum(weight_set)
        nx.draw_networkx_edges(net, pos, edge_list=weighted_edges, width=width)
    nx.draw_networkx_nodes(net, pos, node_list=net.nodes(), node_size=20)
    
    plt.axis('off')
    plt.show()
    
    


In [10]:
### Make graph with coexpression data
coex_graph = load_humanbase_coex_data("data/tooth_top_0.15")

Reading coexpression data data/tooth_top_0.15. . . .


  0% (81398 of 9369607) |                | Elapsed Time: 0:00:00 ETA:   0:00:22

Edge building . . . . .


100% (9369607 of 9369607) |##############| Elapsed Time: 0:00:23 ETA:  00:00:00

In [33]:
### Give score features that represent relations with a disease for each vertex.
coex_graph = update_features_coex("data/Periodontitis_genes.csv", coex_graph)
coex_graph = update_features_coex("data/diabetes_genes.csv", coex_graph)
coex_graph = update_features_coex("data/rheumatoid_genes.csv", coex_graph)

  0% (51 of 22684) |                     | Elapsed Time: 0:00:00 ETA:   0:00:45

Update features from data/Periodontitis_genes.csv. . .


  6% (1524 of 22684) |#                  | Elapsed Time: 0:00:02 ETA:   0:00:40

KeyboardInterrupt: 

In [11]:
test = pd.read_csv("data/diabetes_genes.csv")
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1671 entries, 0 to 1670
Data columns (total 18 columns):
Disease           1671 non-null object
Disease_id        1671 non-null object
Gene              1671 non-null object
geneid            1671 non-null int64
UniProt           1527 non-null object
Gene_Full_Name    1671 non-null object
Protein_Class     820 non-null object
N_diseases_g      1671 non-null int64
DSI_g             1671 non-null float64
DPI_g             1671 non-null float64
pLI               1482 non-null float64
score             1671 non-null float64
EL_gda            1671 non-null object
EI_gda            1395 non-null float64
N_PMIDs           1671 non-null int64
N_SNPs_gda        1671 non-null int64
First_Ref         1650 non-null float64
Last_Ref          1650 non-null float64
dtypes: float64(7), int64(4), object(7)
memory usage: 235.1+ KB


In [8]:
#### Tensorflow Session and training.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

LOG_DIR = os.path.join(os.path.dirname(os.path.abspath('')), 'tensorLog')

if os.path.exists(LOG_DIR) == False:
    os.mkdir(LOG_DIR)
else:
    shutil.rmtree(LOG_DIR, ignore_errors=True)
    os.mkdir(LOG_DIR)

merged = tf.summary.merge_all()
writer = tf.summary.FileWriter(LOG_DIR, graph=sess.graph)

GCN_model = graph_cnn(coex_graph, "coex_periodontitis")
is_train_step = tf.placeholder(tf.bool)

optimizer = tf.train.AdamOptimizer(learning_rate)
train_step = optimizer.minimize(GCN_model.loss)

GCN_model.vertex_inputs = GCN_model.reshape(GCN_model.X.shape)
GCN_model.vertex_labels = GCN_model.reshape(GCN_model.Y.shape)

for epoch in range(iterations):
    train_dict = {GCN_model.X: GCN_model.vertex_inputs,
                  GCN_model.Y: GCN_model.vertex_labels,
                  GCN_model.loss_weight: 1,
                  GCN_model.dropout:0.8,
                  is_train_step:True}
    summary, _ = sess.run([merged, train_step], feed_dict=train_dict)
    
    train_loss, train_preds = sess.run([GCN_model.loss, GCN_model.model_output()], feed_dict=train_dict)
    
    writer.add_summary(summary, epoch)
    


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


ValueError: Cannot feed value of shape (22684,) for Tensor 'vertex_input:0', which has shape '(22684, 1)'

In [None]:
GCN_model.labels

In [57]:
np.fromiter(dict(coex_graph.nodes(data='score')).values(), float).shape

(22684,)

In [6]:
### CHECK CONNECTED COMPONENTS OF COEX_GRAPH
nx.algorithms.number_connected_components(coex_graph)

1

In [9]:
np.diag(np.sqrt(np.fromiter(dict(coex_graph.degree).values(), float))) + 1

array([[31.757113  ,  1.        ,  1.        , ...,  1.        ,
         1.        ,  1.        ],
       [ 1.        , 46.08880127,  1.        , ...,  1.        ,
         1.        ,  1.        ],
       [ 1.        ,  1.        , 39.44476557, ...,  1.        ,
         1.        ,  1.        ],
       ...,
       [ 1.        ,  1.        ,  1.        , ...,  2.        ,
         1.        ,  1.        ],
       [ 1.        ,  1.        ,  1.        , ...,  1.        ,
         2.        ,  1.        ],
       [ 1.        ,  1.        ,  1.        , ...,  1.        ,
         1.        ,  2.        ]])

In [6]:
genes = pd.read_csv('diabetes_genes.csv')

genes = genes.drop(genes.index([[10]], axis='index')

In [37]:
nx.set_node_attributes(coex_graph, {'1': {'a':3, 'b':4}})

In [58]:
node_list = list(coex_graph.nodes())
#for n in node_list:
#    print(n,end="::")

In [19]:
nx.classes.function.neighbors(coex_graph,random.choice(list(coex_graph.nodes())))

<dict_keyiterator at 0x7f65774b1868>

In [52]:
for i in coex_graph.nodes(data=True)['1'].values():
    print(i)

3
4
0.0


In [54]:
random(coex_graph.nodes())

22684

In [64]:
a = open('ok.txt','w')
a.write("423\t123\n")
a.write("423\t123\n")
a.write("423\t123\n")
a.close()

In [65]:
import math
math.sqrt(3)

1.7320508075688772

In [68]:
[[1]][0][0]

1