In [26]:
import warnings
warnings.filterwarnings("ignore")

from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
import pandas as pd
import numpy as np
import datetime
import time

import load_data

In [52]:
def fc_layer(input, size_in, size_out, name="fully-connected"):
    """Create an affine layer ``input @ W + b`` and return its raw output.

    No non-linearity is applied here; callers wrap the result themselves
    when they need one.

    Args:
        input: tensor that is matrix-multiplied with the weight matrix.
        size_in: number of rows of the weight matrix.
        size_out: number of columns of the weight matrix and length of the
            bias vector.
        name: name scope under which the layer's ops are created.

    Returns:
        The pre-activation tensor ``tf.matmul(input, W) + b``.

    Side effects:
        Registers three histogram summaries ("weights", "biases",
        "activations") for the layer.
    """
    with tf.name_scope(name):
        # Weights start from a truncated normal (stddev 0.1); biases from a
        # constant 0.1.
        initial_weights = tf.truncated_normal([size_in, size_out], stddev=0.1)
        weights = tf.Variable(initial_weights)
        initial_biases = tf.constant(0.1, shape=[size_out])
        biases = tf.Variable(initial_biases)

        affine = tf.matmul(input, weights) + biases

        tracked = (
            ("weights", weights),
            ("biases", biases),
            ("activations", affine),
        )
        for tag, tensor in tracked:
            tf.summary.histogram(tag, tensor)
    return affine
    
    
def train_model(data, labels=("tissue", "gender", "tumor"),
                learning_rate=1e-3, epochs=1000, pca_var="",
                batch_size=100, logdir=None):
    """Train one hidden-layer classifier per label and log to TensorBoard.

    Every label gets its own hidden layer and its own output layer on top of
    the shared input placeholder; the per-label cross-entropies are summed
    into a single loss that is minimized with Adam.

    Args:
        data: dataset object. The code reads ``data.train.X``,
            ``data.train.y[label]``, ``data.train.next_batch(n)``,
            ``data.train.epochs_completed``, ``data.test.X`` and
            ``data.test.y[label]``. ``shape[1]`` of each target array is used
            as the number of classes (presumably one-hot rows -- confirm
            against ``load_data``).
        labels: names of the targets to learn; any non-empty sequence.
        learning_rate: learning rate passed to ``tf.train.AdamOptimizer``.
        epochs: number of passes over the training set to run.
        pca_var: only used to name the log sub-directory (``pca<pca_var>``).
        batch_size: number of samples requested per training step.
        logdir: parent directory for the TensorBoard event files. Defaults to
            ``/tmp/tcga_<today>/``.

    Raises:
        ValueError: if ``labels`` is empty.
    """
    import os  # function-scope: the notebook's import cell does not provide it

    labels = list(labels)
    if not labels:
        raise ValueError("labels must name at least one target")

    tf.reset_default_graph()
    if logdir is None:
        logdir = "/tmp/tcga_{0}/".format(str(datetime.datetime.today().date()))

    n_in = data.train.X.shape[1]
    n_out = {label_name: data.train.y[label_name].shape[1]
             for label_name in labels}
    # NOTE(review): the original wrapped this sum in np.mean(), which is a
    # no-op on a scalar. The hidden width is therefore inputs + all outputs;
    # if an average of the two was intended, change it here deliberately.
    n_hidden = int(n_in + sum(n_out.values()))

    # Accuracy scalars live in their own collection so that merge_all() below
    # only gathers the layer histograms. Otherwise the "test_accuracy_*" tags
    # would also be evaluated (and written) on the training feed.
    accuracy_collection = ["accuracy_summaries"]

    x = tf.placeholder(tf.float32, [None, n_in], name="x")
    y_true, xent, accuracy = {}, {}, {}
    train_summ, test_summ = {}, {}
    for label_name in labels:
        y_true[label_name] = tf.placeholder(tf.float32,
                                            [None, n_out[label_name]],
                                            name=label_name)
        hidden = tf.nn.relu(
            fc_layer(x, n_in, n_hidden, name="fc-" + label_name),
            name="hidden")
        logits = fc_layer(hidden, n_hidden, n_out[label_name],
                          name="softmax-" + label_name)
        xent[label_name] = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=y_true[label_name], logits=logits), name="xent")
        correct_prediction = tf.equal(tf.argmax(logits, 1),
                                      tf.argmax(y_true[label_name], 1))
        accuracy[label_name] = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))
        with tf.name_scope(label_name):
            # Same tensor, two tags: which split it reports depends on the
            # feed_dict used when the summary is evaluated.
            train_summ[label_name] = tf.summary.scalar(
                "train_accuracy_" + label_name, accuracy[label_name],
                collections=accuracy_collection)
            test_summ[label_name] = tf.summary.scalar(
                "test_accuracy_" + label_name, accuracy[label_name],
                collections=accuracy_collection)

    # Joint objective: sum of the cross-entropies of every requested label.
    total_loss = tf.add_n([xent[label_name] for label_name in labels])
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

    def _feed(features, targets):
        """Build a feed_dict for `features` and the per-label `targets`."""
        feed = {x: features}
        for label_name in labels:
            feed[y_true[label_name]] = targets[label_name]
        return feed

    summ = tf.summary.merge_all()  # histograms only, see accuracy_collection
    writer = tf.summary.FileWriter(
        os.path.join(logdir, "pca{0}".format(pca_var)))
    try:
        with tf.Session() as sess:
            writer.add_graph(sess.graph)

            # training
            t0 = time.time()
            sess.run(tf.global_variables_initializer())

            # Count epochs relative to the dataset's counter at entry so the
            # function also works on a dataset that was iterated before.
            first_epoch = data.train.epochs_completed
            epoch_completed = 0
            while data.train.epochs_completed - first_epoch < epochs:
                batch_x, batch_y = data.train.next_batch(batch_size)
                sess.run(train_step, feed_dict=_feed(batch_x, batch_y))

                finished = data.train.epochs_completed - first_epoch
                if finished > epoch_completed:
                    epoch_completed = finished
                    # Evaluate on the dataset that was passed in, not on a
                    # notebook-level global.
                    train_s, s = sess.run(
                        [train_summ, summ],
                        feed_dict=_feed(data.train.X, data.train.y))
                    test_s = sess.run(
                        test_summ,
                        feed_dict=_feed(data.test.X, data.test.y))
                    for label_name in labels:
                        writer.add_summary(train_s[label_name], epoch_completed)
                        writer.add_summary(test_s[label_name], epoch_completed)
                    writer.add_summary(s, epoch_completed)
                    if epoch_completed % 10 == 0:
                        print("epoch completed", epoch_completed)
            print("training time:", time.time() - t0)
    finally:
        # Flush pending events and release the file handle even on failure.
        writer.close()

In [55]:
# Train one model per listed PCA variance value. The value is substituted
# into the CSV file name and forwarded to train_model as `pca_var`.
# The loaded dataset stays bound to the module-level name `tcga`.
for PCA_variance in (0.9,):
    print(PCA_variance)
    csv_path = "./data/mRNA_PCA_{0}_variance_MinMaxScaled.csv".format(PCA_variance)
    tcga = load_data.read_data_sets(csv_path)
    train_model(tcga, epochs=1000, pca_var=PCA_variance)

0.9
epoch completed 10
epoch completed 20
epoch completed 30
epoch completed 40
epoch completed 50
epoch completed 60
epoch completed 70
epoch completed 80
epoch completed 90
epoch completed 100
epoch completed 110
epoch completed 120
epoch completed 130
epoch completed 140
epoch completed 150
epoch completed 160
epoch completed 170
epoch completed 180
epoch completed 190
epoch completed 200
epoch completed 210
epoch completed 220
epoch completed 230
epoch completed 240
epoch completed 250
epoch completed 260
epoch completed 270
epoch completed 280
epoch completed 290
epoch completed 300
epoch completed 310
epoch completed 320
epoch completed 330
epoch completed 340
epoch completed 350
epoch completed 360
epoch completed 370
epoch completed 380
epoch completed 390
epoch completed 400
epoch completed 410
epoch completed 420
epoch completed 430
epoch completed 440
epoch completed 450
epoch completed 460
epoch completed 470
epoch completed 480
epoch completed 490
epoch completed 500
epoch