In [None]:
import warnings
warnings.filterwarnings("ignore")

from sklearn.preprocessing import LabelEncoder, LabelBinarizer
import tensorflow as tf
import pandas as pd
import numpy as np
import time
import datetime

import load_data

### One hidden layer

In [2]:
def weight_variable(shape):
    """Create a trainable weight tensor of the given shape.

    The initial values are drawn from a truncated normal distribution
    with a standard deviation of 0.1.

    Args:
        shape: Shape of the variable, as accepted by ``tf.truncated_normal``.

    Returns:
        A ``tf.Variable`` holding the randomly initialised weights.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


def bias_variable(shape):
    """Create a trainable bias tensor of the given shape.

    Every element starts at the constant value 0.1.

    Args:
        shape: Shape of the variable, as accepted by ``tf.constant``.

    Returns:
        A ``tf.Variable`` holding the constant-initialised biases.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))


def fc_layer(input, size_in, size_out, name="fc"):
    """Build a fully connected (affine) layer: ``input @ W + b``.

    No non-linearity is applied here; callers wrap the result themselves
    when they need one. Histogram summaries for the weights, biases and
    the layer output are registered under the layer's name scope.

    Args:
        input: Tensor whose last dimension is ``size_in``.
        size_in: Number of input features.
        size_out: Number of output units.
        name: Name scope used for the layer's ops and summaries.

    Returns:
        The pre-activation output tensor of the layer.
    """
    with tf.name_scope(name):
        # Weights are created before biases so variable naming is unchanged.
        weights = weight_variable([size_in, size_out])
        biases = bias_variable([size_out])
        pre_activation = tf.matmul(input, weights) + biases

        tf.summary.histogram("weights", weights)
        tf.summary.histogram("biases", biases)
        tf.summary.histogram("activations", pre_activation)
    return pre_activation
    

def train_model(data, label="tissue", learning_rate=1e-3, extra="",
                n_steps=20001, batch_size=100, eval_every=5, print_every=100):
    """Train a one-hidden-layer classifier and log progress to TensorBoard.

    The network is ``x -> fc -> ReLU -> fc -> logits`` with the hidden width
    set to the integer mean of the input and output widths. It is optimised
    with Adam on softmax cross-entropy. Summaries are written under
    ``/tmp/tcga_<today>/pca<extra>_<label>``.

    Args:
        data: Dataset object exposing ``train`` and ``test`` splits. Each
            split must provide ``X`` (2-D, features in axis 1) and ``y``
            (indexable by ``label``, yielding a 2-D array with classes in
            axis 1 -- presumably one-hot; confirm against ``load_data``).
            ``data.train`` must also provide ``next_batch(batch_size)``
            returning ``(batch_x, batch_y)``.
        label: Key selecting which target in ``y`` to predict.
        learning_rate: Learning rate passed to ``tf.train.AdamOptimizer``.
        extra: Suffix inserted into the TensorBoard run directory name.
        n_steps: Number of training steps to run.
        batch_size: Number of samples requested from ``next_batch`` per step.
        eval_every: Evaluate on the full train and test splits every this
            many steps.
        print_every: Print accuracies on evaluation steps that are also a
            multiple of this value.

    Raises:
        ValueError: If ``batch_size``, ``eval_every`` or ``print_every`` is
            not a positive number.
    """
    if batch_size <= 0 or eval_every <= 0 or print_every <= 0:
        raise ValueError(
            "batch_size, eval_every and print_every must be positive")

    tf.reset_default_graph()
    LOGDIR = "/tmp/tcga_{0}/".format(str(datetime.datetime.today().date()))
    N_IN = data.train.X.shape[1]
    N_OUT = data.train.y[label].shape[1]
    # Hidden width is the (floored) mean of the input and output widths.
    N_HIDDEN = (N_IN + N_OUT) // 2

    x = tf.placeholder(tf.float32, [None, N_IN], name="x")
    y_true = tf.placeholder(tf.float32, [None, N_OUT], name="labels")
    hidden = tf.nn.relu(fc_layer(x, N_IN, N_HIDDEN), name="hidden")
    # Second layer outputs raw logits; softmax is applied inside the loss.
    y_pred = fc_layer(hidden, N_HIDDEN, N_OUT, name="softmax")

    xent = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=y_true, logits=y_pred), name="xent")
    tf.summary.scalar("xent", xent)

    train_step = tf.train.AdamOptimizer(learning_rate).minimize(xent)

    # Merge the layer histograms and the loss *before* the accuracy summaries
    # are created. Otherwise the merged op would also emit "test_accuracy"
    # when evaluated on the training feed and pollute the test curve.
    summ = tf.summary.merge_all()

    correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    train_accu_summ = tf.summary.scalar("train_accuracy", accuracy)
    test_accu_summ = tf.summary.scalar("test_accuracy", accuracy)

    # Full-split feeds come from the `data` argument, not a notebook global.
    train_feed = {x: data.train.X, y_true: data.train.y[label]}
    test_feed = {x: data.test.X, y_true: data.test.y[label]}

    writer = tf.summary.FileWriter(LOGDIR + "pca{0}_{1}".format(extra, label))
    try:
        # The context manager closes the session even if training fails.
        with tf.Session() as sess:
            writer.add_graph(sess.graph)

            # training
            t0 = time.time()
            sess.run(tf.global_variables_initializer())

            for i in range(n_steps):
                batch_x, batch_y = data.train.next_batch(batch_size)
                sess.run(train_step,
                         feed_dict={x: batch_x, y_true: batch_y[label]})
                if i % eval_every == 0:
                    [train_accuracy, train_s, s] = sess.run(
                        [accuracy, train_accu_summ, summ],
                        feed_dict=train_feed)
                    [test_accuracy, test_s] = sess.run(
                        [accuracy, test_accu_summ],
                        feed_dict=test_feed)
                    writer.add_summary(train_s, i)
                    writer.add_summary(test_s, i)
                    writer.add_summary(s, i)
                    if i % print_every == 0:
                        print("step", i, "training accuracy", train_accuracy,
                              "test_accuracy", test_accuracy)
            print("training time:", time.time() - t0)
    finally:
        # Flush pending events and release the event-file handle.
        writer.close()

In [3]:
# Load the dataset from the CSV via the project-local `load_data` module.
# The result is used below as an object with `train` / `test` splits.
tcga = load_data.read_data_sets("./data/mRNA_lognorm_MinMaxScaled.csv")

In [None]:
# Train one independent model per target label on the full feature set.
for label_name in ("tissue", "gender", "tumor"):
    print(label_name)
    # Reset the training split's epoch state before each run.
    tcga.train.reset_epoch()
    train_model(data=tcga, label=label_name, extra="_all")

tissue
step 0 training accuracy 0.113518 test_accuracy 0.107011
step 100 training accuracy 0.758327 test_accuracy 0.75738
step 200 training accuracy 0.852714 test_accuracy 0.854244
step 300 training accuracy 0.880834 test_accuracy 0.869926
step 400 training accuracy 0.878645 test_accuracy 0.858856
step 500 training accuracy 0.875764 test_accuracy 0.860701
step 600 training accuracy 0.856748 test_accuracy 0.861624
step 700 training accuracy 0.893973 test_accuracy 0.872694
step 800 training accuracy 0.834159 test_accuracy 0.822878
step 900 training accuracy 0.906419 test_accuracy 0.886531
step 1000 training accuracy 0.878645 test_accuracy 0.867159
step 1100 training accuracy 0.927625 test_accuracy 0.901291
step 1200 training accuracy 0.916215 test_accuracy 0.890221
step 1300 training accuracy 0.916331 test_accuracy 0.893911
step 1400 training accuracy 0.930736 test_accuracy 0.913284
step 1500 training accuracy 0.941224 test_accuracy 0.917897
step 1600 training accuracy 0.908609 test_accu