In [5]:
import datetime
import os
import sys
import time
from pprint import pprint as pp

import numpy as np
import pandas as pd
import tensorflow as tf

# Put the project checkout first on the module search path before importing
# load_data. NOTE(review): this is a machine-specific absolute path and
# load_data is presumably resolved from it -- confirm, and consider a
# configurable or relative location so the notebook runs elsewhere.
sys.path.insert(0, '/home/molly/Desktop/DeepTCGA/')
import load_data

In [1]:
def fc_layer(A_prev, size_in, size_out, name="fully-connected"):
    """Create one affine (fully-connected) layer inside its own name scope.

    No non-linearity is applied here; callers wrap the returned
    pre-activation themselves.

    Args:
        A_prev: input to the layer, multiplied on the right by the weights.
        size_in: number of rows of the weight matrix.
        size_out: number of columns of the weight matrix / length of the bias.
        name: name scope under which the variables and summaries are created.

    Returns:
        Tuple ``(act, w, b)``: the pre-activation ``A_prev @ w + b``, the
        weight variable and the bias variable.
    """
    with tf.name_scope(name):
        # Weights start from a truncated normal (mean 0, stddev 0.1),
        # biases from the constant 0.1.
        initial_weights = tf.truncated_normal([size_in, size_out], mean=0, stddev=0.1)
        weights = tf.Variable(initial_weights)
        initial_biases = tf.constant(0.1, shape=[size_out])
        biases = tf.Variable(initial_biases)

        pre_activation = tf.matmul(A_prev, weights) + biases

        # One histogram summary per tensor of interest.
        for tag, tensor in (("weights", weights),
                            ("biases", biases),
                            ("activations", pre_activation)):
            tf.summary.histogram(tag, tensor)

        return pre_activation, weights, biases


def build_model(x, N_IN, N_HIDDENS):
    """Assemble the autoencoder graph: three hidden layers plus a linear output.

    Args:
        x: input fed to the first layer.
        N_IN: width of the input; also the width of the reconstruction.
        N_HIDDENS: indexable holding the three hidden-layer widths
            (only the first three entries are read).

    Returns:
        Tuple ``(x_recon, parameters)``. ``x_recon`` is the output of the
        last affine layer (no activation). ``parameters`` maps ``a1..a3`` to
        the hidden pre-activations and ``w1..w4`` / ``b1..b4`` to the layer
        variables.
    """
    parameters = {}
    layer_input, fan_in = x, N_IN

    # Hidden layers: affine layer "fc<k>" followed by a leaky ReLU "hidden<k>".
    for k in (1, 2, 3):
        fan_out = N_HIDDENS[k - 1]
        pre_act, weights, biases = fc_layer(layer_input, fan_in, fan_out, name="fc%d" % k)
        parameters["a%d" % k] = pre_act
        parameters["w%d" % k] = weights
        parameters["b%d" % k] = biases
        layer_input = tf.nn.leaky_relu(pre_act, name="hidden%d" % k)
        fan_in = fan_out

    # Output layer maps back to the input width; its pre-activation is the
    # reconstruction and is not stored in the parameter dict.
    x_recon, out_weights, out_biases = fc_layer(layer_input, fan_in, N_IN, name="fc4")
    parameters["w4"] = out_weights
    parameters["b4"] = out_biases
    return x_recon, parameters


def back_prop(x, x_recon, learning_rate):
    """Build the training op that minimises the reconstruction error.

    Args:
        x: target values.
        x_recon: reconstruction to compare against ``x``.
        learning_rate: learning rate handed to the Adam optimizer.

    Returns:
        The op returned by ``AdamOptimizer.minimize`` for the mean squared
        difference between ``x_recon`` and ``x``.
    """
    squared_error = tf.square(x_recon - x)
    loss = tf.reduce_mean(squared_error)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    return optimizer.minimize(loss)


def feed_forward(x, parameters):
    """Run ``x`` through the network defined by an existing parameter dict.

    Args:
        x: input to the first layer.
        parameters: mapping with keys ``w1..w4`` and ``b1..b4``.

    Returns:
        Tuple ``(x_recon, hidden2)``: the output of the fourth (linear) layer
        and the activation of the second hidden layer.
    """
    activation = x
    hidden_activations = []

    # Layers 1-3: affine transform followed by a leaky ReLU.
    for k in (1, 2, 3):
        weights, biases = parameters["w%d" % k], parameters["b%d" % k]
        activation = tf.nn.leaky_relu(tf.matmul(activation, weights) + biases)
        hidden_activations.append(activation)

    # Layer 4 is purely linear and yields the reconstruction.
    out_weights, out_biases = parameters["w4"], parameters["b4"]
    x_recon = tf.matmul(activation, out_weights) + out_biases
    return x_recon, hidden_activations[1]


def mse(x, x_recon, name=""):
    """Mean squared error between ``x_recon`` and ``x`` plus a scalar summary.

    Args:
        x: reference values.
        x_recon: values compared against ``x``.
        name: prefix for the summary, which is registered as ``name + "_mse"``.

    Returns:
        Tuple ``(error, summary)``: the mean of the squared differences and
        the scalar summary created for it.
    """
    squared_diff = tf.square(x_recon - x)
    mean_squared_error = tf.reduce_mean(squared_diff)
    summary_tag = name + "_mse"
    error_summary = tf.summary.scalar(summary_tag, mean_squared_error)
    return mean_squared_error, error_summary

In [20]:
def train_model(data, batch_size=128, num_epoch=1000, learning_rate=1e-3, extra=""):
    """Train the autoencoder, log MSE to TensorBoard and save the latent codes.

    The network uses hidden widths ``[1000, 2, 1000]``. After every epoch the
    training and validation MSE are written as summaries; every 10th epoch
    they are also printed together with the epoch's wall-clock time. When
    training ends, the second-hidden-layer activations for the training and
    validation sets are saved under ``../results/AE/``.

    Args:
        data: dataset object exposing ``train.num_features`` and
            ``prep_batch(batch_size=..., count_by="epoch")``; the latter must
            return ``(train_batch, train_iter, val_all, val_iter, train_all,
            train_iter_all)`` where the batch objects are indexable by ``"X"``
            and the iterators have an ``initializer`` op.
        batch_size: mini-batch size forwarded to ``data.prep_batch``.
        num_epoch: number of passes over the training iterator.
        learning_rate: learning rate for the optimizer built by ``back_prop``.
        extra: suffix identifying this run; logs go to
            ``/tmp/tcga_<date>/ae_<extra>``.

    Returns:
        None. Results are written to disk.
    """
    tf.reset_default_graph()

    # Each run gets its own sub-directory of the dated log folder. Plain
    # string concatenation would glue the run name onto the date
    # ("/tmp/tcga_<date>ae_<extra>") instead of nesting it.
    log_root = "/tmp/tcga_{0}".format(datetime.datetime.today().date())
    run_logdir = os.path.join(log_root, "ae_{0}".format(extra))
    results_dir = "../results/AE"

    N_IN = data.train.num_features
    N_HIDDENS = [1000, 2, 1000]

    # train step
    (train_batch, train_iter, val_all, val_iter,
        train_all, train_iter_all) = data.prep_batch(batch_size=batch_size, count_by="epoch")

    x = train_batch["X"]
    x_recon, parameters = build_model(x, N_IN, N_HIDDENS)
    train_step = back_prop(x, x_recon, learning_rate)

    # mse on the full training / validation inputs, sharing the trained weights
    x_train, x_val = train_all["X"], val_all["X"]
    x_train_recon, train_latent = feed_forward(x_train, parameters)
    x_val_recon, val_latent = feed_forward(x_val, parameters)
    train_mse, train_summ = mse(x_train, x_train_recon, name="train")
    # Summary name was previously misspelled "valiation".
    val_mse, val_summ = mse(x_val, x_val_recon, name="validation")

    eval_iter_inits = [train_iter_all.initializer, val_iter.initializer]

    # Fail early (before hours of training) if the output folder cannot be
    # created, rather than at the final np.save.
    os.makedirs(results_dir, exist_ok=True)

    # run
    with tf.Session() as sess:
        writer = tf.summary.FileWriter(run_logdir)
        try:
            writer.add_graph(sess.graph)
            sess.run(tf.global_variables_initializer())

            for epoch in range(num_epoch):
                sess.run(train_iter.initializer)
                t0 = time.time()
                try:
                    while True:
                        sess.run(train_step)
                except tf.errors.OutOfRangeError:
                    # Training iterator exhausted: the epoch is complete.
                    pass

                # Rewind the evaluation iterators before every evaluation.
                sess.run(eval_iter_inits)
                train_error, train_s, val_error, val_s = sess.run(
                    [train_mse, train_summ, val_mse, val_summ])
                writer.add_summary(train_s, epoch)
                writer.add_summary(val_s, epoch)
                if epoch % 10 == 0:
                    print("epoch", epoch)
                    print("training mse:", train_error, "validation mse", val_error)
                    print("epoch time:", time.time()-t0)

            # The last evaluation consumed the evaluation iterators, so rewind
            # them again before pulling the latent codes.
            sess.run(eval_iter_inits)
            train_codes, val_codes = sess.run([train_latent, val_latent])
        finally:
            # Flush pending summaries and release the event file even if
            # training is interrupted or fails.
            writer.close()

    np.save("../results/AE/train_latent_complex2.npy", train_codes)
    np.save("../results/AE/val_latent_complex2.npy", val_codes)

In [12]:
# Read the dataset from an HDF file located relative to the notebook's
# working directory. train_model uses the result's `train.num_features`
# attribute and `prep_batch` method.
# NOTE(review): the file name suggests log-normalised, standard-scaled mRNA
# data -- confirm against load_data.read_data_sets.
tcga = load_data.read_data_sets("../data/mRNA_lognorm_StandardScaled.hdf")

In [22]:
# Train for 1000 epochs with the default batch size and learning rate;
# `extra` only labels this run's TensorBoard log location.
train_model(tcga, num_epoch=1000, extra="1000_2_1000_hidden")

epoch 0
training mse: 1.0314863 validation mse 1.0242951
epoch time: 3.84420108795166
epoch 10
training mse: 0.8199897 validation mse 0.8192159
epoch time: 2.279664993286133
epoch 20
training mse: 0.79557216 validation mse 0.7906198
epoch time: 2.2998669147491455
epoch 30
training mse: 0.75466144 validation mse 0.75536805
epoch time: 2.2849061489105225
epoch 40
training mse: 0.7288006 validation mse 0.7389127
epoch time: 2.2525341510772705
epoch 50
training mse: 0.7132029 validation mse 0.7259026
epoch time: 2.2798421382904053
epoch 60
training mse: 0.6762423 validation mse 0.6928521
epoch time: 2.2760908603668213
epoch 70
training mse: 0.6667006 validation mse 0.68580997
epoch time: 2.287717819213867
epoch 80
training mse: 0.65544677 validation mse 0.6740033
epoch time: 2.300046920776367
epoch 90
training mse: 0.643133 validation mse 0.66727024
epoch time: 2.298670768737793
epoch 100
training mse: 0.6292525 validation mse 0.6528153
epoch time: 2.3039493560791016
epoch 110
training mse

epoch 920
training mse: 0.5159189 validation mse 0.633598
epoch time: 2.2924773693084717
epoch 930
training mse: 0.5189584 validation mse 0.6349299
epoch time: 2.3170957565307617
epoch 940
training mse: 0.51351506 validation mse 0.6332519
epoch time: 2.3079659938812256
epoch 950
training mse: 0.51358396 validation mse 0.6333953
epoch time: 2.3152217864990234
epoch 960
training mse: 0.5124461 validation mse 0.6304858
epoch time: 2.2796480655670166
epoch 970
training mse: 0.5136109 validation mse 0.63148826
epoch time: 2.276047945022583
epoch 980
training mse: 0.5089153 validation mse 0.63021404
epoch time: 2.283653497695923
epoch 990
training mse: 0.5113977 validation mse 0.63590187
epoch time: 2.2964727878570557
