In [9]:
import tensorflow as tf
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from tensorflow.python.keras import backend as K

In [2]:
df = pd.read_csv('Gordon-2002_LungCancer.csv')
df = df.transpose()

mpm_df = df[df[0]=='MPM']
ad_df = df[df[0]=='AD']



df = df.reset_index(drop=True)

df = df.drop(0,axis=1)

df.columns = df.iloc[0]

df = df.drop(0)
df = df.reset_index(drop=True)
df = df.apply(pd.to_numeric)

df[df.columns] = StandardScaler().fit_transform(df[df.columns])
df.to_csv('patient_removed_type.csv')

In [3]:
#parameters for neural network
learning_param = 0.005
epochs = 5000
batch_size = 32
beta = 5

# network parameters
input_dimension = 1626
nn_dimension = 512

# how many latent variables do we want?
latent_var_dimension = 5

In [4]:
# initialization
def xavier (in_shape):
    val = tf.random_normal(shape=in_shape, stddev= 1./tf.sqrt(in_shape[0]/2.))
    return(val)

In [6]:
Weight = { "weight_matrix_encoder_hidden": tf.Variable(xavier([input_dimension,nn_dimension])),
            "weight_mean_hidden": tf.Variable(xavier([nn_dimension,latent_var_dimension])),
            "weight_std_hidden": tf.Variable(xavier([nn_dimension,latent_var_dimension])),
            "weight_matrix_decoder_hidden": tf.Variable(xavier([latent_var_dimension,nn_dimension])),
            "weight_decoder": tf.Variable(xavier([nn_dimension, input_dimension]))
        }

Bias = {    "bias_matrix_encoder_hidden": tf.Variable(xavier([nn_dimension])),
            "bias_mean_hidden": tf.Variable(xavier([latent_var_dimension])),
            "bias_std_hidden": tf.Variable(xavier([latent_var_dimension])),
            "bias_matrix_decoder_hidden": tf.Variable(xavier([nn_dimension])),
            "bias_decoder": tf.Variable(xavier([input_dimension]))
        }

In [7]:
# building variational autoencoder

# Encoder
patient_X = tf.placeholder(tf.float32, shape= [None, input_dimension])
Encoder_layer = tf.add(tf.matmul(patient_X, Weight["weight_matrix_encoder_hidden"]), Bias["bias_matrix_encoder_hidden"])
Encoder_layer = tf.nn.tanh(Encoder_layer)

Mean_layer = tf.add(tf.matmul(Encoder_layer, Weight["weight_mean_hidden"]), Bias["bias_mean_hidden"])
SD_layer = tf.add(tf.matmul(Encoder_layer, Weight["weight_std_hidden"]), Bias["bias_std_hidden"])

# reparameterize
epsilon = tf.random_normal(tf.shape(SD_layer),dtype = tf.float32, mean = 0.0, stddev = 1.0)

latent_layer = Mean_layer + tf.exp(0.5*SD_layer)*epsilon

# Decoder
Decoder_hidden = tf.add(tf.matmul(latent_layer, Weight["weight_matrix_decoder_hidden"]), Bias["bias_matrix_decoder_hidden"])
Decoder_hidden = tf.nn.tanh(Decoder_hidden)

Decoder_output_layer = tf.add(tf.matmul(Decoder_hidden, Weight["weight_decoder"]), Bias["bias_decoder"])

In [8]:
def next_batch(num, data):
    '''
    Return a total of `num` random samples and labels. 
    '''
    idx = np.arange(0 , len(data))
    np.random.shuffle(idx)
    idx = idx[:num]
    data_shuffle = [data.loc[i] for i in idx]
    return np.asarray(data_shuffle)

In [30]:
# loss function
def loss_function(original_data, reconstructed_data):
    network_loss = tf.sqrt(tf.reduce_mean((original_data - reconstructed_data)**2))
    #mean, var = tf.reduce_mean(original_data-reconstructed_data), np.var(original_data-reconstructed_data, dtype= np.float64)
    #print(mean, var)
    #x = [mean, tf.log(var)]
    #latent_loss = -0.5 * (1 + x[1] - K.square(x[0]) - K.exp(x[1]))
    #latent_loss = tf.reduce_sum(latent_loss, axis=-1) # sum over latent dimension
    #latent_loss = tf.reduce_mean(latent_loss, axis=0) # avg over batch

    # use beta to force less usage of vector space:
    #latent_loss = beta * latent_loss
    #latent_loss = latent_loss + x
    return network_loss

loss_value = loss_function(patient_X, Decoder_output_layer)
optimizer = tf.train.RMSPropOptimizer(learning_param).minimize(loss_value)

# initialize variables
init = tf.global_variables_initializer()

ValueError: setting an array element with a sequence.

In [20]:
# executing graph

# start session
saver = tf.train.Saver()

weight_matrix = {}

with tf.Session() as sess:
    sess.run(init)
    for i in range(epochs):
        x_batch = next_batch(batch_size, df)
        _, weight, loss = sess.run([optimizer,Weight, loss_value], feed_dict = {patient_X : x_batch})

        if i % 500 ==0:
            print("Loss is {0} at iteration {1}".format(loss, i))
        
        if i % 4999 ==0:
            weight_matrix = weight
    save_path = saver.save(sess, "./model.ckpt")    
    print("Model saved in path: %s" % save_path)      

Loss is [[ 1.3973942e+00 -1.5154779e-03  2.8845720e+00 ... -3.3387318e-02
   6.6457062e+00  3.9273143e+00]
 [ 2.8213658e+00  5.2856565e-01  4.7262254e+00 ...  3.8485530e-01
   6.5292249e+00  4.3811979e+00]] at iteration 0


KeyboardInterrupt: 

In [10]:
def reconstruct_data(df, patient_number = 1):
    ## Part 1: Reshape the series to the correct dimension i.e 1, 1626
    x_test = np.array(df.iloc[patient_number])
    
    x_test_1 = x_test.reshape((1, 1626))
    
    with tf.Session() as sess:     
        sess.run(init) 
        saver.restore(sess, "./model.ckpt")
        latent = sess.run(latent_layer, feed_dict={patient_X: x_test_1})
    return latent[0]

In [11]:
latent_df = {}
for i in range(len(df)):
    latent_df[i]= reconstruct_data(df,i)

INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tenso

INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tensorflow:Restoring parameters from ./model.ckpt
INFO:tenso

In [13]:
l_df = pd.DataFrame(list(latent_df.values()), index=list(latent_df.keys()))
l_df.to_csv('latent_var_data.csv')
print(l_df.head(10))

           0          1          2          3         4
0  -6.687513  17.964842  10.367953  -2.076651 -5.971411
1 -22.483128   8.749027  13.003904   6.866226  1.103345
2  -0.134073  10.255378  15.712632  -9.783230 -9.374829
3 -13.989144  17.080902   1.734007   7.211733 -2.207218
4   2.147603  19.993021   6.190531  -4.422985 -8.906800
5   1.983543  10.516049  19.775284  -7.650686 -2.023323
6  -2.806559  16.168756  20.608555   0.172623  0.876547
7   2.272878  14.706274  14.317532 -12.305567  1.449425
8   0.882069  19.924334  14.445507  -2.260811 -3.762520
9 -17.433243  16.541548  13.681810   0.973529 -8.925259
