In [1]:
import pickle
import numpy as np
import time
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping

In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive; the data and
# results paths used further down in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
def load_data(filename):
    """Read a tab-separated matrix file into a transposed float32 array.

    The first line is treated as a header: its first field is discarded and
    the remaining fields become ``sample_names``. Every following non-blank
    line contributes one row: the first field is an identifier (upper-cased
    into ``gene_names``) and the remaining fields are parsed as numbers.

    Args:
        filename: Path of the tab-separated text file to read.

    Returns:
        A tuple ``(data, data_labels, sample_names, gene_names)`` where

        * ``data`` is a float32 ``numpy`` array, transposed so that its rows
          follow the header columns and its columns follow the file's rows;
        * ``data_labels`` is always an empty list (nothing in the file is
          parsed into it; it is kept so the return shape stays stable);
        * ``sample_names`` is the list of header fields after the first;
        * ``gene_names`` is the list of upper-cased row identifiers.

    Raises:
        IndexError: If the file contains no lines at all.
        ValueError: If a value cannot be parsed as a float or the rows have
            differing numbers of fields.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(filename) as handle:
        lines = [line.rstrip('\n') for line in handle]

    sample_names = lines[0].split('\t')[1:]

    gene_names = []
    rows = []
    for line in lines[1:]:
        # Skip blank lines (e.g. a trailing newline at end of file); they would
        # otherwise add an empty row and make the array conversion fail.
        if not line.strip():
            continue
        fields = line.split('\t')
        gene_names.append(fields[0].upper())
        rows.append(fields[1:])

    # File rows become columns of the returned matrix.
    data = np.transpose(np.array(rows, dtype='float32'))

    data_labels = []
    return data, data_labels, sample_names, gene_names

In [4]:
def AE_dense_3layers(input_dim, first_layer_dim, second_layer_dim, third_layer_dim, activation_func, init='he_uniform'):
    """Build a symmetric six-layer dense autoencoder as a Keras ``Sequential``.

    The layer widths are ``first -> second -> third -> second -> first ->
    input_dim``. Every layer, including the last one, uses ``activation_func``
    and the ``init`` kernel initializer. The configuration is printed before
    the model is assembled. The model is returned uncompiled.

    Args:
        input_dim: Number of input features (also the width of the last layer).
        first_layer_dim: Width of the outermost hidden layers.
        second_layer_dim: Width of the middle hidden layers.
        third_layer_dim: Width of the bottleneck layer.
        activation_func: Activation passed to every ``Dense`` layer.
        init: Kernel initializer passed to every ``Dense`` layer.

    Returns:
        The assembled ``Sequential`` model.
    """
    settings = (
        ('input_dim', input_dim),
        ('first_layer_dim', first_layer_dim),
        ('second_layer_dim', second_layer_dim),
        ('third_layer_dim', third_layer_dim),
        ('init', init),
    )
    for label, value in settings:
        print(label + ' = ', value)

    autoencoder = Sequential()

    # The first encoder layer is the only one that declares the input shape.
    autoencoder.add(Dense(first_layer_dim, input_shape=(input_dim,), activation=activation_func, kernel_initializer=init))

    # Rest of the encoder, then the decoder mirroring it back to input_dim.
    remaining_widths = (
        second_layer_dim,
        third_layer_dim,
        second_layer_dim,
        first_layer_dim,
        input_dim,
    )
    for width in remaining_widths:
        autoencoder.add(Dense(width, activation=activation_func, kernel_initializer=init))

    return autoencoder

In [5]:
def save_weight_to_pickle(model, file_name):
    """Pickle the weights of every layer of ``model`` into ``file_name``.

    The stored object is a list with one entry per element of
    ``model.layers``, in order; each entry is whatever that layer's
    ``get_weights()`` returns.

    Args:
        model: Object exposing a ``layers`` iterable whose items provide
            ``get_weights()``.
        file_name: Destination path; it is opened in binary write mode, so an
            existing file is overwritten.
    """
    print('saving weights')
    per_layer_weights = [layer.get_weights() for layer in model.layers]
    with open(file_name, 'wb') as out_file:
        pickle.dump(per_layer_weights, out_file)

In [6]:
if __name__ == '__main__':
    # Locations on the mounted Google Drive. They are defined once so that the
    # load call, the save call and the status message cannot drift apart.
    base_dir = "/content/drive/MyDrive/Colab Notebooks/Thesis/"
    data_path = base_dir + "data/tcga_exp_data_paired_with_ccl.txt"
    results_dir = base_dir + "results/"

    # Loading data
    data_exp_tcga, data_labels_exp_tcga, sample_names_exp_tcga, gene_names_exp_tcga = load_data(data_path)
    print("\n\nDatasets successfully loaded.")

    # Model and training configuration
    input_dim = data_exp_tcga.shape[1]
    first_layer_dim = 100
    second_layer_dim = 50
    third_layer_dim = 25
    batch_size = 30
    epoch_size = 10
    activation_function = 'relu'
    init = 'he_uniform'
    model_save_name = "premodel_tcga_exp_%d_%d_%d" % (first_layer_dim, second_layer_dim, third_layer_dim)
    weights_path = results_dir + model_save_name + '_demo.pickle'

    # The timer covers model construction, compilation, fitting and evaluation.
    t = time.time()
    model = AE_dense_3layers(input_dim=input_dim, first_layer_dim=first_layer_dim, second_layer_dim=second_layer_dim, third_layer_dim=third_layer_dim, activation_func=activation_function, init=init)
    model.compile(loss='mse', optimizer='adam')

    # Autoencoder training: the input is also the reconstruction target.
    model.fit(data_exp_tcga, data_exp_tcga, epochs=epoch_size, batch_size=batch_size, shuffle=True)

    # NOTE: the model is evaluated on the same array it was fitted on, so the
    # value printed as "testloss" below is the reconstruction loss on the
    # training data, not on a held-out set.
    cost = model.evaluate(data_exp_tcga, data_exp_tcga, verbose=0)
    print('\n\nAutoencoder training completed in %.1f mins.\n with testloss:%.4f' % ((time.time()-t)/60, cost))

    # Saving weights
    save_weight_to_pickle(model, weights_path)
    print("\nResults saved in %s\n\n" % weights_path)



Datasets successfully loaded.
input_dim =  6016
first_layer_dim =  100
second_layer_dim =  50
third_layer_dim =  25
init =  he_uniform
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Autoencoder training completed in 0.4 mins.
 with testloss:2.0794
saving weights

Results saved in /content/drive/MyDrive/Colab Notebooks/Thesis/results/premodel_tcga_exp_100_50_25_demo.pickle


