In [1]:
from __future__ import print_function 
import os, sys, h5py
import numpy as np
from six.moves import cPickle
from collections import OrderedDict

import tensorflow as tf

sys.path.append('..')
from tfomics import neuralnetwork as nn
from tfomics import utils, learn
from model_zoo import simple_genome_model

In [2]:

def _to_network_layout(X):
    """Rearrange one raw input array into the layout used downstream.

    The array's last two axes are swapped, a trailing singleton axis is
    appended, and that singleton axis is then moved in front of the last
    axis. For a 3-D input of shape (a, b, c) the result has shape
    (a, c, 1, b). The result is cast to float32.
    """
    X = X.transpose((0, 2, 1))
    X = X.reshape(X.shape + (1,))
    return X.transpose([0, 1, 3, 2]).astype(np.float32)


def load_data(filepath):
    """Load the train / cross-validation / test splits from a pickle file.

    The file must contain three consecutively pickled objects (train,
    cross-validation, test, in that order). Element 0 of each object is the
    input array and element 1 is the target array.

    Parameters
    ----------
    filepath : str
        Path to the pickle file.

    Returns
    -------
    tuple
        (X_train, y_train, X_valid, y_valid, X_test, y_test). Inputs are
        float32 arrays rearranged by `_to_network_layout`; targets are cast
        to int32.

    Notes
    -----
    SECURITY: unpickling can execute arbitrary code. Only call this on
    files from a trusted source.
    """
    print("loading data from: " + filepath)

    # The context manager guarantees the handle is closed even when one of
    # the loads raises (the previous open()/close() pair leaked it).
    splits = []
    with open(filepath, 'rb') as f:
        for split_name in ("train", "cross-validation", "test"):
            print("loading " + split_name + " data")
            # encoding='latin1' is passed so pickles written by Python 2
            # can be read under Python 3.
            splits.append(cPickle.load(f, encoding='latin1'))

    train, cross_validation, test = splits

    X_train = _to_network_layout(train[0])
    y_train = train[1].astype(np.int32)

    X_valid = _to_network_layout(cross_validation[0])
    y_valid = cross_validation[1].astype(np.int32)

    X_test = _to_network_layout(test[0])
    y_test = test[1].astype(np.int32)

    return X_train, y_train, X_valid, y_valid, X_test, y_test


# load data
# Directory and file name of the pickled dataset; joined into one path below.
data_path = '/Users/juliankimura/Desktop/tensorflow/data'
filename = 'Localized_N=100000_S=200_M=50_G=20_data.pickle'
filepath = os.path.join(data_path, filename)

# load_data returns the six arrays in (inputs, targets) pairs for the
# train, validation and test splits, in that order.
X_train, y_train, X_valid, y_valid, X_test, y_test = load_data(filepath)

loading data from: /Users/juliankimura/Desktop/tensorflow/data/Localized_N=100000_S=200_M=50_G=20_data.pickle
loading train data
loading cross-validation data
loading test data


In [3]:
def _load_split(dataset, split):
    """Read one split ('train', 'valid' or 'test') from an open HDF5 file.

    Inputs are read from the 'X_<split>' dataset, given an extra axis at
    position 3 and then reordered with transpose([0, 2, 3, 1]).
    Targets are read from 'Y_<split>' and extended with one extra column
    that is 1 wherever the first target column equals 0, and 0 elsewhere.
    """
    # np.asarray pulls the data into memory, so the returned arrays remain
    # valid after the file has been closed.
    X = np.expand_dims(np.asarray(dataset['X_' + split]), axis=3)
    X = X.transpose([0, 2, 3, 1])

    y = np.array(dataset['Y_' + split])
    complement = np.zeros((y.shape[0], 1))
    complement[y[:, 0] == 0] = 1
    return X, np.hstack([y, complement])


def load_data(data_path):
    """Load the train / validation / test splits from an HDF5 file.

    NOTE(review): this definition shadows the pickle-based `load_data`
    defined in an earlier cell; after this cell runs, only this version is
    reachable under that name.

    Parameters
    ----------
    data_path : str
        Path to an HDF5 file containing the datasets 'X_train', 'Y_train',
        'X_valid', 'Y_valid', 'X_test' and 'Y_test'.

    Returns
    -------
    tuple
        (X_train, y_train, X_valid, y_valid, X_test, y_test), each pair
        produced by `_load_split`.
    """
    # Open read-only inside a context manager so the file handle is always
    # released (previously the h5py.File object was never closed).
    with h5py.File(data_path, 'r') as dataset:
        print("loading training data")
        X_train, y_train = _load_split(dataset, 'train')

        print("loading validation data")
        X_valid, y_valid = _load_split(dataset, 'valid')

        print("loading test data")
        X_test, y_test = _load_split(dataset, 'test')

    return X_train, y_train, X_valid, y_valid, X_test, y_test


data_path = '/Users/juliankimura/Desktop/genome'
file_name = 'K562_CEBPB_200_genome.h5'
X_train, y_train, X_valid, y_valid, X_test, y_test = load_data(os.path.join(data_path, file_name))


loading training data
loading validation data
loading test data


In [5]:
# get shapes
# X_train is 4-D; its third axis is discarded here and hard-coded to 1 below.
num_data, seq_length, _, dim = X_train.shape
num_labels = y_train.shape[1]   # number of labels (output units)
input_shape = [None, seq_length, 1, dim]   # leading None leaves the first axis unspecified

# load model
net, placeholders, optimization = simple_genome_model.model(
    input_shape, num_labels
)

# build neural network class
nnmodel = nn.NeuralNet(net, placeholders)
nnmodel.inspect_layers()

# compile neural trainer
# Results are written under <data_path>/results/simple_genome/<save_name>;
# data_path is whatever the most recently executed cell assigned to it.
save_name = 'test'
save_path = utils.make_directory(
    utils.make_directory(data_path, 'results'), 'simple_genome'
)
filepath = os.path.join(save_path, save_name)
nntrainer = nn.NeuralTrainer(
    nnmodel, optimization, save='best', filepath=filepath
)

----------------------------------------------------------------------------
Network architecture:
----------------------------------------------------------------------------
layer1: input
(?, 200, 1, 4)
layer2: conv1
(?, 200, 1, 25)
layer3: conv1_bias
(?, 200, 1, 25)
layer4: conv1_active
(?, 200, 1, 25)
layer5: conv1_pool
(?, 5, 1, 25)
layer6: dense1
(?, 128)
layer7: dense1_bias
(?, 128)
layer8: dense1_active
(?, 128)
layer9: dense1_dropout
(?, 128)
layer10: dense2
(?, 2)
layer11: dense2_bias
(?, 2)
layer12: output
(?, 2)
----------------------------------------------------------------------------


In [6]:
# Feed dictionaries for the two splits used during fitting. The training
# split uses keep_prob 0.8 with is_training on; validation uses keep_prob 1
# with is_training off.
train = {
    'inputs': X_train,
    'targets': y_train,
    'keep_prob': 0.8,
    'is_training': True,
}
valid = {
    'inputs': X_valid,
    'targets': y_valid,
    'keep_prob': 1,
    'is_training': False,
}
data = {'train': train, 'valid': valid}

# Mini-batch training for up to 500 epochs with patience=10.
learn.train_minibatch(
    nntrainer,
    data,
    batch_size=128,
    num_epochs=500,
    patience=10,
    verbose=2,
    shuffle=True
)

Epoch 1 out of 500 
  valid loss:		0.64088
  valid accuracy:	0.63416+/-0.00000
  valid auc-roc:	0.69270+/-0.00000
  valid auc-pr:		0.68950+/-0.00926
  lower cross-validation found
  saving model to:  /Users/juliankimura/Desktop/genome/results/simple_genome/test_best.ckpt
Epoch 2 out of 500 
  valid loss:		0.45978
  valid accuracy:	0.78861+/-0.00000
  valid auc-roc:	0.87035+/-0.00000
  valid auc-pr:		0.86463+/-0.01604
  lower cross-validation found
  saving model to:  /Users/juliankimura/Desktop/genome/results/simple_genome/test_best.ckpt
Epoch 3 out of 500 
  valid loss:		0.38353
  valid accuracy:	0.83161+/-0.00000
  valid auc-roc:	0.90874+/-0.00000
  valid auc-pr:		0.90476+/-0.00791
  lower cross-validation found
  saving model to:  /Users/juliankimura/Desktop/genome/results/simple_genome/test_best.ckpt
Epoch 4 out of 500 
  valid loss:		0.35516
  valid accuracy:	0.84992+/-0.00000
  valid auc-roc:	0.92169+/-0.00000
  valid auc-pr:		0.91637+/-0.01055
  lower cross-validation found
  sa

KeyboardInterrupt: 

In [None]:
# Evaluate on the held-out test split with keep_prob 1 and is_training off.
test = {
    'inputs': X_test,
    'targets': y_test,
    'keep_prob': 1,
    'is_training': False,
}
test_loss = nntrainer.test_model(test, batch_size=512)

In [None]:
# Close the trainer's session once training and evaluation are finished.
# NOTE(review): presumably this releases the underlying TensorFlow session — confirm in tfomics.
nntrainer.close_sess()