In [1]:
from __future__ import print_function 
import os, sys, h5py
import numpy as np
from six.moves import cPickle
from collections import OrderedDict

import tensorflow as tf
sys.path.append('..')
from tfomics import neuralnetwork_dev as nn
from tfomics import neuralbuild as nb
from tfomics import utils, learn, init
from tfomics.build_network import build_network

# import models
#from model_zoo import test_model
from model_zoo import simple_genome_model

In [2]:
def load_data(data_path):
    """Load the train/valid/test splits from an HDF5 file with two-column labels.

    Parameters
    ----------
    data_path : str
        Path to an HDF5 file containing the datasets 'X_train', 'Y_train',
        'X_valid', 'Y_valid', 'X_test' and 'Y_test'.

    Returns
    -------
    tuple
        (X_train, y_train, X_valid, y_valid, X_test, y_test).
        Each X is the stored 3-D array with a trailing singleton axis added
        and its axes reordered by transpose([0, 2, 3, 1]).
        Each y is the stored label array with one extra column appended; the
        extra column is 1 where the first label column equals 0, else 0.
    """

    def load_split(dataset, split, message):
        # Read one split fully into memory so nothing refers to the file
        # once it has been closed.
        print(message)
        # (N, C, L) -> (N, C, L, 1) -> (N, L, 1, C)
        inputs = np.expand_dims(dataset['X_' + split], axis=3).transpose([0, 2, 3, 1])
        labels = np.array(dataset['Y_' + split])
        # Complement column: flags the rows whose first label is 0.
        complement = np.zeros((labels.shape[0], 1))
        complement[labels[:, 0] == 0] = 1
        return inputs, np.hstack([labels, complement])

    # The context manager closes the HDF5 handle even if a read fails; the
    # previous version left the file open for the lifetime of the kernel.
    with h5py.File(data_path, 'r') as dataset:
        X_train, y_train = load_split(dataset, 'train', "loading training data")
        X_valid, y_valid = load_split(dataset, 'valid', "loading validation data")
        X_test, y_test = load_split(dataset, 'test', "loading test data")

    return X_train, y_train, X_valid, y_valid, X_test, y_test


# NOTE(review): absolute, machine-specific location; anyone else running this
# notebook has to edit these two values first.
data_path = '/Users/juliankimura/Desktop/genome'
file_name = 'K562_CEBPB_200_genome.h5'

# Read all three splits (inputs and labels) from the HDF5 file.
(X_train, y_train,
 X_valid, y_valid,
 X_test, y_test) = load_data(os.path.join(data_path, file_name))



loading training data
loading validation data
loading test data


In [18]:

def model(input_shape, num_labels=None):
    """Describe a one-conv-layer softmax classifier as tfomics layer dicts.

    Parameters
    ----------
    input_shape : list
        Shape stored unchanged under the input layer's 'input_shape' key.
    num_labels : int, optional
        Number of units in the final dense layer.

    Returns
    -------
    tuple
        (model_layers, optimization): a list of four layer-specification
        dicts (input, conv1d, dense, dense) and a dict of optimizer settings.
    """
    # optimization parameters (no "l1" entry is set)
    optimization = {"objective": "categorical",
                    "optimizer": "adam",
                    "learning_rate": 0.001,
                    "l2": 1e-6}

    # Layer specifications in network order. Batch norm is not requested on
    # any layer, and the conv layer has no dropout.
    model_layers = [
        {'layer': 'input',
         'input_shape': input_shape},
        {'layer': 'conv1d',
         'num_filters': 32,
         'filter_size': 11,
         'activation': 'relu',
         'padding': 'SAME',
         'pool_size': 50},
        {'layer': 'dense',
         'num_units': 48,
         'activation': 'relu',
         'dropout': 0.5},
        {'layer': 'dense',
         'num_units': num_labels,
         'activation': 'softmax'},
    ]

    return model_layers, optimization
# get shapes
num_data, height, width, dim = X_train.shape
input_shape = [None, height, width, dim]  # leading None leaves the batch axis unspecified
num_labels = y_train.shape[1]

# Build the layer specification once (it used to be computed twice in a row,
# with the first result thrown away) and turn it into a network.
model_layers, optimization = model(input_shape, num_labels)
nnbuild = nb.NeuralBuild(model_layers)
network, placeholders, hidden_feed_dict = nnbuild.get_network_build()

In [19]:

# Wrap the built network in the tfomics model class and print its layers.
nnmodel = nn.NeuralNet(network, placeholders, hidden_feed_dict)
nnmodel.inspect_layers()

# Output location: <data_path>/results/tfomics/<output_name>.
# NOTE(review): make_directory presumably creates the folder when missing and
# returns its path -- confirm in tfomics.utils.
output_name = 'test'
results_path = utils.make_directory(
    utils.make_directory(data_path, 'results'),
    'tfomics',
)
filepath = os.path.join(results_path, output_name)

# Compile the trainer with the optimization settings chosen above.
nntrainer = nn.NeuralTrainer(
    nnmodel,
    optimization,
    save='best',
    filepath=filepath,
)

----------------------------------------------------------------------------
Network architecture:
----------------------------------------------------------------------------
layer1: inputs
(?, 200, 1, 4)
layer2: conv1d_0
(?, 200, 1, 32)
layer3: conv1d_0_bias
(?, 200, 1, 32)
layer4: conv1d_0_active
(?, 200, 1, 32)
layer5: conv1d_0_pool
(?, 4, 1, 32)
layer6: dense_0
(?, 48)
layer7: dense_0_bias
(?, 48)
layer8: dense_0_active
(?, 48)
layer9: dense_0_dropout
(?, 48)
layer10: dense_1
(?, 2)
layer11: dense_1_bias
(?, 2)
layer12: output
(?, 2)
----------------------------------------------------------------------------


In [20]:
# Bundle the training and validation arrays in the nested-dict layout that is
# handed to learn.train_minibatch.
train = dict(inputs=[X_train], targets=y_train)
valid = dict(inputs=[X_valid], targets=y_valid)
data = dict(train=train, valid=valid)

# Mini-batch training run: batches of 200, at most 50 epochs, patience of 10.
learn.train_minibatch(
    nntrainer,
    data,
    batch_size=200,
    num_epochs=50,
    patience=10,
    verbose=2,
    shuffle=True,
)

Epoch 1 out of 50 
  valid loss:		0.68266
  valid accuracy:	0.55295+/-0.00000
  valid auc-roc:	0.58801+/-0.00000
  valid auc-pr:		0.58354+/-0.00922
  lower cross-validation found
  saving model to:  /Users/juliankimura/Desktop/genome/results/tfomics/test_best.ckpt
Epoch 2 out of 50 
  valid loss:		0.62560
  valid accuracy:	0.63973+/-0.00000
  valid auc-roc:	0.71423+/-0.00000
  valid auc-pr:		0.71013+/-0.00366
  lower cross-validation found
  saving model to:  /Users/juliankimura/Desktop/genome/results/tfomics/test_best.ckpt
Epoch 3 out of 50 
  valid loss:		0.48826
  valid accuracy:	0.77349+/-0.00000
  valid auc-roc:	0.84912+/-0.00000
  valid auc-pr:		0.84547+/-0.00437
  lower cross-validation found
  saving model to:  /Users/juliankimura/Desktop/genome/results/tfomics/test_best.ckpt
Epoch 4 out of 50 
  valid loss:		0.43777
  valid accuracy:	0.81449+/-0.00000
  valid auc-roc:	0.87954+/-0.00000
  valid auc-pr:		0.87071+/-0.01166
  lower cross-validation found
  saving model to:  /Users

KeyboardInterrupt: 

In [21]:
def load_data(data_path):
    """Load the train/valid/test splits from an HDF5 file, labels unchanged.

    Parameters
    ----------
    data_path : str
        Path to an HDF5 file containing the datasets 'X_train', 'Y_train',
        'X_valid', 'Y_valid', 'X_test' and 'Y_test'.

    Returns
    -------
    tuple
        (X_train, y_train, X_valid, y_valid, X_test, y_test).
        Each X is the stored 3-D array with a trailing singleton axis added
        and its axes reordered by transpose([0, 2, 3, 1]); each y is the
        stored label array copied into memory as-is.
    """

    def load_split(dataset, split, message):
        # Read one split fully into memory so nothing refers to the file
        # once it has been closed.
        print(message)
        # (N, C, L) -> (N, C, L, 1) -> (N, L, 1, C)
        inputs = np.expand_dims(dataset['X_' + split], axis=3).transpose([0, 2, 3, 1])
        labels = np.array(dataset['Y_' + split])
        return inputs, labels

    # The context manager closes the HDF5 handle even if a read fails; the
    # previous version left the file open for the lifetime of the kernel.
    with h5py.File(data_path, 'r') as dataset:
        X_train, y_train = load_split(dataset, 'train', "loading training data")
        X_valid, y_valid = load_split(dataset, 'valid', "loading validation data")
        X_test, y_test = load_split(dataset, 'test', "loading test data")

    return X_train, y_train, X_valid, y_valid, X_test, y_test


# NOTE(review): absolute, machine-specific location; anyone else running this
# notebook has to edit these two values first.
data_path = '/Users/juliankimura/Desktop/genome'
file_name = 'K562_CEBPB_200_genome.h5'

# Reload every split with the load_data definition currently in scope.
(X_train, y_train,
 X_valid, y_valid,
 X_test, y_test) = load_data(os.path.join(data_path, file_name))

loading training data
loading validation data
loading test data


In [25]:

def model(input_shape, num_labels=None):
    """Describe a one-conv-layer sigmoid classifier as tfomics layer dicts.

    Parameters
    ----------
    input_shape : list
        Shape stored unchanged under the input layer's 'input_shape' key.
    num_labels : int, optional
        Number of units in the final dense layer.

    Returns
    -------
    tuple
        (model_layers, optimization): a list of four named
        layer-specification dicts ('input', 'conv1', 'dense1', 'dense2')
        and a dict of optimizer settings.
    """
    # optimization parameters (no "l1" entry is set)
    optimization = {"objective": "binary",
                    "optimizer": "adam",
                    "learning_rate": 0.001,
                    "l2": 1e-6}

    # Layer specifications in network order. Only the conv layer requests
    # batch norm, and it has no dropout of its own.
    model_layers = [
        {'layer': 'input',
         'input_shape': input_shape,
         'name': 'input'},
        {'layer': 'conv1d',
         'num_filters': 32,
         'filter_size': 11,
         'norm': 'batch',
         'activation': 'relu',
         'padding': 'SAME',
         'pool_size': 50,
         'name': 'conv1'},
        {'layer': 'dense',
         'num_units': 64,
         'activation': 'relu',
         'dropout': 0.5,
         'name': 'dense1'},
        {'layer': 'dense',
         'num_units': num_labels,
         'activation': 'sigmoid',
         'name': 'dense2'},
    ]

    return model_layers, optimization
# get shapes
num_data, height, width, dim = X_train.shape
input_shape = [None, height, width, dim]  # leading None leaves the batch axis unspecified
num_labels = y_train.shape[1]

# Build the layer specification once (it used to be computed twice in a row,
# with the first result thrown away) and turn it into a network.
model_layers, optimization = model(input_shape, num_labels)
nnbuild = nb.NeuralBuild(model_layers)
network, placeholders, hidden_feed_dict = nnbuild.get_network_build()

In [26]:

# Wrap the built network in the tfomics model class and print its layers.
nnmodel = nn.NeuralNet(network, placeholders, hidden_feed_dict)
nnmodel.inspect_layers()

# Output location: <data_path>/results/tfomics/<output_name>.
# NOTE(review): make_directory presumably creates the folder when missing and
# returns its path -- confirm in tfomics.utils.
output_name = 'test'
results_path = utils.make_directory(
    utils.make_directory(data_path, 'results'),
    'tfomics',
)
filepath = os.path.join(results_path, output_name)

# Compile the trainer with the optimization settings chosen above.
nntrainer = nn.NeuralTrainer(
    nnmodel,
    optimization,
    save='best',
    filepath=filepath,
)

----------------------------------------------------------------------------
Network architecture:
----------------------------------------------------------------------------
layer1: input
(?, 200, 1, 4)
layer2: conv1
(?, 200, 1, 32)
layer3: conv1_batch
(?, 200, 1, 32)
layer4: conv1_active
(?, 200, 1, 32)
layer5: conv1_pool
(?, 4, 1, 32)
layer6: dense1
(?, 64)
layer7: dense1_bias
(?, 64)
layer8: dense1_active
(?, 64)
layer9: dense1_dropout
(?, 64)
layer10: dense2
(?, 1)
layer11: dense2_bias
(?, 1)
layer12: output
(?, 1)
----------------------------------------------------------------------------


In [27]:
# Bundle the training and validation arrays in the nested-dict layout that is
# handed to learn.train_minibatch.
train = dict(inputs=[X_train], targets=y_train)
valid = dict(inputs=[X_valid], targets=y_valid)
data = dict(train=train, valid=valid)

# Mini-batch training run: batches of 200, at most 50 epochs, patience of 10.
learn.train_minibatch(
    nntrainer,
    data,
    batch_size=200,
    num_epochs=50,
    patience=10,
    verbose=2,
    shuffle=True,
)

Epoch 1 out of 50 
  valid loss:		0.67930
  valid accuracy:	0.55892+/-0.00000
  valid auc-roc:	0.58309+/-0.00000
  valid auc-pr:		0.56187+/-0.00000
  lower cross-validation found
  saving model to:  /Users/juliankimura/Desktop/genome/results/tfomics/test_best.ckpt
Epoch 2 out of 50 
  valid loss:		0.67169
  valid accuracy:	0.57564+/-0.00000
  valid auc-roc:	0.61194+/-0.00000
  valid auc-pr:		0.59432+/-0.00000
  lower cross-validation found
  saving model to:  /Users/juliankimura/Desktop/genome/results/tfomics/test_best.ckpt
Epoch 3 out of 50 
  valid loss:		0.63696
  valid accuracy:	0.63694+/-0.00000
  valid auc-roc:	0.68707+/-0.00000
  valid auc-pr:		0.65749+/-0.00000
  lower cross-validation found
  saving model to:  /Users/juliankimura/Desktop/genome/results/tfomics/test_best.ckpt
Epoch 4 out of 50 
  valid loss:		0.56802
  valid accuracy:	0.70183+/-0.00000
  valid auc-roc:	0.80170+/-0.00000
  valid auc-pr:		0.79828+/-0.00000
  lower cross-validation found
  saving model to:  /Users

KeyboardInterrupt: 