In [1]:
from __future__ import print_function 
import os, sys, h5py
import numpy as np
from six.moves import cPickle
from collections import OrderedDict

import tensorflow as tf
sys.path.append('..')
from tfomics import neuralnetwork_dev as nn
from tfomics import neuralbuild as nb
from tfomics import utils, learn, init
from tfomics.build_network import build_network

# import models
#from model_zoo import test_model
from model_zoo import simple_genome_model

In [2]:
def load_data(data_path):
    """Load the train/validation/test splits from an HDF5 file.

    Reads the datasets 'X_train'/'Y_train', 'X_valid'/'Y_valid' and
    'X_test'/'Y_test' from the file at ``data_path``.

    Each X array gets a new axis inserted at position 3 and is then transposed
    with axis order [0, 2, 3, 1] (a 3-D ``(a, b, c)`` input becomes
    ``(a, c, 1, b)``).

    Each Y array gets one extra column appended on the right: 1.0 for rows
    whose first label column equals 0, and 0.0 otherwise.

    Args:
        data_path: path of the HDF5 file to open read-only.

    Returns:
        Tuple ``(X_train, y_train, X_valid, y_valid, X_test, y_test)`` of
        numpy arrays.
    """
    def read_split(dataset, split):
        # Same reshaping for every split: add a trailing axis, then reorder axes.
        inputs = np.expand_dims(dataset['X_' + split], axis=3).transpose([0, 2, 3, 1])
        labels = np.array(dataset['Y_' + split])
        # Extra column flags the rows whose first label is 0.
        complement = np.zeros((labels.shape[0], 1))
        complement[labels[:, 0] == 0] = 1
        return inputs, np.hstack([labels, complement])

    # The context manager closes the file handle even if a read fails. All
    # arrays are converted to numpy arrays inside the block, before the close.
    with h5py.File(data_path, 'r') as dataset:
        print("loading training data")
        X_train, y_train = read_split(dataset, 'train')

        print("loading validation data")
        X_valid, y_valid = read_split(dataset, 'valid')

        print("loading test data")
        X_test, y_test = read_split(dataset, 'test')

    return X_train, y_train, X_valid, y_valid, X_test, y_test


# Directory that holds the dataset. Set the GENOME_DATA_PATH environment
# variable to point somewhere else without editing the notebook; the default
# is the original hard-coded location.
data_path = os.environ.get('GENOME_DATA_PATH', '/Users/juliankimura/Desktop/genome')
file_name = 'K562_CEBPB_200_genome.h5'
X_train, y_train, X_valid, y_valid, X_test, y_test = load_data(os.path.join(data_path, file_name))



loading training data
loading validation data
loading test data


In [10]:

def model(input_shape, num_labels=None):
    """Return the layer specifications and optimizer settings for this model.

    The layer list is, in order: an input layer, one 'conv1d' layer, one
    128-unit 'dense' layer, and a 'dense' output layer with 'softmax'
    activation.

    Args:
        input_shape: stored unchanged under 'input_shape' in the input spec.
        num_labels: stored as 'num_units' of the output layer (default None).

    Returns:
        ``(model_layers, optimization)``: a list of four layer dicts and a
        dict of optimization settings with a 'categorical' objective.
    """
    input_spec = dict(layer='input', input_shape=input_shape)
    conv_spec = dict(
        layer='conv1d',
        num_filters=25,
        filter_size=19,
        norm='batch',
        activation='relu',
        padding='SAME',
        pool_size=40,
        dropout=0.8,
    )
    hidden_spec = dict(
        layer='dense',
        num_units=128,
        norm='batch',
        activation='relu',
        dropout=0.8,
    )
    output_spec = dict(layer='dense', num_units=num_labels, activation='softmax')

    optimization = dict(
        objective='categorical',
        optimizer='adam',
        learning_rate=0.001,
        l2=1e-6,
    )
    return [input_spec, conv_spec, hidden_spec, output_spec], optimization
# Derive the network input shape from the 4-D training array; the leading axis
# is left as None. The label count is the width of the target matrix.
num_data, height, width, dim = X_train.shape
input_shape = [None, height, width, dim]
num_labels = y_train.shape[1]

# Build the layer specification once and hand it to the tfomics builder.
model_layers, optimization = model(input_shape, num_labels)
nnbuild = nb.NeuralBuild(model_layers)
network, placeholders, hidden_feed_dict = nnbuild.get_network_build()

In [11]:

# Wrap the built network in the tfomics model class and print its layers.
nnmodel = nn.NeuralNet(network, placeholders, hidden_feed_dict)
nnmodel.inspect_layers()

# Output location: the 'tfomics' directory inside 'results' inside data_path.
# Both directories are obtained through utils.make_directory.
output_name = 'test'
results_path = utils.make_directory(utils.make_directory(data_path, 'results'), 'tfomics')
filepath = os.path.join(results_path, output_name)

# Trainer configured with the optimization settings; save='best' and filepath
# are passed straight through to NeuralTrainer.
nntrainer = nn.NeuralTrainer(
    nnmodel,
    optimization,
    save='best',
    filepath=filepath,
)

----------------------------------------------------------------------------
Network architecture:
----------------------------------------------------------------------------
layer1: input
(?, 200, 1, 4)
layer2: conv1
(?, 200, 1, 25)
layer3: conv1_batch
(?, 200, 1, 25)
layer4: conv1_active
(?, 200, 1, 25)
layer5: conv1_pool
(?, 5, 1, 25)
layer6: conv1_dropout
(?, 5, 1, 25)
layer7: dense1
(?, 128)
layer8: dense1_batch
(?, 128)
layer9: dense1_active
(?, 128)
layer10: dense1_dropout
(?, 128)
layer11: dense2
(?, 1)
layer12: dense2_bias
(?, 1)
layer13: output
(?, 1)
----------------------------------------------------------------------------


In [12]:
# Package the arrays into the nested dict passed to learn.train_minibatch;
# 'inputs' is a one-element list holding the input array.
train = dict(inputs=[X_train], targets=y_train)
valid = dict(inputs=[X_valid], targets=y_valid)
data = dict(train=train, valid=valid)

learn.train_minibatch(
    nntrainer,
    data,
    batch_size=200,
    num_epochs=50,
    patience=10,
    verbose=2,
    shuffle=True,
)

Epoch 1 out of 50 
  valid loss:		0.96460
  valid accuracy:	0.52866+/-0.00000
  valid auc-roc:	0.57684+/-0.00000
  valid auc-pr:		0.56067+/-0.00000
  lower cross-validation found
  saving model to:  /Users/juliankimura/Desktop/genome/results/tfomics/test_best.ckpt
Epoch 2 out of 50 
  valid loss:		0.70663
  valid accuracy:	0.58678+/-0.00000
  valid auc-roc:	0.67332+/-0.00000
  valid auc-pr:		0.65611+/-0.00000
  lower cross-validation found
  saving model to:  /Users/juliankimura/Desktop/genome/results/tfomics/test_best.ckpt


KeyboardInterrupt: 

In [3]:
def load_data(data_path):
    """Load the train/validation/test splits from an HDF5 file.

    Reads the datasets 'X_train'/'Y_train', 'X_valid'/'Y_valid' and
    'X_test'/'Y_test' from the file at ``data_path``.

    Each X array gets a new axis inserted at position 3 and is then transposed
    with axis order [0, 2, 3, 1] (a 3-D ``(a, b, c)`` input becomes
    ``(a, c, 1, b)``). Y arrays are returned as stored.

    Args:
        data_path: path of the HDF5 file to open read-only.

    Returns:
        Tuple ``(X_train, y_train, X_valid, y_valid, X_test, y_test)`` of
        numpy arrays.
    """
    def read_split(dataset, split):
        # Same reshaping for every split: add a trailing axis, then reorder axes.
        inputs = np.expand_dims(dataset['X_' + split], axis=3).transpose([0, 2, 3, 1])
        labels = np.array(dataset['Y_' + split])
        return inputs, labels

    # The context manager closes the file handle even if a read fails. All
    # arrays are converted to numpy arrays inside the block, before the close.
    with h5py.File(data_path, 'r') as dataset:
        print("loading training data")
        X_train, y_train = read_split(dataset, 'train')

        print("loading validation data")
        X_valid, y_valid = read_split(dataset, 'valid')

        print("loading test data")
        X_test, y_test = read_split(dataset, 'test')

    return X_train, y_train, X_valid, y_valid, X_test, y_test


# Directory that holds the dataset. Set the GENOME_DATA_PATH environment
# variable to point somewhere else without editing the notebook; the default
# is the original hard-coded location.
data_path = os.environ.get('GENOME_DATA_PATH', '/Users/juliankimura/Desktop/genome')
file_name = 'K562_CEBPB_200_genome.h5'
X_train, y_train, X_valid, y_valid, X_test, y_test = load_data(os.path.join(data_path, file_name))

loading training data
loading validation data
loading test data


In [10]:

def model(input_shape, num_labels=None):
    """Return the layer specifications and optimizer settings for this model.

    The layer list is, in order: an input layer, one 'conv1d' layer named
    'conv1', one 128-unit 'dense' layer named 'dense1', and a 'dense' output
    layer named 'dense2' with 'sigmoid' activation.

    Args:
        input_shape: stored unchanged under 'input_shape' in the input spec.
        num_labels: stored as 'num_units' of the output layer (default None).

    Returns:
        ``(model_layers, optimization)``: a list of four layer dicts and a
        dict of optimization settings with a 'binary' objective.
    """
    input_spec = dict(layer='input', input_shape=input_shape, name='input')
    conv_spec = dict(
        layer='conv1d',
        num_filters=25,
        filter_size=19,
        norm='batch',
        activation='relu',
        padding='SAME',
        pool_size=40,
        dropout=0.8,
        name='conv1',
    )
    hidden_spec = dict(
        layer='dense',
        num_units=128,
        norm='batch',
        activation='relu',
        dropout=0.8,
        name='dense1',
    )
    output_spec = dict(
        layer='dense',
        num_units=num_labels,
        activation='sigmoid',
        name='dense2',
    )

    optimization = dict(
        objective='binary',
        optimizer='adam',
        learning_rate=0.001,
        l2=1e-6,
    )
    return [input_spec, conv_spec, hidden_spec, output_spec], optimization
# Derive the network input shape from the 4-D training array; the leading axis
# is left as None. The label count is the width of the target matrix.
num_data, height, width, dim = X_train.shape
input_shape = [None, height, width, dim]
num_labels = y_train.shape[1]

# Build the layer specification once and hand it to the tfomics builder.
model_layers, optimization = model(input_shape, num_labels)
nnbuild = nb.NeuralBuild(model_layers)
network, placeholders, hidden_feed_dict = nnbuild.get_network_build()

In [11]:

# Wrap the built network in the tfomics model class and print its layers.
nnmodel = nn.NeuralNet(network, placeholders, hidden_feed_dict)
nnmodel.inspect_layers()

# Output location: the 'tfomics' directory inside 'results' inside data_path.
# Both directories are obtained through utils.make_directory.
output_name = 'test'
results_path = utils.make_directory(utils.make_directory(data_path, 'results'), 'tfomics')
filepath = os.path.join(results_path, output_name)

# Trainer configured with the optimization settings; save='best' and filepath
# are passed straight through to NeuralTrainer.
nntrainer = nn.NeuralTrainer(
    nnmodel,
    optimization,
    save='best',
    filepath=filepath,
)

----------------------------------------------------------------------------
Network architecture:
----------------------------------------------------------------------------
layer1: input
(?, 200, 1, 4)
layer2: conv1
(?, 200, 1, 25)
layer3: conv1_batch
(?, 200, 1, 25)
layer4: conv1_active
(?, 200, 1, 25)
layer5: conv1_pool
(?, 5, 1, 25)
layer6: conv1_dropout
(?, 5, 1, 25)
layer7: dense1
(?, 128)
layer8: dense1_batch
(?, 128)
layer9: dense1_active
(?, 128)
layer10: dense1_dropout
(?, 128)
layer11: dense2
(?, 1)
layer12: dense2_bias
(?, 1)
layer13: output
(?, 1)
----------------------------------------------------------------------------


In [12]:
# Package the arrays into the nested dict passed to learn.train_minibatch;
# 'inputs' is a one-element list holding the input array.
train = dict(inputs=[X_train], targets=y_train)
valid = dict(inputs=[X_valid], targets=y_valid)
data = dict(train=train, valid=valid)

learn.train_minibatch(
    nntrainer,
    data,
    batch_size=200,
    num_epochs=50,
    patience=10,
    verbose=2,
    shuffle=True,
)

Epoch 1 out of 50 
  valid loss:		0.96460
  valid accuracy:	0.52866+/-0.00000
  valid auc-roc:	0.57684+/-0.00000
  valid auc-pr:		0.56067+/-0.00000
  lower cross-validation found
  saving model to:  /Users/juliankimura/Desktop/genome/results/tfomics/test_best.ckpt
Epoch 2 out of 50 
  valid loss:		0.70663
  valid accuracy:	0.58678+/-0.00000
  valid auc-roc:	0.67332+/-0.00000
  valid auc-pr:		0.65611+/-0.00000
  lower cross-validation found
  saving model to:  /Users/juliankimura/Desktop/genome/results/tfomics/test_best.ckpt


KeyboardInterrupt: 

In [None]:
# Package the arrays into the nested dict passed to learn.train_minibatch;
# 'inputs' is a one-element list holding the input array.
train = dict(inputs=[X_train], targets=y_train)
valid = dict(inputs=[X_valid], targets=y_valid)
data = dict(train=train, valid=valid)

learn.train_minibatch(
    nntrainer,
    data,
    batch_size=200,
    num_epochs=50,
    patience=10,
    verbose=2,
    shuffle=True,
)

In [None]:
def load_data(data_path):
    """Load the train/validation/test splits from an HDF5 file.

    Reads the datasets 'X_train'/'Y_train', 'X_valid'/'Y_valid' and
    'X_test'/'Y_test' from the file at ``data_path``.

    Each X array gets a new axis inserted at position 3 and is then transposed
    with axis order [0, 2, 3, 1] (a 3-D ``(a, b, c)`` input becomes
    ``(a, c, 1, b)``). Y arrays are returned as stored.

    Args:
        data_path: path of the HDF5 file to open read-only.

    Returns:
        Tuple ``(X_train, y_train, X_valid, y_valid, X_test, y_test)`` of
        numpy arrays.
    """
    def read_split(dataset, split):
        # Same reshaping for every split: add a trailing axis, then reorder axes.
        inputs = np.expand_dims(dataset['X_' + split], axis=3).transpose([0, 2, 3, 1])
        labels = np.array(dataset['Y_' + split])
        return inputs, labels

    # The context manager closes the file handle even if a read fails. All
    # arrays are converted to numpy arrays inside the block, before the close.
    with h5py.File(data_path, 'r') as dataset:
        print("loading training data")
        X_train, y_train = read_split(dataset, 'train')

        print("loading validation data")
        X_valid, y_valid = read_split(dataset, 'valid')

        print("loading test data")
        X_test, y_test = read_split(dataset, 'test')

    return X_train, y_train, X_valid, y_valid, X_test, y_test


# Directory that holds the dataset. Set the GENOME_DATA_PATH environment
# variable to point somewhere else without editing the notebook; the default
# is the original hard-coded location.
data_path = os.environ.get('GENOME_DATA_PATH', '/Users/juliankimura/Desktop/genome')
file_name = 'K562_CEBPB_200_genome.h5'
X_train, y_train, X_valid, y_valid, X_test, y_test = load_data(os.path.join(data_path, file_name))




In [None]:

def model(input_shape, num_labels=None):
    """Create the placeholders, build the network, and return optimizer settings.

    Four placeholders are created, in this order: 'input' (via
    utils.placeholder with ``input_shape``), 'is_training' (tf.bool),
    'keep_prob' (tf.float32) and 'output' (via utils.placeholder with shape
    ``(None, num_labels)``).

    The layer list passed to build_network is: input, one 'conv2d' layer
    ('conv1'), one 128-unit 'dense' layer ('dense1') whose 'dropout' entry is
    the keep_prob placeholder, and a 'sigmoid' 'dense' output layer ('dense2').

    Args:
        input_shape: shape given to the 'input' placeholder.
        num_labels: width of the 'output' placeholder and 'num_units' of the
            output layer (default None).

    Returns:
        ``(net, placeholders, optimization)``: the value returned by
        build_network, the placeholder dict (keys 'inputs', 'targets',
        'keep_prob', 'is_training'), and the optimization settings dict.
    """
    # Placeholders are created in a fixed order: input, is_training,
    # keep_prob, output.
    inputs = utils.placeholder(shape=input_shape, name='input')
    is_training = tf.placeholder(tf.bool, name='is_training')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    targets = utils.placeholder(shape=(None, num_labels), name='output')

    placeholders = dict(
        inputs=inputs,
        targets=targets,
        keep_prob=keep_prob,
        is_training=is_training,
    )

    input_spec = dict(layer='input', inputs=inputs, name='input')
    conv_spec = dict(
        layer='conv2d',
        num_filters=25,
        filter_size=(19, 1),
        padding='SAME',
        activation='relu',
        pool_size=(40, 1),
        name='conv1',
    )
    # NOTE(review): this residual block spec is defined but is NOT part of the
    # layer list handed to build_network below -- confirm whether it was left
    # out on purpose.
    residual_spec = dict(
        layer='residual-conv2d',
        filter_size=(5, 1),
        dropout=keep_prob,
        pool_size=(40, 1),
        name='resid1',
    )
    hidden_spec = dict(
        layer='dense',
        num_units=128,
        activation='relu',
        dropout=keep_prob,
        name='dense1',
    )
    output_spec = dict(
        layer='dense',
        num_units=num_labels,
        activation='sigmoid',
        name='dense2',
    )

    net = build_network([input_spec, conv_spec, hidden_spec, output_spec])

    optimization = dict(
        objective='binary',
        optimizer='adam',
        learning_rate=0.001,
        l2=1e-6,
    )
    return net, placeholders, optimization

# Unpack the 4-D training array shape; the leading axis of the network input
# is left as None and the label count is the width of the target matrix.
num_data, height, width, dim = X_train.shape
num_labels = y_train.shape[1]
input_shape = [None, height, width, dim]

network, placeholders, optimization = model(input_shape, num_labels=num_labels)


In [None]:

# Wrap the built network in the tfomics model class and print its layers.
nnmodel = nn.NeuralNet(network, placeholders)
nnmodel.inspect_layers()

# Output location: the 'tfomics' directory inside 'results' inside data_path.
# Both directories are obtained through utils.make_directory.
output_name = 'test'
results_path = utils.make_directory(utils.make_directory(data_path, 'results'), 'tfomics')
filepath = os.path.join(results_path, output_name)

# Trainer configured with the optimization settings; save='best' and filepath
# are passed straight through to NeuralTrainer.
nntrainer = nn.NeuralTrainer(
    nnmodel,
    optimization,
    save='best',
    filepath=filepath,
)

In [None]:
# Feed dictionaries for the two phases: training uses keep_prob 0.8 with
# is_training True, validation uses keep_prob 1 with is_training False.
train = dict(inputs=X_train, targets=y_train, keep_prob=0.8, is_training=True)
valid = dict(inputs=X_valid, targets=y_valid, keep_prob=1, is_training=False)
data = dict(train=train, valid=valid)

learn.train_minibatch(
    nntrainer,
    data,
    batch_size=128,
    num_epochs=500,
    patience=10,
    verbose=2,
    shuffle=True,
)

In [None]:
# Feed dictionaries for the two phases: training uses keep_prob 0.8 with
# is_training True, validation uses keep_prob 1 with is_training False.
train = dict(inputs=X_train, targets=y_train, keep_prob=0.8, is_training=True)
valid = dict(inputs=X_valid, targets=y_valid, keep_prob=1, is_training=False)
data = dict(train=train, valid=valid)

learn.train_minibatch(
    nntrainer,
    data,
    batch_size=128,
    num_epochs=500,
    patience=10,
    verbose=2,
    shuffle=True,
)