In [8]:
from __future__ import print_function 
import os, sys
import numpy as np
from six.moves import cPickle
from collections import OrderedDict

import tensorflow as tf

sys.path.append('..')
from tfomics import layers, utils, init, learn
from tfomics import neuralnetwork as nn
from model_zoo import simple_genome_model

In [4]:

def _to_model_arrays(split):
    """Convert one unpickled ``(X, y)`` pair into the arrays returned by load_data.

    ``X`` must be 3-D. Its axes ``(n, a, b)`` are rearranged to
    ``(n, b, 1, a)`` and the result is cast to float32; ``y`` is cast to
    int32.

    Returns
    -------
    (X, y) : tuple of numpy arrays
    """
    X = split[0].transpose((0, 2, 1))      # (n, a, b)    -> (n, b, a)
    X = X.reshape(X.shape + (1,))          # (n, b, a)    -> (n, b, a, 1)
    X = X.transpose([0, 1, 3, 2])          # (n, b, a, 1) -> (n, b, 1, a)
    return X.astype(np.float32), split[1].astype(np.int32)


def load_data(filepath):
    """Load the train, cross-validation and test splits from a pickle file.

    The file is expected to contain three consecutive pickled objects, in
    the order train, cross-validation, test. Element 0 of each object is a
    3-D input array and element 1 is the matching label array.

    Parameters
    ----------
    filepath : str
        Path to the pickle file.

    Returns
    -------
    X_train, y_train, X_valid, y_valid, X_test, y_test : numpy arrays
        Inputs are float32 with axes ``(n, b, 1, a)`` for an on-disk array of
        axes ``(n, a, b)``; labels are int32.

    Raises
    ------
    IOError / OSError
        If the file cannot be opened.
    EOFError
        If the file holds fewer than three pickled objects.
    """
    print("loading data from: " + filepath)

    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    # The context manager guarantees the handle is closed even when one of
    # the loads fails part-way through.
    with open(filepath, 'rb') as f:
        print("loading train data")
        train = cPickle.load(f)
        print("loading cross-validation data")
        cross_validation = cPickle.load(f)
        print("loading test data")
        test = cPickle.load(f)

    X_train, y_train = _to_model_arrays(train)
    X_valid, y_valid = _to_model_arrays(cross_validation)
    X_test, y_test = _to_model_arrays(test)

    return X_train, y_train, X_valid, y_valid, X_test, y_test


# Locate the pickled dataset.
# NOTE(review): data_path is an absolute, machine-specific directory; adjust
# it when running this notebook elsewhere.
data_path = '/home/peter/Code/tensorflow/data'
filename = 'Localized_N=100000_S=200_M=50_G=20_data.pickle'
filepath = os.path.join(data_path, filename)

# Read the train / cross-validation / test splits in one call.
(X_train, y_train,
 X_valid, y_valid,
 X_test, y_test) = load_data(filepath)

# Derive the sizes used to build the model from the training arrays.
# X_train is 4-D; its third axis is ignored here and fixed to 1 in input_shape.
num_data, seq_length, _, dim = X_train.shape
num_labels = y_train.shape[1]   # number of labels (output units)
input_shape = [None, seq_length, 1, dim]

loading data from: /home/peter/Code/tensorflow/data/Localized_N=100000_S=200_M=50_G=20_data.pickle
loading train data
loading cross-validation data
loading test data


In [10]:
# Build the model for the input shape and number of labels computed above.
# The factory returns three objects that are passed on unchanged below.
net, placeholders, optimization = simple_genome_model.model(input_shape, num_labels)

# Wrap the network together with its 'inputs' placeholder, then list the
# layers (the recorded output below shows each layer name and shape).
nnmodel = nn.NeuralNet(net, placeholders['inputs'])
nnmodel.inspect_layers()

# Create the trainer. With save='best' and filepath='test' the recorded
# training log reports checkpoints written to "test_best.ckpt".
nntrainer = nn.NeuralTrainer(
    nnmodel,
    placeholders,
    optimization,
    save='best',
    filepath='test'
)


----------------------------------------------------------------------------
Network architecture:
----------------------------------------------------------------------------
layer1: input
(?, 200, 1, 4)
layer2: conv1
(?, 200, 1, 25)
layer3: conv1_batch
(?, 200, 1, 25)
layer4: conv1_active
(?, 200, 1, 25)
layer5: conv1_pool
(?, 20, 1, 25)
layer6: resid1_1resid
(?, 20, 1, 25)
layer7: resid1_1resid_norm
(?, 20, 1, 25)
layer8: resid1_1resid_active
(?, 20, 1, 25)
layer9: resid1_2resid
(?, 20, 1, 25)
layer10: resid1_2resid_norm
(?, 20, 1, 25)
layer11: resid1_residual
(?, 20, 1, 25)
layer12: resid1_resid
(?, 20, 1, 25)
layer13: conv2
(?, 15, 1, 50)
layer14: conv2_batch
(?, 15, 1, 50)
layer15: conv2_active
(?, 15, 1, 50)
layer16: conv2_pool
(?, 3, 1, 50)
layer17: conv2_dropout
(?, 3, 1, 50)
layer18: dense1
(?, 20)
layer19: dense1_bias
(?, 20)
layer20: dense1_active
(?, 20)
layer21: output
(?, 20)
----------------------------------------------------------------------------


In [11]:
# Open the session that the training and test cells below run in. It is left
# open on purpose because those later cells reuse `sess`.
sess = tf.Session()

# Initialize all variables in the graph.
# tf.initialize_all_variables() is deprecated in favour of
# tf.global_variables_initializer(); use the new name when this TensorFlow
# build provides it and fall back to the old one otherwise, so the cell runs
# without a deprecation warning on newer releases and still works on older ones.
if hasattr(tf, 'global_variables_initializer'):
    init_op = tf.global_variables_initializer()
else:
    init_op = tf.initialize_all_variables()
sess.run(init_op)

In [12]:
# Feed dictionary for the training split.
# NOTE(review): 'keep_prob' is presumably the dropout keep probability used
# by the conv2_dropout layer -- confirm against the model definition.
X = {
    'inputs': X_train,
    'targets': y_train,
    'keep_prob': 0.8,
    'is_training': True,
}

# Feed dictionary for the validation split (keep_prob of 1, is_training off).
X2 = {
    'inputs': X_valid,
    'targets': y_valid,
    'keep_prob': 1,
    'is_training': False,
}

data = {'train': X, 'valid': X2}

# Train with shuffled mini-batches of 128 for at most 500 epochs.
# NOTE(review): patience=10 presumably controls early stopping -- confirm in
# learn.train_minibatch.
learn.train_minibatch(
    sess, nntrainer, data,
    batch_size=128,
    num_epochs=500,
    patience=10,
    verbose=0,
    shuffle=True
)

  valid loss:		0.15654
  valid accuracy:	0.95199+/-0.02725
  valid auc-roc:	0.75757+/-0.12887
  valid auc-pr:		0.25235+/-0.23867
lower cross-validation found
saving model to:  test_best.ckpt
  valid loss:		0.12004
  valid accuracy:	0.96087+/-0.02432
  valid auc-roc:	0.85159+/-0.11347
  valid auc-pr:		0.41739+/-0.33126
lower cross-validation found
saving model to:  test_best.ckpt
  valid loss:		0.08864
  valid accuracy:	0.96979+/-0.01914
  valid auc-roc:	0.91180+/-0.10029
  valid auc-pr:		0.57986+/-0.31967
lower cross-validation found
saving model to:  test_best.ckpt
  valid loss:		0.06423
  valid accuracy:	0.97746+/-0.01587
  valid auc-roc:	0.94846+/-0.08084
  valid auc-pr:		0.71967+/-0.29765
lower cross-validation found
saving model to:  test_best.ckpt
  valid loss:		0.04980
  valid accuracy:	0.98250+/-0.01350
  valid auc-roc:	0.96709+/-0.06011
  valid auc-pr:		0.78867+/-0.28691
lower cross-validation found
saving model to:  test_best.ckpt
  valid loss:		0.04072
  valid accuracy:	0.98

KeyboardInterrupt: 

In [13]:
# Feed dictionary for the held-out test split (keep_prob of 1, is_training off).
X = {
    'inputs': X_test,
    'targets': y_test,
    'keep_prob': 1,
    'is_training': False,
}

# Evaluate in batches of 512. Kept as the final expression so the notebook
# displays the value returned by test_model.
nntrainer.test_model(sess, X, batch_size=512)

  test  loss:		0.04127
  test  accuracy:	0.98600+/-0.01108
  test  auc-roc:	0.97779+/-0.04103
  test  auc-pr:		0.82986+/-0.27641


0.04126567915081978