In [None]:
#/bin/python
import sys
import os
import numpy as np
sys.path.append('..')
from src import NeuralNet
from src import train as fit
from src import make_directory 
from models import load_model
from data import load_data
np.random.seed(247) # for reproducibility

#------------------------------------------------------------------------------
# load data

# dataset identifier handed to load_data together with the pickle file path
name = 'MotifSimulation_categorical'
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere
datapath = '/home/peter/Data/SequenceMotif'
filepath = os.path.join(datapath, 'N=100000_S=200_M=10_G=20_data.pickle')
# load_data returns three splits; each is indexed below as [0] (inputs) and [1] (labels)
train, valid, test = load_data(name, filepath)
# network input shape: first dimension left open (None), the other three are
# copied from the training inputs
shape = (None, train[0].shape[1], train[0].shape[2], train[0].shape[3])
# presumably train[1] holds 0-based integer class labels, so max + 1 is the
# number of classes -- TODO confirm
num_labels = max(train[1])+1

#-------------------------------------------------------------------------------------

# load model parameters
# constructs the NeuralNet wrapper from the model name, input shape and label count
model_name = "categorical_genome_motif_model"
nnmodel = NeuralNet(model_name, shape, num_labels)

In [None]:
from six.moves import cPickle
# Prefix shared by every results file written for this training run.
outputname = 'new'
filepath = os.path.join(datapath, 'Results', outputname)

# Load the parameters stored under the "_best" suffix. The context manager
# closes the file even if unpickling raises.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
savepath = filepath + "_best.pickle"
with open(savepath, 'rb') as f:
    best_parameters = cPickle.load(f)

In [None]:
nnmodel.reinitialize()

# load model parameters for a given training epoch (epoch 1 here)
# NOTE: this rebinds `best_parameters`, replacing the "_best" parameters
# loaded in the previous cell.
savepath = filepath + "_epoch_" + str(1) + ".pickle"
with open(savepath, 'rb') as f:
    best_parameters = cPickle.load(f)

# install the loaded parameters into the model
nnmodel.set_model_parameters(best_parameters)

In [None]:

def get_performance(savepath):
    """Read the performance records stored in a pickle file.

    The file must contain six consecutively pickled objects. The first one is
    read and discarded; the remaining five are returned, in file order, as the
    tuple (cost, metric, metric_std, roc, pr).
    """
    with open(savepath, 'rb') as handle:
        # each load() call consumes the next pickled object in the stream
        records = [cPickle.load(handle) for _ in range(6)]
    return tuple(records[1:])

# Load the saved performance records for each data split.
savepath = filepath + "_train_performance.pickle"
train_cost, train_metric, train_metric_std, train_roc, train_pr = get_performance(savepath)

savepath = filepath + "_cross-validation_performance.pickle"
valid_cost, valid_metric, valid_metric_std, valid_roc, valid_pr = get_performance(savepath)

# records stored under the "_test" suffix, bound to the best_* names
savepath = filepath + "_test_performance.pickle"
best_cost, best_metric, best_metric_std, best_roc, best_pr = get_performance(savepath)

# records stored under the "_test_all" suffix, bound to the test_* names
savepath = filepath + "_test_all_performance.pickle"
test_cost, test_metric, test_metric_std, test_roc, test_pr = get_performance(savepath)



In [None]:
from matplotlib import pyplot as plt
def plot_loss(loss):
    """Draw training/validation/test loss curves on a new figure.

    `loss` is a sequence of curves ordered as train, valid[, test]. Curves are
    drawn only when exactly two or three are supplied; any other count leaves
    the axes empty. Returns the pyplot module so the caller can call `show()`.
    """
    plt.figure()

    curve_labels = ('train loss', 'valid loss', 'test loss')
    if len(loss) in (2, 3):
        # zip stops at the shorter sequence, so two curves get the first two labels
        for series, tag in zip(loss, curve_labels):
            plt.plot(series, label=tag, linewidth=2)

    plt.xlabel('epoch', fontsize=22)
    plt.ylabel('loss', fontsize=22)
    plt.legend(loc='best', frameon=False, fontsize=18)
    return plt

# Plot the loss curves loaded from the performance pickles.
# plot_loss returns the pyplot module itself, so this rebinds `plt` to the same object.
plt = plot_loss([train_cost, valid_cost, test_cost])
plt.show()


In [None]:
def plot_conv_weights(layer, figsize=(6, 6)):
    """Plot the weights of a convolutional layer as grids of grayscale images.

    Adapted from nolearn's visualisation helper. One figure is created per
    index along the second axis of ``layer.W``; each figure shows every entry
    along the first axis in a square grid, filled left-to-right, top-to-bottom.
    Grid cells beyond the last entry stay blank.

    Parameters
    ----------
    layer : object
        Must expose ``W.get_value()`` returning an array indexed as
        ``W[i, feature_map]`` with a 2-D image at each position.
    figsize : tuple
        Figure size passed to ``plt.subplots`` for every figure.

    Returns
    -------
    The ``plt`` module, so the caller can follow up with ``plt.show()``.
    """
    W = layer.W.get_value()
    num_filters, num_feature_maps = W.shape[0], W.shape[1]
    # smallest square grid that can hold all filters
    nrows = ncols = int(np.ceil(np.sqrt(num_filters)))

    for feature_map in range(num_feature_maps):
        # squeeze=False keeps `axes` 2-D even for a 1x1 grid, so both
        # flatten() and axes[r, c] below always work
        _, axes = plt.subplots(nrows, ncols, figsize=figsize, squeeze=False)

        for ax in axes.flatten():
            ax.set_xticks([])
            ax.set_yticks([])
            ax.axis('off')

        for i in range(num_filters):
            # row-major position of filter i in the grid
            r, c = divmod(i, ncols)
            axes[r, c].imshow(W[i, feature_map], cmap='gray',
                              interpolation='nearest')
    return plt


# NOTE(review): `layer` is not defined in any cell visible here; this presumably
# relies on a layer object left over in the kernel -- confirm before re-running.
plot_conv_weights(layer)
plt.show()


In [None]:
# Scratch call on the NeuralNet wrapper.
# NOTE(review): get_layer is defined in `src` (not shown); confirm it can be called without arguments.
nnmodel.get_layer()

In [None]:
from lasagne.layers import get_all_layers

In [None]:
# NOTE(review): get_all_layers comes from lasagne.layers, while nnmodel is the
# project's NeuralNet wrapper -- confirm the wrapper is accepted here.
my = get_all_layers(nnmodel)

In [None]:
# display the result of the get_all_layers call from the previous cell
my

In [None]:
# load_model returns four objects: the network, its input and target variables,
# and the optimization settings for the named model
network, input_var, target_var, optimization = load_model(model_name, shape, num_labels)

In [None]:
# NOTE(review): a later cell displays the `network` returned by load_model as a
# dict of named layers; a dict has no get_output_for method, so this scratch
# call likely fails -- confirm what was intended.
network.get_output_for('conv1')

In [None]:

from lasagne.layers import DenseLayer
from lasagne.layers import DropoutLayer
from lasagne.layers import InputLayer
from lasagne.layers import LocalResponseNormalization2DLayer as NormLayer
from lasagne.layers import NonlinearityLayer
from lasagne.layers.dnn import Conv2DDNNLayer as ConvLayer
from lasagne.layers import MaxPool2DLayer as PoolLayer
from lasagne.nonlinearities import softmax, sigmoid
import theano.tensor as T

In [None]:

# symbolic variables: 4-D tensor for the network input, float64 matrix for the targets
input_var = T.tensor4('inputs')
target_var = T.dmatrix('targets')

# hand-built Lasagne network; one entry per layer, keyed by layer name,
# each layer taking the previous entry as its input
net = {}
net['input'] = InputLayer(input_var=input_var, shape=shape)
# first convolution: 200 filters of size (12, 1), stride 1
net['conv1'] = ConvLayer(net['input'],
                         num_filters=200,
                         filter_size=(12, 1),
                         stride=(1, 1))
# non-overlapping max-pooling: window (4, 1) with matching stride
net['pool1'] = PoolLayer(net['conv1'],
                         pool_size=(4, 1),
                         stride=(4, 1))
# second convolution: 200 filters of size (8, 1)
net['conv2'] = ConvLayer(net['pool1'],
                         num_filters=200,
                         filter_size=(8, 1))
# ignore_border=False: a partial pooling window at the edge is kept rather than dropped
net['pool2'] = PoolLayer(net['conv2'],
                         pool_size=(4,1),
                         stride=(4,1),
                         ignore_border=False)
# fully connected layer with 200 units, followed by 50% dropout
net['fc4'] = DenseLayer(net['pool2'], num_units=200)
net['drop4'] = DropoutLayer(net['fc4'], p=0.5)
# output layer is linear (nonlinearity=None); the sigmoid is applied separately in 'prob'
net['fc5'] = DenseLayer(net['drop4'], num_units=num_labels, nonlinearity=None)
net['prob'] = NonlinearityLayer(net['fc5'], sigmoid)

In [None]:
from lasagne.init import Constant, Normal, Uniform, GlorotNormal
from lasagne.init import GlorotUniform, HeNormal, HeUniform

# symbolic variables: 4-D tensor for the network input, integer vector for the targets
input_var = T.tensor4('inputs')
target_var = T.ivector('targets')

# create model
# Each dict below specifies one layer; the list of dicts is passed to
# build_network in the next cell. The meaning of the individual keys is
# defined by build_network (not shown here).
layer1 = {'layer': 'input',
          'input_var': input_var,
          'shape': shape,
          'name': 'input'
         }
# first convolution block: no bias (b=None), batch norm, PReLU, pooling (4, 1)
layer2 = {'layer': 'convolution', 
          'num_filters': 200, 
          'filter_size': (8, 1),
          'W': GlorotUniform(),
          'b': None,
          'norm': 'batch', 
          'activation': 'prelu',
          'pool_size': (4, 1),
          'name': 'conv1'
          }
# second convolution block: same settings as conv1 plus a dropout value of 0.2
layer3 = {'layer': 'convolution', 
          'num_filters': 200, 
          'filter_size': (8, 1),
          'W': GlorotUniform(),
          'b': None,
          'dropout': .2,
          'norm': 'batch', 
          'activation': 'prelu',
          'pool_size': (4, 1),
          'name': 'conv2'
          }
# dense block: 200 units, bias initialised to 0.05, dropout value of 0.5
layer4 = {'layer': 'dense', 
          'num_units': 200, 
          'default': True,
          'W': GlorotUniform(),
          'b': Constant(0.05), 
          'dropout': .5,
          'norm': 'batch',
          'activation': 'prelu',
          'name': 'dense3'
          }
# output block: one unit per label with a softmax activation
layer5 = {'layer': 'dense', 
          'num_units': num_labels, 
          'default': True,
          'W': GlorotUniform(),
          'b': Constant(0.05),
          'activation': 'softmax',
          'name': 'output'
          }

# layer specifications in network order, input first
model_layers = [layer1, layer2, layer3, layer4, layer5]

In [None]:
from build_network import build_network
# build the network from the layer specifications; this rebinds `network`,
# which an earlier cell assigned from load_model
network = build_network(model_layers)

In [None]:
# display the object currently bound to `network`
network

In [None]:
# shape of the weight array of the layer stored under the 'output' key
network['output'].W.get_value().shape

In [None]:
# trainable parameters collected from the 'output' layer
# NOTE(review): `layers` is only imported (from lasagne) in a later cell, so this
# fails on a fresh kernel run from top to bottom.
x= layers.get_all_params(network['output'], trainable=True) 

In [None]:
# calculate_gradient is defined in this notebook as
# calculate_gradient(cost, params, weight_norm); the network is not one of its
# arguments, so passing it would raise a TypeError.
# NOTE(review): `cost` and `params` are only assigned in a later cell.
grad = calculate_gradient(cost, params, weight_norm=optimization["weight_norm"])

In [None]:
# NOTE(review): other cells index `network` by layer name (network['output']);
# if it is a plain dict, this attribute access fails -- presumably
# network['input'].output_shape was intended; confirm.
network.output_shape['input']

In [1]:
#/bin/python
import sys
import os
import numpy as np
sys.path.append('..')
from src import NeuralNet
from src import train as fit
from src import make_directory 
from models import load_model
from data import load_data
np.random.seed(247) # for reproducibility

#------------------------------------------------------------------------------
# load data

# dataset identifier handed to load_data together with the pickle file path
name = 'MotifSimulation_categorical'
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere
datapath = '/home/peter/Data/SequenceMotif'
filepath = os.path.join(datapath, 'N=100000_S=200_M=10_G=20_data.pickle')
# load_data returns three splits; each is indexed below as [0] (inputs) and [1] (labels)
train, valid, test = load_data(name, filepath)
# network input shape: first dimension left open (None), the other three are
# copied from the training inputs
shape = (None, train[0].shape[1], train[0].shape[2], train[0].shape[3])
# presumably train[1] holds 0-based integer class labels, so max + 1 is the
# number of classes -- TODO confirm
num_labels = max(train[1])+1

#-------------------------------------------------------------------------------------

# load model parameters
# constructs the NeuralNet wrapper from the model name, input shape and label count
model_name = "categorical_genome_motif_model"
nnmodel = NeuralNet(model_name, shape, num_labels)

Using gpu device 0: GeForce GTX 980 (CNMeM is disabled, CuDNN 4007)


loading data from: /home/peter/Data/SequenceMotif/N=100000_S=200_M=10_G=20_data.pickle
loading train data
loading cross-validation data
loading test data


In [3]:
from lasagne import layers, objectives, updates, regularization

import theano
import theano.tensor as T


def build_cost(network, target_var, prediction, optimization):
    """Build the scalar cost expression, with optional weight-decay terms.

    Parameters
    ----------
    network : passed unchanged to ``regularization.regularize_network_params``
        when an "l1" or "l2" entry is present.
        NOTE(review): Lasagne documents that argument as a layer or list of
        layers -- confirm what callers pass.
    target_var : target values the predictions are compared against.
    prediction : network output expression.
    optimization : mapping with an "objective" entry ('categorical', 'binary'
        or 'mse') and optional "l1" / "l2" penalty weights.

    Returns
    -------
    The mean per-example loss plus any requested L1/L2 penalties.

    Raises
    ------
    ValueError
        If ``optimization["objective"]`` is not one of the supported names.
    """
    objective = optimization["objective"]
    if objective == 'categorical':
        cost = objectives.categorical_crossentropy(prediction, target_var)
    elif objective == 'binary':
        cost = objectives.binary_crossentropy(prediction, target_var)
    elif objective == 'mse':
        cost = objectives.squared_error(prediction, target_var)
    else:
        # fail with a clear message instead of an unbound-variable error below
        raise ValueError("unknown objective %r; expected 'categorical', "
                         "'binary' or 'mse'" % (objective,))

    # reduce the element-wise loss to a single scalar
    cost = objectives.aggregate(cost, mode='mean')

    # weight-decay regularization
    if "l1" in optimization:
        l1_penalty = regularization.regularize_network_params(network, regularization.l1) * optimization["l1"]
        cost += l1_penalty
    if "l2" in optimization:
        l2_penalty = regularization.regularize_network_params(network, regularization.l2) * optimization["l2"]
        cost += l2_penalty

    return cost


def calculate_gradient(cost, params, weight_norm=None):
    """Compute the gradients of `cost`, optionally rescaling their total norm.

    Parameters
    ----------
    cost : scalar expression to differentiate.
    params : parameters to differentiate with respect to.
    weight_norm : optional maximum total norm passed to
        ``updates.total_norm_constraint``. Any falsy value (None, 0, an empty
        list) disables the constraint.

    Returns
    -------
    The gradients from ``T.grad``, rescaled when `weight_norm` is set.
    """
    grad = T.grad(cost, params)

    # gradient clipping option
    if weight_norm:
        grad = updates.total_norm_constraint(grad, weight_norm)

    return grad


def build_updates(grad, params, update_params):
    """Create the parameter-update expressions for the chosen optimizer.

    Parameters
    ----------
    grad : gradients of the cost with respect to `params`.
    params : parameters to update.
    update_params : mapping with an "optimizer" entry ('sgd',
        'nesterov_momentum', 'adagrad', 'rmsprop' or 'adam') and that
        optimizer's hyper-parameters. For adagrad, rmsprop and adam, leaving
        out "learning_rate" selects Lasagne's defaults for every
        hyper-parameter; when it is present, all of that optimizer's
        hyper-parameters must be present too.

    Returns
    -------
    The update dictionary produced by the matching ``lasagne.updates`` function.

    Raises
    ------
    ValueError
        If the optimizer name is not supported.
    KeyError
        If a required hyper-parameter is missing from `update_params`.
    """
    optimizer = update_params['optimizer']
    # the adaptive optimizers fall back to library defaults without a learning rate
    use_defaults = "learning_rate" not in update_params

    if optimizer == 'sgd':
        return updates.sgd(grad, params, learning_rate=update_params['learning_rate'])

    if optimizer == 'nesterov_momentum':
        return updates.nesterov_momentum(grad, params,
                                         learning_rate=update_params['learning_rate'],
                                         momentum=update_params['momentum'])

    if optimizer == 'adagrad':
        if use_defaults:
            return updates.adagrad(grad, params)
        return updates.adagrad(grad, params,
                               learning_rate=update_params['learning_rate'],
                               epsilon=update_params['epsilon'])

    if optimizer == 'rmsprop':
        if use_defaults:
            return updates.rmsprop(grad, params)
        return updates.rmsprop(grad, params,
                               learning_rate=update_params['learning_rate'],
                               rho=update_params['rho'],
                               epsilon=update_params['epsilon'])

    if optimizer == 'adam':
        if use_defaults:
            return updates.adam(grad, params)
        # epsilon is read from update_params; the original looked it up on an
        # undefined name and raised NameError
        return updates.adam(grad, params,
                            learning_rate=update_params['learning_rate'],
                            beta1=update_params['beta1'],
                            beta2=update_params['beta2'],
                            epsilon=update_params['epsilon'])

    raise ValueError("unknown optimizer %r; expected 'sgd', 'nesterov_momentum', "
                     "'adagrad', 'rmsprop' or 'adam'" % (optimizer,))


In [4]:

# load_model returns the network (indexed by layer name below), its input and
# target variables, and the optimization settings
network, input_var, target_var, optimization = load_model(model_name, shape, num_labels)
# training-time output of the 'output' layer (deterministic=False keeps
# stochastic layers such as dropout active)
prediction = layers.get_output(network["output"], deterministic=False)


# training cost, including any l1/l2 penalties listed in `optimization`
cost = build_cost(network, target_var, prediction, optimization)


# calculate and clip gradients
params = layers.get_all_params(network["output"], trainable=True)    
# the total-norm constraint is applied only when a "weight_norm" entry is given
if "weight_norm" in optimization:
    grad = calculate_gradient(cost, params, weight_norm=optimization["weight_norm"])
else:
    grad = calculate_gradient(cost, params)

# setup parameter updates
update_op = build_updates(grad, params, optimization)

# test/validation set 
# deterministic=True disables stochastic layers for evaluation
# NOTE(review): `test_cost` here replaces the value of the same name loaded
# from the performance pickle in an earlier cell.
test_prediction = layers.get_output(network["output"], deterministic=True)
test_cost = build_cost(network, target_var, test_prediction, optimization)

# create theano function
# train_fun applies the parameter updates on every call; test_fun only evaluates
train_fun = theano.function([input_var, target_var], [cost, prediction], updates=update_op)
test_fun = theano.function([input_var, target_var], [test_cost, test_prediction])

In [5]:
# evaluate the compiled test function on the test split (inputs cast to
# float32, labels to int32)
# NOTE(review): this rebinds `cost` and `prediction`, which the previous cell
# used for the training graph; re-run that cell before rebuilding any functions.
cost, prediction = test_fun(test[0].astype(np.float32), test[1].astype(np.int32)) 
cost

array(3.0125436782836914, dtype=float32)

In [6]:
# display the network returned by load_model (a dict of named layers, per the output below)
network

{'conv1': <lasagne.layers.special.ParametricRectifierLayer at 0x7f2dabc59310>,
 'conv1_pool': <lasagne.layers.pool.MaxPool2DLayer at 0x7f2dabf82610>,
 'conv2': <lasagne.layers.special.ParametricRectifierLayer at 0x7f2dabff6210>,
 'conv2_dropout': <lasagne.layers.noise.DropoutLayer at 0x7f2dabff6750>,
 'conv2_pool': <lasagne.layers.pool.MaxPool2DLayer at 0x7f2dabff65d0>,
 'dense3': <lasagne.layers.special.ParametricRectifierLayer at 0x7f2dabff6bd0>,
 'dense3_dropout': <lasagne.layers.noise.DropoutLayer at 0x7f2dabf82750>,
 'input': <lasagne.layers.input.InputLayer at 0x7f2dabff6a10>,
 'output': <lasagne.layers.special.NonlinearityLayer at 0x7f2dabfd7f10>}