In [1]:
#/bin/python
import sys
import os
import numpy as np
sys.path.append('..')
from src import NeuralNet
from src import train as fit
from src import make_directory 
from models import load_model
from data import load_data
from six.moves import cPickle
np.random.seed(247) # for reproducibility
import theano
import theano.tensor as T
from lasagne import layers, objectives, updates, regularization
import matplotlib.pyplot as plt
%matplotlib inline


Using gpu device 0: GeForce GTX 980 (CNMeM is disabled, CuDNN 4007)


In [2]:
name = 'MotifSimulation_correlated'
datapath = '/home/peter/Data/SequenceMotif'
filepath = os.path.join(datapath, 'synthetic_correlated_motifs_100000.hdf5')

In [3]:
train, valid, test = load_data(name, filepath)
shape = (None, train[0].shape[1], train[0].shape[2], train[0].shape[3])
num_labels = np.round(train[1].shape[1])

# calculate correlations
labels = np.vstack([train[1], valid[1]])
N = labels.shape[0]
rho = np.zeros((num_labels, num_labels))
for i in range(num_labels):
    p_i = np.sum(labels[:,i])/N
    for j in range(i):
        p_j = np.sum(labels[:,j])/N    
        p_ij = np.sum(labels[:,i]*labels[:,j])/N
        norm = np.sqrt(p_i*(1-p_i)) * np.sqrt(p_j*(1-p_j))
        rho[i,j] = (p_ij - p_i*p_j)/norm
f = open('/home/peter/Code/Deepomics/examples/rho.pickle','wb')
cPickle.dump(rho, f)
f.close()

plt.imshow(optimization["rho"])

In [38]:
model_name = "binary_genome_motif_model"
network, input_var, target_var, optimization = load_model(model_name, shape, num_labels)

In [39]:
def build_loss(prediction, target_var, optimization):
    loss = -(target_var*T.log(prediction) + (1.0-target_var)*T.log(1.0-prediction))
    
    prediction = T.clip(prediction, 1e-7, 1-1e-7)
    u = (target_var - prediction)/T.sqrt(prediction*(1-prediction))
    diag = T.diag(T.dot(optimization["rho"], u.T).dot(u))
    loss += T.log(1+T.sum(diag[1:]))
    return loss.mean(), diag, u


# build loss function
prediction = layers.get_output(network['output'], deterministic=False)
loss, diag, u = build_loss(target_var, prediction, optimization)

# calculate and clip gradients
params = layers.get_all_params(network['output'], trainable=True)    
grad = T.grad(loss, params)

# setup parameter updates
update_op = updates.adam(grad, params, 
                        learning_rate=optimization['learning_rate'], 
                        beta1=optimization['beta1'], 
                        beta2=optimization['beta2'], 
                        epsilon=optimization['epsilon'])

# test/validation set 
test_prediction = layers.get_output(network['output'], deterministic=True)
test_loss, diag2, u2 = build_loss(target_var, test_prediction, optimization)

# create theano function
train_fun = theano.function([input_var, target_var], [loss, prediction, diag, u, grad], updates=update_op)
test_fun = theano.function([input_var, target_var], [test_loss, test_prediction])

In [40]:
def batch_generator(X, y, batch_size=128, shuffle=True):
    """python generator to get a randomized minibatch"""
    """
    while True:
        idx = np.random.choice(len(y), N)
        yield X[idx].astype('float32'), y[idx].astype('int32')
    """
    if shuffle:
        indices = np.arange(len(X))
        np.random.shuffle(indices)
    for start_idx in range(0, len(X)-batch_size+1, batch_size):
        if shuffle:
            excerpt = indices[start_idx:start_idx+batch_size]
        else:
            excerpt = slice(start_idx, start_idx+batch_size)
        yield X[excerpt], y[excerpt]
        
batch_size = 100
num_batches = train[0].shape[0] // batch_size
batches = batch_generator(train[0], train[1], batch_size)
value = 0
    

In [41]:
X, y = next(batches)
loss, prediction, diag, u = train_fun(X, y)
print loss

inf


In [46]:
loss = -(y*np.log(prediction) + (1.0-y)*np.log(1.0-prediction))
prediction = np.clip(prediction, 1e-7, 1-1e-7)
u = (y - prediction)/np.sqrt(prediction*(1-prediction))
diag = np.diag(np.dot(optimization["rho"], u.T).dot(u))
loss += np.log(1+np.sum(diag[1:]))
loss.mean()

11.830082

In [None]:

loss = objectives.aggregate(loss, mode='mean')