In [93]:
#/bin/python
import sys
import os
import numpy as np
sys.path.append('..')
from src import NeuralNet
from src import train as fit
from src import make_directory 
from models import load_model
from data import load_data
np.random.seed(247) # for reproducibility

#------------------------------------------------------------------------------
# load data

name = 'MotifSimulation_binary'
# Data directory. Set the SEQUENCE_MOTIF_DATA environment variable to run the
# notebook on another machine; the default is the original hard-coded location.
datapath = os.environ.get('SEQUENCE_MOTIF_DATA', '/home/peter/Data/SequenceMotif')
filepath = os.path.join(datapath, 'N=100000_S=200_M=10_G=20_data.pickle')
train, valid, test = load_data(name, filepath)
# Network input shape: the first axis is left as None, the other three are
# copied from the training inputs.
shape = (None, train[0].shape[1], train[0].shape[2], train[0].shape[3])
num_labels = np.round(train[1].shape[1])

#-------------------------------------------------------------------------------------

# build the model (its parameters are loaded from disk in later cells)
model_name = "binary_genome_motif_model"
nnmodel = NeuralNet(model_name, shape, num_labels)

loading data from: /home/peter/Data/SequenceMotif/N=100000_S=200_M=10_G=20_data.pickle
loading train data
loading cross-validation data
loading test data


In [94]:
from lasagne.layers import get_all_layers
# Layers obtained from lasagne's get_all_layers for the network's 'output' entry;
# print_layers() below iterates over this list.
all_layers = get_all_layers(nnmodel.network['output'])

def print_layers(layers=None):
    """Print a text summary of a sequence of network layers.

    For each layer this prints its 1-based position with ``str(layer)``,
    its ``output_shape`` and, when ``get_params()`` returns something
    truthy, the comma-separated ``str()`` of each parameter.

    Parameters
    ----------
    layers : iterable, optional
        Layers to describe. When omitted, the module-level ``all_layers``
        is used, which is what the function always read before.
    """
    if layers is None:
        layers = all_layers

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    rule = '-----------------------------------------------------------------------'
    print(rule)
    print('Network architecture:')
    print(rule)
    for counter, layer in enumerate(layers, 1):
        print('layer' + str(counter) + ': ' + str(layer))
        print('shape:' + str(layer.output_shape))
        params = layer.get_params()
        if params:
            print('parameters: ' + ', '.join(str(param) for param in params))
    print(rule)

# Print the architecture summary for the layers collected in all_layers.
print_layers()

-----------------------------------------------------------------------
Network architecture:
-----------------------------------------------------------------------
layer1: <lasagne.layers.input.InputLayer object at 0x7f0ba44198d0>
shape:(None, 4, 200, 1)
layer2: <lasagne.layers.conv.Conv2DLayer object at 0x7f0ba4419850>
shape:(None, 200, 193, 1)
parameters: W
layer3: <lasagne.layers.normalization.BatchNormLayer object at 0x7f0ba4b34650>
shape:(None, 200, 193, 1)
parameters: beta, gamma, mean, inv_std
layer4: <lasagne.layers.special.NonlinearityLayer object at 0x7f0ba4419610>
shape:(None, 200, 193, 1)
layer5: <lasagne.layers.pool.MaxPool2DLayer object at 0x7f0ba548d9d0>
shape:(None, 200, 48, 1)
layer6: <lasagne.layers.conv.Conv2DLayer object at 0x7f0ba548d4d0>
shape:(None, 200, 41, 1)
parameters: W
layer7: <lasagne.layers.normalization.BatchNormLayer object at 0x7f0ba548dd90>
shape:(None, 200, 41, 1)
parameters: beta, gamma, mean, inv_std
layer8: <lasagne.layers.special.NonlinearityLa

In [95]:
from six.moves import cPickle
outputname = 'binary'
# From here on `filepath` is the common prefix of every result file, no longer
# the data pickle assigned in the first cell.
filepath = os.path.join(datapath, 'Results', outputname)
savepath = filepath + "_best.pickle"
# NOTE: unpickling can execute arbitrary code; only load result files you trust.
# The `with` block closes the handle even if unpickling raises.
with open(savepath, 'rb') as f:
    best_parameters = cPickle.load(f)

In [96]:
nnmodel.reinitialize()

# load model parameters for a given training epoch
# NOTE(review): this rebinds `best_parameters` (read from "_best.pickle" in the
# previous cell) to the epoch-1 snapshot -- confirm that is intended.
savepath = filepath + "_epoch_" + str(1) + ".pickle"
# The `with` block closes the handle even if unpickling raises.
with open(savepath, 'rb') as f:
    best_parameters = cPickle.load(f)

# install the loaded parameters into the model
nnmodel.set_model_parameters(best_parameters)

In [97]:

def get_performance(savepath):
    """Read one performance file and return its five metric records.

    The file holds six consecutive pickled objects: name, cost, metric,
    metric_std, roc and pr. The leading name is read and discarded.

    Returns
    -------
    tuple
        ``(cost, metric, metric_std, roc, pr)``
    """
    with open(savepath, 'rb') as handle:
        records = [cPickle.load(handle) for _ in range(6)]
    # records[0] is the stored name, which callers do not receive
    return tuple(records[1:])

# Load the stored performance records for each data split.
savepath = filepath + "_train_performance.pickle"
train_cost, train_metric, train_metric_std, train_roc, train_pr = get_performance(savepath)
trian_pr = train_pr  # original misspelled name, kept bound for any cell that still uses it

savepath = filepath + "_cross-validation_performance.pickle"
valid_cost, valid_metric, valid_metric_std, valid_roc, valid_pr = get_performance(savepath)

savepath = filepath + "_test_all_performance.pickle"
test_cost, test_metric, test_metric_std, test_roc, test_pr = get_performance(savepath)


In [123]:
from matplotlib import pyplot as plt
def plot_loss(loss):
    """Plot training/validation/test loss curves on one figure.

    Parameters
    ----------
    loss : sequence
        Loss series in the order train, valid, test. One to three series
        are plotted; any series beyond the third is ignored. Previously
        only lengths 2 and 3 drew anything and other lengths silently
        produced an empty plot.

    Returns
    -------
    fig, plt
        The new figure and the pyplot module.
    """
    fig = plt.figure()
    # zip stops at the shorter sequence, so one label is consumed per series
    for series, split in zip(loss, ('train', 'valid', 'test')):
        plt.plot(series, label=split + ' loss', linewidth=2)

    plt.xlabel('epoch', fontsize=22)
    plt.ylabel('loss', fontsize=22)
    plt.legend(loc='best', frameon=False, fontsize=18)
    return fig, plt

# plot_loss returns the pyplot module as its second value, so `plt` is rebound
# to the same module here.
fig, plt = plot_loss([train_cost, valid_cost, test_cost])
plt.show()
# Write the loss figure to 'test.pdf' in the working directory.
fig.savefig('test.pdf', format='pdf')

In [141]:
# Load the records stored in "<filepath>_test_performance.pickle"; final_roc and
# final_pr are the ones plotted below.
savepath = filepath + "_test_performance.pickle"
final_cost, final_metric, final_metric_std, final_roc, final_pr = get_performance(savepath)

def plot_roc_all(final_roc):
    """Overlay one ROC curve per entry of ``final_roc`` on a single figure.

    Parameters
    ----------
    final_roc : indexable
        ``final_roc[i][0]`` is plotted on the x axis (labelled false
        positive rate) and ``final_roc[i][1]`` on the y axis (labelled true
        positive rate), for ``i`` in ``range(len(final_roc))``. Each curve
        is labelled with its index.

    Returns
    -------
    fig, plt
        The new figure and the pyplot module.
    """
    fig = plt.figure()
    for i in range(len(final_roc)):
        plt.plot(final_roc[i][0], final_roc[i][1], label=str(i))
    # 17 is the size the axis labels ended up with before (22 was overridden).
    plt.xlabel('False positive rate', fontsize=17)
    plt.ylabel('True positive rate', fontsize=17)
    ax = plt.gca()
    # Explicit loops: map() is lazy on Python 3 and would never apply the size.
    for tick_label in ax.get_xticklabels():
        tick_label.set_fontsize(13)
    for tick_label in ax.get_yticklabels():
        tick_label.set_fontsize(13)
    plt.tight_layout()
    plt.legend(loc='best', frameon=False, fontsize=14)
    return fig, plt

# plot_roc_all returns the pyplot module as its second value, so `plt` is
# rebound to the same module here.
fig, plt = plot_roc_all(final_roc)
plt.show()



In [139]:
def plot_pr_all(final_pr):
    """Overlay one precision-recall curve per entry of ``final_pr``.

    Parameters
    ----------
    final_pr : indexable
        ``final_pr[i][0]`` is plotted on the x axis and ``final_pr[i][1]``
        on the y axis, for ``i`` in ``range(len(final_pr))``.
        NOTE(review): the axis labels assume index 0 is recall and index 1
        is precision -- confirm against the code that wrote the file.

    Returns
    -------
    fig, plt
        The new figure and the pyplot module.
    """
    fig = plt.figure()
    # Iterate over the argument itself; the loop used to be sized by the
    # unrelated global `final_roc`.
    for i in range(len(final_pr)):
        plt.plot(final_pr[i][0], final_pr[i][1])
    plt.xlabel('Recall', fontsize=22)
    plt.ylabel('Precision', fontsize=22)
    #plt.legend(loc='best', frameon=False, fontsize=14)
    return fig, plt

fig, plt = plot_pr_all(final_pr)
ax = plt.gca()
ax.xaxis.label.set_fontsize(17)
ax.yaxis.label.set_fontsize(17)
# Explicit loops: map() is lazy on Python 3 and would never apply the font size.
for xl in ax.get_xticklabels():
    xl.set_fontsize(13)
for yl in ax.get_yticklabels():
    yl.set_fontsize(13)
plt.tight_layout()
plt.show()
out_pdf = 'my.pdf'
#plt.savefig(out_pdf)
#plt.close()


#fig.savefig('test.pdf', format='pdf')

In [105]:
# Names of the network entries whose key contains 'conv' and whose layer has a W attribute.
network = nnmodel.network
keys = network.keys()
filter_layers = []
for key in keys:
    if 'conv' not in key or not hasattr(network[key], 'W'):
        continue
    filter_layers.append(key)
filter_layers

['conv2', 'conv1']

In [107]:
import matplotlib.image as img

def plot_conv_weights(layer, figsize=(6, 6)):
    """Draw each slice ``W[i]`` of a layer's squeezed weights as a gray image.

    Based on nolearn's weight-plotting helper. The images are laid out on a
    square grid with ``ceil(sqrt(W.shape[0]))`` rows and columns; every axis
    has ticks and frame hidden, and unused cells stay empty.

    Returns
    -------
    figs, axes
        The figure and the array of axes from ``plt.subplots``.
    """
    kernels = np.squeeze(layer.W.get_value())
    count = kernels.shape[0]
    side = np.ceil(np.sqrt(count)).astype(int)

    figs, axes = plt.subplots(side, side, figsize=figsize, frameon=False)

    for index, panel in enumerate(axes.ravel()):
        panel.set_xticks([])
        panel.set_yticks([])
        panel.axis('off')
        if index < count:
            panel.imshow(kernels[index], cmap='gray', interpolation='nearest')

    return figs, axes

# Show the weights of the 'conv2' entry of the network.
layer = network['conv2']
figs, axes = plot_conv_weights(layer)
figs.tight_layout()
# Set the horizontal spacing between subplots to a very small value.
plt.subplots_adjust(wspace=.001)
plt.show()

In [108]:
# Names of the network entries whose key contains 'dense' and whose layer has a W attribute.
network = nnmodel.network
keys = network.keys()
filter_layers = []
for key in keys:
    if 'dense' not in key or not hasattr(network[key], 'W'):
        continue
    filter_layers.append(key)
filter_layers


['dense']

In [110]:
def plot_weights(weights):
    """Show the transpose of ``weights`` as a gray image on a new 6x6 figure.

    Axis ticks are removed and the figure is shown before returning.

    Returns
    -------
    plt
        The pyplot module.
    """
    plt.figure(figsize=(6, 6))
    transposed = weights.T
    plt.imshow(transposed, cmap='gray')
    # an empty tick list hides the ticks on that axis
    plt.xticks([])
    plt.yticks([])
    plt.show()
    return plt

# Show the weight matrix of the 'dense' entry of the network.
layer = network['dense']
weights = layer.W.get_value()
# plot_weights returns the pyplot module, so `plt` is rebound to the same module.
plt = plot_weights(weights)
# plot_weights already calls plt.show() itself; this is a second call.
plt.show()

In [None]:
import theano.tensor as T
import theano
from lasagne.layers import get_output

def plot_conv_activity(activity, figsize=(6, 8), original=None):
    """Plot the activation maps of one layer for a single data point.

    Based on nolearn's activity-plotting helper. Row 0 of the subplot grid
    shows the input image; the rows below show one map per index along
    axis 1 of ``activity``.

    Parameters
    ----------
    activity : numpy.ndarray
        Layer output. ``activity[0][i]`` must be 2-D for every ``i`` along
        axis 1.
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    original : numpy.ndarray, optional
        Input whose ``[0][0]`` slice is drawn as the 'original' panel.
        When omitted, the module-level ``x`` is used, which is what the
        function always read before.

    Returns
    -------
    plt
        The pyplot module, after ``plt.show()`` has been called.

    Raises
    ------
    ValueError
        If an activation map is not 2-D.
    """
    source = x if original is None else original

    num_maps = activity.shape[1]
    nrows = np.ceil(np.sqrt(num_maps)).astype(int)
    ncols = nrows

    # squeeze=False keeps `axes` 2-D even when the grid has a single column.
    figs, axes = plt.subplots(nrows + 1, ncols, figsize=figsize, squeeze=False)
    axes[0, ncols // 2].imshow(1 - source[0][0], cmap='gray', interpolation='nearest')
    axes[0, ncols // 2].set_title('original')

    for ax in axes.flatten():
        ax.set_xticks([])
        ax.set_yticks([])
        ax.axis('off')

    # Maps start in row 1 so they never draw over the 'original' panel in row 0.
    for i, ax in enumerate(axes[1:].ravel()):
        if i >= num_maps:
            break
        ndim = activity[0][i].ndim
        if ndim != 2:
            raise ValueError("Wrong number of dimensions, image data should "
                             "have 2, instead got {}".format(ndim))
        ax.imshow(-activity[0][i], cmap='gray', interpolation='nearest')
    plt.show()
    return plt

layer = network['conv1']
# First test input cast to float32, with a new leading axis added.
x = np.expand_dims(test[0][0].astype(np.float32), axis=0)

# compile theano function
# Symbolic 4-D input cast to theano's configured float type; the compiled
# function maps an input to the output of `layer`.
input_var = T.tensor4('input').astype(theano.config.floatX)
get_activity = theano.function([input_var], get_output(layer, input_var))

# get activation info
activity = get_activity(x)
# plot_conv_activity reads the module-level `x` for its 'original' panel.
plot_conv_activity(activity)
activity.shape



In [None]:
# Drop all length-1 axes; this rebinds `activity`, so later cells see the
# squeezed array.
activity = np.squeeze(activity)

# Show the squeezed activity as one gray image with ticks removed.
fig = plt.figure()
plt.imshow(activity, cmap='gray')
plt.xticks([])
plt.yticks([])
plt.show()
activity.shape


In [None]:

layer = network['conv1']
W =  np.squeeze(layer.W.get_value())
weights = W[0]

# The original `sum()` call had no argument and raised TypeError.
# NOTE(review): assumes the intent was to divide the filter by the sum of its
# own entries -- confirm; weights may be negative, so the sum can be near zero.
weights = weights / weights.sum()

In [None]:
# Write W[0] to 'test.table' in the working directory as tab-separated text.
np.savetxt('test.table', W[0], delimiter='\t')

AttributeError: 'str' object has no attribute 'input_layer'

In [None]:

def occlusion_heatmap(net, x, target, square_length=7):
    """An occlusion test that checks an image for its critical parts.

    In this function, a square part of the image is occluded (i.e. set
    to 0) and then the net is tested for its propensity to predict the
    correct label. One should expect that this propensity shrinks if
    critical parts of the image are occluded. If not, this indicates
    overfitting.

    Depending on the depth of the net and the size of the image, this
    function may take a while to finish, since one prediction for each
    pixel of the image is made.

    Currently, all color channels are occluded at the same time. Also,
    this does not really work if images are randomly distorted by the
    batch iterator.

    See paper: Zeiler, Fergus 2013

    Parameters
    ----------
    net : object with a ``predict_proba`` method
      The neural net to test. ``predict_proba`` is called with a batch of
      shape (s1, c, s0, s1) and must return one row of class
      probabilities per image.
    x : np.array
      The input data, should be of shape (1, c, x, y). Only makes
      sense with image data. It is not modified.
    target : int
      The true value of the image. If the net makes several
      predictions, say 10 classes, this indicates which one to look
      at.
    square_length : int (default=7)
      The length of the side of the square that occludes the image.
      Must be an odd number.

    Returns
    -------
    heat_array : np.array (with same size as image)
      A 2D np.array that at each point (i, j) contains the predicted
      probability of the correct class if the image is occluded by a
      square with center (i, j). Near the border the square is clipped
      to the image.

    Raises
    ------
    ValueError
      If ``x`` is not of shape (1, c, x, y) or ``square_length`` is even.
    """
    if (x.ndim != 4) or x.shape[0] != 1:
        raise ValueError("This function requires the input data to be of "
                         "shape (1, c, x, y), instead got {}".format(x.shape))
    if square_length % 2 == 0:
        raise ValueError("Square length has to be an odd number, instead "
                         "got {}.".format(square_length))

    img = x[0]
    _, col, s0, s1 = x.shape

    # Half-width of the square, so it covers [i - half, i + half] and is
    # centred on (i, j). The former padding arithmetic put it one pixel up/left.
    half = square_length // 2
    heat_array = np.zeros((s0, s1))
    x_occluded = np.empty((s1, col, s0, s1), dtype=img.dtype)

    for i in range(s0):
        row_lo, row_hi = max(i - half, 0), min(i + half + 1, s0)
        # batch s1 occluded images for faster prediction; start every row
        # from fresh copies of the unoccluded image
        x_occluded[:] = img
        for j in range(s1):
            col_lo, col_hi = max(j - half, 0), min(j + half + 1, s1)
            x_occluded[j, :, row_lo:row_hi, col_lo:col_hi] = 0.
        # The class count is taken from the predictions themselves, so no
        # layer-shape helper or `net.layers_` attribute is needed.
        y_proba = np.asarray(net.predict_proba(x_occluded)).reshape(s1, -1)
        # from predicted probabilities, pick only those of target class
        heat_array[i] = y_proba[:, target]
    return heat_array



def plot_occlusion(net, X, target, square_length=7, figsize=(9, None)):
    """Plot which regions of each image matter for its classification.

    Delegates to ``_plot_heat_map``, handing it a callback that computes
    the occlusion heatmap of sample ``n`` against ``target[n]``.

    See paper: Zeiler, Fergus 2013

    Parameters
    ----------
    net : NeuralNet instance
      The neural net to test.
    X : numpy.array
      The input data, should be of shape (b, c, 0, 1). Only makes
      sense with image data.
    target : list or numpy.array of ints
      The true value for each image. If the net makes several
      predictions, say 10 classes, this indicates which one to look
      at. When X holds more than one sample, each needs its own target.
    square_length : int (default=7)
      Side length of the occluding square. Must be an odd number.
    figsize : tuple (int, int)
      Size of the figure.

    Plots
    -----
    Figure with 3 subplots: the original image, the occlusion heatmap,
    and both images super-imposed.
    """
    def heatmap_for_sample(net, X, n):
        # the n-th sample is scored against its own target label
        return occlusion_heatmap(net, X, target[n], square_length)

    return _plot_heat_map(net, X, figsize, heatmap_for_sample)
