Copyright (C) Egon Kidmose 2015-2017

This file is part of lstm-rnn-correlation.

lstm-rnn-correlation is free software: you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.

lstm-rnn-correlation is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with lstm-rnn-correlation. If not, see
<http://www.gnu.org/licenses/>.


# Learning how to tie weights together
The purpose of this notebook is to learn, and to demonstrate, how the weights of two layers can be tied together so that both layers always use the same weights.

         IN
       /    \ 
     L1      L2
     |        |
    OUT1  == OUT2

In [None]:
from __future__ import print_function

import sys
import os
import time

import numpy as np
import theano
import theano.tensor as T

import lasagne
from lasagne.layers import *
from lasagne.nonlinearities import *

In [None]:
def load_dataset():
    """Generate random XOR data for training, validation and testing.

    Each of the three splits holds 1000 samples: a boolean input array of
    shape ``(1000, 2)`` and a boolean target array of shape ``(1000,)``
    containing the XOR of the two input columns.

    Returns:
        The tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)``.
    """
    def get_xors(n):
        # Draw n random bit pairs; the label of a pair is the XOR of its bits.
        bits = np.random.randint(0, 2, (n, 2)).astype(bool)
        labels = np.logical_xor(bits[:, 0], bits[:, 1])
        return bits, labels

    # Splits are drawn in the order train, validation, test.
    splits = []
    for _ in range(3):
        splits.extend(get_xors(1000))
    return tuple(splits)

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield ``(inputs, targets)`` minibatches of exactly ``batchsize`` samples.

    Only full batches are produced: samples left over after the last full
    batch are not yielded.

    Args:
        inputs: Indexable sequence of samples (e.g. a numpy array).
        targets: Indexable sequence of the same length as ``inputs``.
        batchsize: Number of samples in every yielded batch.
        shuffle: If true, samples are visited in a random order drawn with
            ``np.random.shuffle``; otherwise in their original order.

    Yields:
        Tuples ``(inputs[selection], targets[selection])``.
    """
    n_samples = len(inputs)
    assert n_samples == len(targets)

    order = None
    if shuffle:
        order = np.arange(n_samples)
        np.random.shuffle(order)

    last_start = n_samples - batchsize
    for begin in range(0, last_start + 1, batchsize):
        end = begin + batchsize
        # Index with a permuted index array when shuffling, a plain slice otherwise.
        selection = slice(begin, end) if order is None else order[begin:end]
        yield inputs[selection], targets[selection]

In [None]:
# Generate the random XOR splits
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()

# Symbolic variables that feed both the training and the test network
input_var = T.imatrix('inputs')
target_var = T.ivector('targets')

# Create training network
net = InputLayer(shape=(None, 2), input_var=input_var, name='INPUT-LAYER')
net = DenseLayer(net, num_units=3, nonlinearity=sigmoid, name='SIGMOID-LAYER')
net = DenseLayer(net, num_units=2, nonlinearity=softmax, name='OUTPUT-LAYER')

# Create an identical test network, with tied weights
test_net = InputLayer(shape=(None, 2), input_var=input_var, name='TEST-INPUT-LAYER')
for l in lasagne.layers.get_all_layers(net):
    print("{} ({}):".format(l.name, l))
    if isinstance(l, InputLayer):
        # The test network already got its own input layer above.
        print(' - skipping')
    elif isinstance(l, DenseLayer):
        # Hand the training layer's own W and b to the new layer (rather than
        # initializers) so that both layers use the very same parameters.
        test_net = DenseLayer(
            test_net, num_units=l.num_units, nonlinearity=l.nonlinearity, name='TEST-'+l.name,
            W=l.W, b=l.b,
        )
        added = get_all_layers(test_net)[-1]
        print(' - added layer: {} ({})'.format(added, added.name))
    else:
        raise ValueError("Unhandled layer")

In [None]:
# Training: mean cross-entropy of the training network, minimised with SGD
prediction = lasagne.layers.get_output(net)
loss = lasagne.objectives.categorical_crossentropy(
    prediction, target_var).mean()
params = lasagne.layers.get_all_params(net, trainable=True)
updates = lasagne.updates.sgd(loss, params, learning_rate=0.1)

# Testing: the same loss, plus accuracy, evaluated on the test network
test_prediction = lasagne.layers.get_output(test_net, deterministic=True)
test_loss = lasagne.objectives.categorical_crossentropy(
    test_prediction, target_var).mean()
test_acc = T.mean(
    T.eq(T.argmax(test_prediction, axis=1), target_var),
    dtype=theano.config.floatX,
)

# train_fn applies the SGD updates and returns the training loss;
# val_fn only evaluates and returns [loss, accuracy] of the test network.
train_fn = theano.function(
    inputs=[input_var, target_var], outputs=loss, updates=updates)
val_fn = theano.function(
    inputs=[input_var, target_var], outputs=[test_loss, test_acc])

In [None]:
print("Starting training...")
num_epochs = 5000
# Printing all 5000 epochs would flood the output, so progress is reported
# only every `report_every` epochs and after the final epoch.
report_every = 500
for epoch in range(num_epochs):
    start_time = time.time()

    # One full pass over the shuffled training data
    train_err = 0
    train_batches = 0
    for inputs, targets in iterate_minibatches(X_train, y_train, 100, shuffle=True):
        train_err += train_fn(inputs, targets)
        train_batches += 1

    # The validation results are only needed for the report, so skip the
    # validation pass on epochs that do not report.
    is_last_epoch = epoch + 1 == num_epochs
    if (epoch + 1) % report_every != 0 and not is_last_epoch:
        continue

    # One full pass over the validation data
    val_err = 0
    val_acc = 0
    val_batches = 0
    for inputs, targets in iterate_minibatches(X_val, y_val, 100, shuffle=False):
        err, acc = val_fn(inputs, targets)
        val_err += err
        val_acc += acc
        val_batches += 1

    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))
    print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
    print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
    print("  validation accuracy:\t\t{:.2f} %".format(
        val_acc / val_batches * 100))

# Evaluate every full minibatch of the test set with the test network,
# then report the loss and accuracy averaged over the batches.
batch_results = [
    val_fn(inputs, targets)
    for inputs, targets in iterate_minibatches(X_test, y_test, 100, shuffle=False)
]
test_batches = len(batch_results)
test_err = sum(result[0] for result in batch_results)
test_acc = sum(result[1] for result in batch_results)

print("Final results:")
print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
print("  test accuracy:\t\t{:.2f} %".format(
    test_acc / test_batches * 100))

In [None]:
def _print_layer_params(network):
    """Print each layer of ``network`` with its name, parameters and values."""
    for layer in get_all_layers(network):
        print(layer)
        print(' {}'.format(layer.name))
        for param in layer.get_params():
            print(' {}'.format(param))
            print('  {}'.format(param.get_value()))


# One decimal is enough to compare the two networks' values by eye.
np.set_printoptions(precision=1)

print('trained network paramters:')
_print_layer_params(net)

print()

print('test network paramters:')
_print_layer_params(test_net)

