In [1]:
import os
import sys
import traceback

import numpy
import theano
import theano.tensor as t_func
from theano.tensor.shared_randomstreams import RandomStreams

try:
    import cPickle as pickle
except ImportError:
    import pickle as pickle

from ExpUtils import *
from theano_func.CostFunc import get_cost_type_semi
from theano_func.source import optimizers
from theano_func.source import costs
from theano_func.models.fnn_mnist_semisup import FNN_MNIST
from collections import OrderedDict
import load_data
%load_ext autoreload
%autoreload 2

In [None]:
def train(args):
    """Train the semi-supervised MNIST network and record metrics per epoch.

    The function compiles the Theano training / evaluation functions, runs
    ``args['epochs']`` epochs of ``args['iterations']`` parameter updates each,
    evaluates on the labelled training set and the test set after every epoch,
    decays the learning rate, and finally runs one pass of
    ``model.forward_for_finetuning_batch_stat`` over the unlabelled data before
    a last evaluation. Metric histories are written out through ``saver``.

    Parameters
    ----------
    args : dict
        Keys read directly here: ``'seed'``, ``'size'``,
        ``'num_validation_samples'``, ``'layer_sizes'`` (dash-separated ints),
        ``'lr'``, ``'lr_decay'``, ``'batch_size'``, ``'ul_batch_size'``,
        ``'epochs'``, ``'iterations'`` and ``'vis'``. The whole dict is also
        forwarded to ``get_cost_type_semi``.

    Returns
    -------
    None

    Notes
    -----
    ``wlog`` and ``saver`` are not defined in this cell; presumably they come
    from ``from ExpUtils import *`` -- TODO confirm.
    """
    numpy.random.seed(int(args['seed']))

    dataset = load_data.load_mnist_for_semi_sup(n_l=int(args['size']),
                                                n_v=int(args['num_validation_samples']))

    x_train, t_train, ul_x_train = dataset[0]
    x_test, t_test = dataset[2]

    # Re-seed so that whatever follows does not depend on how many random
    # draws the data loader consumed.
    numpy.random.seed(int(args['seed']))
    layer_sizes = [int(layer_size) for layer_size in args['layer_sizes'].split('-')]
    model = FNN_MNIST(layer_sizes=layer_sizes)

    x = t_func.matrix()
    ul_x = t_func.matrix()
    t = t_func.ivector()

    cost_semi = get_cost_type_semi(model, x, t, ul_x, args)
    nll = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_test)
    error = costs.error(x=x, t=t, forward_func=model.forward_test)

    optimizer = optimizers.ADAM(cost=cost_semi, params=model.params, alpha=float(args['lr']))

    index = t_func.iscalar()
    ul_index = t_func.iscalar()
    batch_size = int(args['batch_size'])
    ul_batch_size = int(args['ul_batch_size'])
    n_epochs = int(args['epochs'])
    n_iterations = int(args['iterations'])

    def _minibatch(shared, size, idx):
        # Symbolic slice selecting mini-batch number `idx` of width `size`.
        return shared[size * idx:size * (idx + 1)]

    def _compile_eval(output, inputs_shared, targets_shared):
        # Compile `output` evaluated on one labelled mini-batch chosen by `index`.
        return theano.function(inputs=[index], outputs=output,
                               givens={
                                   x: _minibatch(inputs_shared, batch_size, index),
                                   t: _minibatch(targets_shared, batch_size, index)})

    f_train = theano.function(inputs=[index, ul_index], outputs=cost_semi, updates=optimizer.updates,
                              givens={
                                  x: _minibatch(x_train, batch_size, index),
                                  t: _minibatch(t_train, batch_size, index),
                                  ul_x: _minibatch(ul_x_train, ul_batch_size, ul_index)},
                              on_unused_input='ignore')
    f_nll_train = _compile_eval(nll, x_train, t_train)
    f_nll_test = _compile_eval(nll, x_test, t_test)
    f_error_train = _compile_eval(error, x_train, t_train)
    f_error_test = _compile_eval(error, x_test, t_test)

    # Each call multiplies the optimizer's step size by args['lr_decay'].
    f_lr_decay = theano.function(inputs=[], outputs=optimizer.alpha,
                                 updates={optimizer.alpha: theano.shared(
                                     numpy.array(args['lr_decay']).astype(
                                         theano.config.floatX)) * optimizer.alpha})

    # Shuffle training set (inputs and targets share one permutation).
    randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(n=x_train.shape[0])
    update_permutation = OrderedDict()
    update_permutation[x_train] = x_train[randix]
    update_permutation[t_train] = t_train[randix]
    f_permute_train_set = theano.function(inputs=[], outputs=x_train, updates=update_permutation)

    # Shuffle unlabeled training set
    ul_randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(n=ul_x_train.shape[0])
    update_ul_permutation = OrderedDict()
    update_ul_permutation[ul_x_train] = ul_x_train[ul_randix]
    f_permute_ul_train_set = theano.function(inputs=[], outputs=ul_x_train, updates=update_ul_permutation)

    statuses = {'nll_train': [], 'error_train': [], 'nll_test': [], 'error_test': []}

    n_train = x_train.get_value().shape[0]
    n_test = x_test.get_value().shape[0]
    n_ul_train = ul_x_train.get_value().shape[0]

    # Number of *full* mini-batches. Floor division keeps the training cursor
    # consistent with the evaluation loops below and behaves the same on
    # Python 2 and Python 3 (the original `/` was true division on Python 3
    # and could step onto a trailing partial batch). max(..., 1) keeps the
    # cursor pinned at 0 when the set is smaller than one batch.
    n_l_batches = max(n_train // batch_size, 1)
    n_ul_batches = max(n_ul_train // ul_batch_size, 1)

    def _evaluate():
        # Append the current train/test NLL and error counts to `statuses`
        # and return the test accuracy derived from the error count.
        train_batches = range(n_train // batch_size)
        test_batches = range(n_test // batch_size)
        sum_nll_train = numpy.sum(numpy.array([f_nll_train(i) for i in train_batches])) * batch_size
        sum_error_train = numpy.sum(numpy.array([f_error_train(i) for i in train_batches]))
        sum_nll_test = numpy.sum(numpy.array([f_nll_test(i) for i in test_batches])) * batch_size
        sum_error_test = numpy.sum(numpy.array([f_error_test(i) for i in test_batches]))
        statuses['nll_train'].append(sum_nll_train / n_train)
        statuses['error_train'].append(sum_error_train)
        statuses['nll_test'].append(sum_nll_test / n_test)
        statuses['error_test'].append(sum_error_test)
        return 1 - 1.0 * statuses['error_test'][-1] / n_test

    def _write_scalars(accuracy, step):
        # Push the latest metrics to saver.writer (presumably a
        # TensorBoard-style summary writer -- TODO confirm).
        saver.writer.add_scalar("Train/Loss", statuses['nll_train'][-1], step)
        saver.writer.add_scalar("Train/Error", statuses['error_train'][-1], step)
        saver.writer.add_scalar("Test/Loss", statuses['nll_test'][-1], step)
        saver.writer.add_scalar("Test/Acc", accuracy, step)

    l_i = 0
    ul_i = 0
    for epoch in range(n_epochs):

        f_permute_train_set()
        f_permute_ul_train_set()
        for _ in range(n_iterations):
            f_train(l_i, ul_i)
            # The cursors persist across epochs and wrap independently.
            l_i = (l_i + 1) % n_l_batches
            ul_i = (ul_i + 1) % n_ul_batches

        acc = _evaluate()
        wlog("[Epoch] %d" % epoch)
        wlog("nll_train: %f error_train : %d nll_test : %f error_test : %d accuracy:%f" % (
            statuses['nll_train'][-1], statuses['error_train'][-1], statuses['nll_test'][-1],
            statuses['error_test'][-1], acc))
        if args["vis"]:
            _write_scalars(acc, epoch)
        f_lr_decay()

    # fine_tune batch stat
    f_fine_tune = theano.function(inputs=[ul_index], outputs=model.forward_for_finetuning_batch_stat(x),
                                  givens={x: _minibatch(ul_x_train, ul_batch_size, ul_index)})
    for i in range(n_ul_train // ul_batch_size):
        f_fine_tune(i)

    # Final evaluation after fine-tuning. The accuracy is recomputed here
    # (previously the stale value from the last epoch was logged) and written
    # at step `n_epochs` so it neither collides with the last epoch's scalars
    # nor depends on the loop variable, which is undefined when n_epochs == 0.
    final_acc = _evaluate()
    wlog("final nll_train: %f error_train: %d nll_test: %f error_test: %d accuracy:%f" % (
        statuses['nll_train'][-1], statuses['error_train'][-1], statuses['nll_test'][-1],
        statuses['error_test'][-1], final_acc))
    if args["vis"]:
        _write_scalars(final_acc, n_epochs)

    error_test = numpy.array(statuses['error_test'])
    saver.save_npy(numpy.array(statuses['nll_train']), "train_nll")
    saver.save_npy(numpy.array(statuses['error_train']), "train_error")
    saver.save_npy(numpy.array(statuses['nll_test']), "test_nll")
    saver.save_npy(error_test, "test_error")
    saver.save_npy(1 - 1.0 * error_test / n_test, "test_acc")


# Experiment configuration handed to train(). Keys that train() does not read
# itself are presumably consumed by get_cost_type_semi, which receives the
# whole dict -- verify against that helper.
arg = dict(
    # Reproducibility / logging
    seed=1,
    vis=False,  # when true, train() also writes scalars through saver.writer
    # Data split: passed as n_l / n_v to load_data.load_mnist_for_semi_sup
    size=100,
    num_validation_samples=1000,
    dataset='mnist',
    # Model and optimisation schedule
    layer_sizes='784-1200-1200-10',  # dash-separated, parsed into ints by train()
    epochs=100,
    iterations=400,  # parameter updates per epoch
    batch_size=100,
    ul_batch_size=250,
    lr=0.001,
    lr_decay=0.95,  # multiplicative factor applied once per epoch
    # Cost-function settings (not read directly by train())
    cost_type='MLE',
    epsilon=0.3,
    norm_constraint='L2',
    num_power_iter=1,
    lamb=1,
)
train(arg)

2019-03-08 23:00:42,961 - <ipython-input-2-7791375fbe33>[line:97]: [Epoch] 0
2019-03-08 23:00:43,076 - <ipython-input-2-7791375fbe33>[line:101]: nll_train: 0.113660 error_train : 0 nll_test : 0.988779 error_test : 2475 accuracy:0.752500
2019-03-08 23:01:14,481 - <ipython-input-2-7791375fbe33>[line:97]: [Epoch] 1
2019-03-08 23:01:14,482 - <ipython-input-2-7791375fbe33>[line:101]: nll_train: 0.059409 error_train : 0 nll_test : 0.908701 error_test : 2475 accuracy:0.752500
