In [1]:
import os
import sys
import traceback

import numpy
import theano
import theano.tensor as t_func
from theano.tensor.shared_randomstreams import RandomStreams

try:
    import cPickle as pickle
except ImportError:
    import pickle as pickle

from ExpUtils import *
from theano_func.CostFunc import get_cost_type_semi
from theano_func.source import optimizers
from theano_func.source import costs
from theano_func.models.fnn_mnist_semisup import FNN_MNIST
from collections import OrderedDict
from theano_func import load_data
%load_ext autoreload
%autoreload 2

In [2]:
def _evaluate_in_batches(f_nll, f_error, n_samples, batch_size):
    """Aggregate per-batch loss and error over one whole data split.

    Args:
        f_nll: callable taking a batch index and returning that batch's NLL.
            It is treated as a per-batch *mean* (the same assumption the
            original ``* batch_size`` bookkeeping made).
        f_error: callable taking a batch index and returning that batch's
            error value; the values are summed as-is.
        n_samples: number of rows in the split.
        batch_size: number of rows per batch.

    Returns:
        ``(mean_nll, total_error)``. Every sample is visited: a trailing
        partial batch is included and weighted by its real size, so dividing
        by ``n_samples`` is exact even when ``n_samples`` is not a multiple of
        ``batch_size``. For an empty split the result is ``(nan, 0)``.
    """
    if n_samples <= 0:
        return float('nan'), 0

    n_batches = -(-n_samples // batch_size)  # ceiling division
    weighted_nll = []
    errors = []
    for batch_idx in range(n_batches):
        # The last batch may be short; slicing past the end just yields fewer rows.
        rows_in_batch = min(batch_size, n_samples - batch_idx * batch_size)
        weighted_nll.append(f_nll(batch_idx) * rows_in_batch)
        errors.append(f_error(batch_idx))
    return numpy.sum(numpy.array(weighted_nll)) / n_samples, numpy.sum(numpy.array(errors))


def train(args):
    """Train an FNN_MNIST model on semi-supervised MNIST and log per-epoch metrics.

    Args:
        args: dict of settings. Keys read directly here: ``seed``, ``size``
            (passed as ``n_l`` to the data loader), ``num_validation_samples``
            (passed as ``n_v``), ``layer_sizes`` (dash-separated ints),
            ``lr``, ``lr_decay``, ``batch_size``, ``ul_batch_size``,
            ``epochs``, ``iterations`` (training steps per epoch) and ``vis``.
            The whole dict is also forwarded to ``get_cost_type_semi``, which
            presumably consumes the remaining cost-related keys.

    Returns:
        None. Metrics are emitted through ``wlog`` (and through
        ``saver.writer`` when ``args['vis']`` is truthy).
    """
    numpy.random.seed(int(args['seed']))

    dataset = load_data.load_mnist_for_semi_sup(n_l=int(args['size']),
                                                n_v=int(args['num_validation_samples']))

    # dataset[0] holds the labelled/unlabelled training data, dataset[2] the test
    # split; dataset[1] is not used here (presumably the validation split).
    x_train, t_train, ul_x_train = dataset[0]
    x_test, t_test = dataset[2]

    # Re-seed so model construction starts from the same RNG state regardless of
    # how many random draws the data loader made.
    numpy.random.seed(int(args['seed']))
    layer_sizes = [int(layer_size) for layer_size in args['layer_sizes'].split('-')]
    model = FNN_MNIST(layer_sizes=layer_sizes)

    x = t_func.matrix()
    ul_x = t_func.matrix()
    t = t_func.ivector()

    cost_semi = get_cost_type_semi(model, x, t, ul_x, args)
    nll = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_test)
    error = costs.error(x=x, t=t, forward_func=model.forward_test)

    optimizer = optimizers.ADAM(cost=cost_semi, params=model.params, alpha=float(args['lr']))

    index = t_func.iscalar()
    ul_index = t_func.iscalar()
    batch_size = int(args['batch_size'])
    ul_batch_size = int(args['ul_batch_size'])

    # One optimisation step on labelled batch `index` and unlabelled batch `ul_index`.
    f_train = theano.function(inputs=[index, ul_index], outputs=cost_semi, updates=optimizer.updates,
                              givens={
                                  x: x_train[batch_size * index:batch_size * (index + 1)],
                                  t: t_train[batch_size * index:batch_size * (index + 1)],
                                  ul_x: ul_x_train[ul_batch_size * ul_index:ul_batch_size * (ul_index + 1)]},
                              on_unused_input='ignore')
    f_nll_train = theano.function(inputs=[index], outputs=nll,
                                  givens={
                                      x: x_train[batch_size * index:batch_size * (index + 1)],
                                      t: t_train[batch_size * index:batch_size * (index + 1)]})
    f_nll_test = theano.function(inputs=[index], outputs=nll,
                                 givens={
                                     x: x_test[batch_size * index:batch_size * (index + 1)],
                                     t: t_test[batch_size * index:batch_size * (index + 1)]})

    f_error_train = theano.function(inputs=[index], outputs=error,
                                    givens={
                                        x: x_train[batch_size * index:batch_size * (index + 1)],
                                        t: t_train[batch_size * index:batch_size * (index + 1)]})
    f_error_test = theano.function(inputs=[index], outputs=error,
                                   givens={
                                       x: x_test[batch_size * index:batch_size * (index + 1)],
                                       t: t_test[batch_size * index:batch_size * (index + 1)]})

    # Multiplies the optimizer's learning rate by args['lr_decay'] on each call.
    f_lr_decay = theano.function(inputs=[], outputs=optimizer.alpha,
                                 updates={optimizer.alpha: theano.shared(
                                     numpy.array(args['lr_decay']).astype(
                                         theano.config.floatX)) * optimizer.alpha})

    # Shuffle training set (inputs and targets share one permutation).
    randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(n=x_train.shape[0])
    update_permutation = OrderedDict()
    update_permutation[x_train] = x_train[randix]
    update_permutation[t_train] = t_train[randix]
    f_permute_train_set = theano.function(inputs=[], outputs=x_train, updates=update_permutation)

    # Shuffle unlabeled training set
    ul_randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(n=ul_x_train.shape[0])
    update_ul_permutation = OrderedDict()
    update_ul_permutation[ul_x_train] = ul_x_train[ul_randix]
    f_permute_ul_train_set = theano.function(inputs=[], outputs=ul_x_train, updates=update_ul_permutation)

    statuses = {'nll_train': [], 'error_train': [], 'nll_test': [], 'error_test': []}

    n_train = x_train.get_value().shape[0]
    n_test = x_test.get_value().shape[0]
    n_ul_train = ul_x_train.get_value().shape[0]

    def record_metrics():
        """Evaluate both splits and append the results to `statuses`."""
        nll_train, error_train = _evaluate_in_batches(f_nll_train, f_error_train, n_train, batch_size)
        nll_test, error_test = _evaluate_in_batches(f_nll_test, f_error_test, n_test, batch_size)
        statuses['nll_train'].append(nll_train)
        statuses['error_train'].append(error_train)
        statuses['nll_test'].append(nll_test)
        statuses['error_test'].append(error_test)

    # Batch cursors persist across epochs; they wrap independently of each other.
    l_i = 0
    ul_i = 0
    for epoch in range(int(args['epochs'])):

        f_permute_train_set()
        f_permute_ul_train_set()
        for it in range(int(args['iterations'])):
            f_train(l_i, ul_i)
            # Under true division the threshold can be fractional, in which case a
            # trailing partial batch is visited before the cursor wraps to 0.
            l_i = 0 if l_i >= n_train / batch_size - 1 else l_i + 1
            ul_i = 0 if ul_i >= n_ul_train / ul_batch_size - 1 else ul_i + 1

        record_metrics()
        wlog("[Epoch] %d" % epoch)
        acc = 1 - 1.0*statuses['error_test'][-1]/n_test
        wlog("nll_train: %f error_train : %d nll_test : %f error_test : %d accuracy:%f" % (
            statuses['nll_train'][-1], statuses['error_train'][-1], statuses['nll_test'][-1],
            statuses['error_test'][-1], acc))
        if args["vis"]:
            # NOTE(review): `saver` is not defined in this function or the visible
            # cells; presumably it comes from `from ExpUtils import *` — confirm
            # before running with vis=True.
            saver.writer.add_scalar("Train/Loss", statuses['nll_train'][-1], epoch)
            saver.writer.add_scalar("Train/Error", statuses['error_train'][-1], epoch)
            saver.writer.add_scalar("Test/Loss", statuses['nll_test'][-1], epoch)
            saver.writer.add_scalar("Test/Acc", acc, epoch)
        f_lr_decay()

    # fine_tune batch stat: run the model's fine-tuning forward pass over every
    # full unlabelled batch; only the calls' side effects matter.
    f_fine_tune = theano.function(inputs=[ul_index], outputs=model.forward_for_finetuning_batch_stat(x),
                                  givens={x: ul_x_train[ul_batch_size * ul_index:ul_batch_size * (ul_index + 1)]})
    for ul_batch in range(n_ul_train // ul_batch_size):
        f_fine_tune(ul_batch)

    record_metrics()
    wlog("final nll_train: %f error_train: %d nll_test: %f error_test: %d accuracy:%f" % (
        statuses['nll_train'][-1], statuses['error_train'][-1], statuses['nll_test'][-1],
        statuses['error_test'][-1], 1 - 1.0*statuses['error_test'][-1]/n_test))

# Requests CUDA device 1 and float32 as Theano's floatX.
# NOTE(review): `import theano` already ran in the first cell, and Theano reads
# THEANO_FLAGS when the package is imported, so this assignment probably has no
# effect on the current kernel — confirm, and if so move it above the import.
os.environ['THEANO_FLAGS'] = "device=cuda%s,floatX=float32" % "1"

In [4]:

# Run with 'size' = 100 ('size' is what train() passes to the loader as n_l).
arg = {
    # Seed, labelled-set size, TensorBoard-style logging switch.
    'seed': 1, 'size': 100, 'vis': False,
    # Schedule: epochs, training steps per epoch, per-epoch learning-rate factor.
    'epochs': 100, 'iterations': 400, 'lr_decay': 0.95,
    # Batching and data split.
    'batch_size': 100, 'ul_batch_size': 250, 'num_validation_samples': 1000,
    # Network layout (dash-separated layer widths).
    'layer_sizes': '784-1200-1200-10',
    # Not read by train() itself; forwarded with the dict to get_cost_type_semi.
    'dataset': 'mnist', 'epsilon': 0.3, 'cost_type': 'MLE',
    'norm_constraint': 'L2', 'num_power_iter': 1, 'lamb': 1,
    # Initial ADAM step size.
    'lr': 0.001,
}
train(arg)

2019-03-11 04:39:11,744 - <ipython-input-2-df82ab415009>[line:97]: [Epoch] 0
2019-03-11 04:39:11,746 - <ipython-input-2-df82ab415009>[line:101]: nll_train: 0.113660 error_train : 0 nll_test : 0.988779 error_test : 2475 accuracy:0.752500
2019-03-11 04:40:02,140 - <ipython-input-2-df82ab415009>[line:97]: [Epoch] 1
2019-03-11 04:40:02,141 - <ipython-input-2-df82ab415009>[line:101]: nll_train: 0.059409 error_train : 0 nll_test : 0.908701 error_test : 2475 accuracy:0.752500
2019-03-11 04:40:54,038 - <ipython-input-2-df82ab415009>[line:97]: [Epoch] 2
2019-03-11 04:40:54,039 - <ipython-input-2-df82ab415009>[line:101]: nll_train: 0.036733 error_train : 0 nll_test : 0.865169 error_test : 2475 accuracy:0.752500
2019-03-11 04:41:44,834 - <ipython-input-2-df82ab415009>[line:97]: [Epoch] 3
2019-03-11 04:41:44,835 - <ipython-input-2-df82ab415009>[line:101]: nll_train: 0.025031 error_train : 0 nll_test : 0.838034 error_test : 2475 accuracy:0.752500
2019-03-11 04:42:35,506 - <ipython-input-2-df82ab415

2019-03-11 05:11:21,801 - <ipython-input-2-df82ab415009>[line:97]: [Epoch] 35
2019-03-11 05:11:21,803 - <ipython-input-2-df82ab415009>[line:101]: nll_train: 0.000141 error_train : 0 nll_test : 0.798885 error_test : 2473 accuracy:0.752700
2019-03-11 05:12:16,400 - <ipython-input-2-df82ab415009>[line:97]: [Epoch] 36
2019-03-11 05:12:16,402 - <ipython-input-2-df82ab415009>[line:101]: nll_train: 0.000126 error_train : 0 nll_test : 0.801445 error_test : 2473 accuracy:0.752700
2019-03-11 05:13:11,845 - <ipython-input-2-df82ab415009>[line:97]: [Epoch] 37
2019-03-11 05:13:11,846 - <ipython-input-2-df82ab415009>[line:101]: nll_train: 0.000113 error_train : 0 nll_test : 0.804019 error_test : 2473 accuracy:0.752700
2019-03-11 05:14:06,957 - <ipython-input-2-df82ab415009>[line:97]: [Epoch] 38
2019-03-11 05:14:06,959 - <ipython-input-2-df82ab415009>[line:101]: nll_train: 0.000102 error_train : 0 nll_test : 0.806600 error_test : 2473 accuracy:0.752700
2019-03-11 05:15:04,319 - <ipython-input-2-df82a

2019-03-11 05:38:58,407 - <ipython-input-2-df82ab415009>[line:97]: [Epoch] 70
2019-03-11 05:38:58,408 - <ipython-input-2-df82ab415009>[line:101]: nll_train: 0.000013 error_train : 0 nll_test : 0.867603 error_test : 2474 accuracy:0.752600
2019-03-11 05:39:43,176 - <ipython-input-2-df82ab415009>[line:97]: [Epoch] 71
2019-03-11 05:39:43,178 - <ipython-input-2-df82ab415009>[line:101]: nll_train: 0.000012 error_train : 0 nll_test : 0.868637 error_test : 2474 accuracy:0.752600
2019-03-11 05:40:28,245 - <ipython-input-2-df82ab415009>[line:97]: [Epoch] 72
2019-03-11 05:40:28,247 - <ipython-input-2-df82ab415009>[line:101]: nll_train: 0.000012 error_train : 0 nll_test : 0.869627 error_test : 2474 accuracy:0.752600
2019-03-11 05:41:13,799 - <ipython-input-2-df82ab415009>[line:97]: [Epoch] 73
2019-03-11 05:41:13,802 - <ipython-input-2-df82ab415009>[line:101]: nll_train: 0.000012 error_train : 0 nll_test : 0.870575 error_test : 2474 accuracy:0.752600
2019-03-11 05:41:59,996 - <ipython-input-2-df82a

In [7]:

# Run with 'size' = 1000 ('size' is what train() passes to the loader as n_l).
arg = {
    # Seed, labelled-set size, TensorBoard-style logging switch.
    'seed': 1, 'size': 1000, 'vis': False,
    # Schedule: epochs, training steps per epoch, per-epoch learning-rate factor.
    'epochs': 100, 'iterations': 400, 'lr_decay': 0.95,
    # Batching and data split.
    'batch_size': 100, 'ul_batch_size': 250, 'num_validation_samples': 1000,
    # Network layout (dash-separated layer widths).
    'layer_sizes': '784-1200-1200-10',
    # Not read by train() itself; forwarded with the dict to get_cost_type_semi.
    'dataset': 'mnist', 'epsilon': 0.3, 'cost_type': 'MLE',
    'norm_constraint': 'L2', 'num_power_iter': 1, 'lamb': 1,
    # Initial ADAM step size.
    'lr': 0.001,
}
train(arg)

2019-03-11 01:40:13,528 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 0
2019-03-11 01:40:13,531 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.127680 error_train : 0 nll_test : 0.473476 error_test : 704 accuracy:0.929600
2019-03-11 01:41:03,025 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 1
2019-03-11 01:41:03,028 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.063812 error_train : 0 nll_test : 0.392298 error_test : 725 accuracy:0.927500
2019-03-11 01:41:51,856 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 2
2019-03-11 01:41:51,858 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.040191 error_train : 0 nll_test : 0.344783 error_test : 697 accuracy:0.930300
2019-03-11 01:42:37,339 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 3
2019-03-11 01:42:37,341 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.026402 error_train : 0 nll_test : 0.318617 error_test : 697 accuracy:0.930300
2019-03-11 01:43:23,362 - <ipython-input-6-aee519e8df5c>

2019-03-11 02:07:33,178 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 35
2019-03-11 02:07:33,180 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.000158 error_train : 0 nll_test : 0.243054 error_test : 722 accuracy:0.927800
2019-03-11 02:08:19,538 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 36
2019-03-11 02:08:19,540 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.000145 error_train : 0 nll_test : 0.245175 error_test : 732 accuracy:0.926800
2019-03-11 02:09:05,825 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 37
2019-03-11 02:09:05,827 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.000137 error_train : 0 nll_test : 0.244926 error_test : 728 accuracy:0.927200
2019-03-11 02:09:52,091 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 38
2019-03-11 02:09:52,093 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.000116 error_train : 0 nll_test : 0.247328 error_test : 719 accuracy:0.928100
2019-03-11 02:10:36,051 - <ipython-input-6-aee519e8d

2019-03-11 02:34:44,930 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 70
2019-03-11 02:34:44,931 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.000020 error_train : 0 nll_test : 0.263617 error_test : 735 accuracy:0.926500
2019-03-11 02:35:30,961 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 71
2019-03-11 02:35:30,963 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.000020 error_train : 0 nll_test : 0.266969 error_test : 730 accuracy:0.927000
2019-03-11 02:36:17,490 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 72
2019-03-11 02:36:17,492 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.000019 error_train : 0 nll_test : 0.265538 error_test : 725 accuracy:0.927500
2019-03-11 02:37:03,996 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 73
2019-03-11 02:37:03,998 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.000018 error_train : 0 nll_test : 0.266069 error_test : 738 accuracy:0.926200
2019-03-11 02:37:46,480 - <ipython-input-6-aee519e8d

In [8]:

# Run with 'size' = 10000 ('size' is what train() passes to the loader as n_l).
arg = {
    # Seed, labelled-set size, TensorBoard-style logging switch.
    'seed': 1, 'size': 10000, 'vis': False,
    # Schedule: epochs, training steps per epoch, per-epoch learning-rate factor.
    'epochs': 100, 'iterations': 400, 'lr_decay': 0.95,
    # Batching and data split.
    'batch_size': 100, 'ul_batch_size': 250, 'num_validation_samples': 1000,
    # Network layout (dash-separated layer widths).
    'layer_sizes': '784-1200-1200-10',
    # Not read by train() itself; forwarded with the dict to get_cost_type_semi.
    'dataset': 'mnist', 'epsilon': 0.3, 'cost_type': 'MLE',
    'norm_constraint': 'L2', 'num_power_iter': 1, 'lamb': 1,
    # Initial ADAM step size.
    'lr': 0.001,
}
train(arg)

2019-03-11 03:50:32,033 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 0
2019-03-11 03:50:32,035 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.154206 error_train : 0 nll_test : 0.265320 error_test : 283 accuracy:0.971700
2019-03-11 03:51:51,301 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 1
2019-03-11 03:51:51,303 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.070784 error_train : 0 nll_test : 0.179951 error_test : 257 accuracy:0.974300
2019-03-11 03:53:06,957 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 2
2019-03-11 03:53:06,959 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.039508 error_train : 0 nll_test : 0.144496 error_test : 245 accuracy:0.975500
2019-03-11 03:54:25,682 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 3
2019-03-11 03:54:25,683 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.027010 error_train : 0 nll_test : 0.124050 error_test : 246 accuracy:0.975400
2019-03-11 03:55:33,338 - <ipython-input-6-aee519e8df5c>

2019-03-11 04:23:39,605 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 35
2019-03-11 04:23:39,607 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.000174 error_train : 0 nll_test : 0.082221 error_test : 222 accuracy:0.977800
2019-03-11 04:24:29,983 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 36
2019-03-11 04:24:29,985 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.000154 error_train : 0 nll_test : 0.084303 error_test : 234 accuracy:0.976600
2019-03-11 04:25:19,331 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 37
2019-03-11 04:25:19,332 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.000152 error_train : 0 nll_test : 0.084445 error_test : 230 accuracy:0.977000
2019-03-11 04:26:10,681 - <ipython-input-6-aee519e8df5c>[line:97]: [Epoch] 38
2019-03-11 04:26:10,683 - <ipython-input-6-aee519e8df5c>[line:101]: nll_train: 0.000130 error_train : 0 nll_test : 0.084206 error_test : 234 accuracy:0.976600
2019-03-11 04:27:01,937 - <ipython-input-6-aee519e8d

KeyboardInterrupt: 

In [None]:

# Run with 'size' = 50000 ('size' is what train() passes to the loader as n_l).
arg = {
    # Seed, labelled-set size, TensorBoard-style logging switch.
    'seed': 1, 'size': 50000, 'vis': False,
    # Schedule: epochs, training steps per epoch, per-epoch learning-rate factor.
    'epochs': 100, 'iterations': 400, 'lr_decay': 0.95,
    # Batching and data split.
    'batch_size': 100, 'ul_batch_size': 250, 'num_validation_samples': 1000,
    # Network layout (dash-separated layer widths).
    'layer_sizes': '784-1200-1200-10',
    # Not read by train() itself; forwarded with the dict to get_cost_type_semi.
    'dataset': 'mnist', 'epsilon': 0.3, 'cost_type': 'MLE',
    'norm_constraint': 'L2', 'num_power_iter': 1, 'lamb': 1,
    # Initial ADAM step size.
    'lr': 0.001,
}
train(arg)