In [1]:
import os
import sys
import time
import theano
import lasagne
import pickle
import numpy as np

import theano.tensor as T

from utils import iterate_minibatches

Using gpu device 0: GRID K520 (CNMeM is disabled)


Couldn't import dot_parser, loading of dot files will not be possible.


In [2]:
def build_custom_cnn(input_var=None, widths=None, drop_input=.2,
                     drop_hidden=.5):
    # By default, this creates the same network as `build_mlp`, but it can be
    # customized with respect to the number and size of hidden layers. This
    # mostly showcases how creating a network in Python code can be a lot more
    # flexible than a configuration file. Note that to make the code easier,
    # all the layers are just called `network` -- there is no need to give them
    # different names if all we return is the last one we created anyway; we
    # just used different names above for clarity.

    widths = widths if widths is not None else [100]

    # Input layer and dropout (with shortcut `dropout` for `DropoutLayer`):
    network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
                                        input_var=input_var)
    if drop_input:
        network = lasagne.layers.dropout(network, p=drop_input)
    # Hidden layers and dropout:
    nonlin = lasagne.nonlinearities.rectify

    
    # Convolutional layer with 32 kernels of size 5x5. Strided and padded
    # convolutions are supported as well; see the docstring.
    network = lasagne.layers.Conv2DLayer(
            network, num_filters=64, filter_size=(5, 5),
            stride=1, pad=2,
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.GlorotUniform())
    # Expert note: Lasagne provides alternative convolutional layers that
    # override Theano's choice of which implementation to use; for details
    # please see http://lasagne.readthedocs.org/en/latest/user/tutorial.html.
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))

    # Another convolution with 32 5x5 kernels, and another 2x2 pooling:
    network = lasagne.layers.Conv2DLayer(
            network, num_filters=128, filter_size=(5, 5),
            stride=1, pad=2,
            nonlinearity=lasagne.nonlinearities.rectify)
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))

    
    for width in widths:
        network = lasagne.layers.DenseLayer(
                network, width, nonlinearity=nonlin)
        if drop_hidden:
            network = lasagne.layers.dropout(network, p=drop_hidden)
    # Output layer:
    softmax = lasagne.nonlinearities.softmax
    network = lasagne.layers.DenseLayer(network, 10, nonlinearity=softmax)
    return network

In [52]:
def generate_train_acc(input_X,
                       target_y,
                       widths,
                       drop_hidden=0.5,
                       learning_rate=1e-4,
                       model_name=None,
                       fixed_cnn=True,
                       reset_trainable=True):

    dense_output = build_custom_cnn(input_X, widths=widths, drop_hidden=drop_hidden)

    if model_name is not None and os.path.isfile(os.path.join('models', model_name + '.npz')):
        with np.load(os.path.join('models', model_name + '.npz')) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            lasagne.layers.set_all_param_values(dense_output, param_values)
    if fixed_cnn:
        for layer in lasagne.layers.get_all_layers(dense_output):
            if isinstance(layer, lasagne.layers.conv.Conv2DLayer):
                [layer.add_param(param,
                                 shape=param.get_value(borrow=True).shape,
                                 name=param.name,
                                 trainable=False,
                                 regularizable=False) for param in layer.get_params()]

    if reset_trainable:
        for layer in lasagne.layers.get_all_layers(dense_output):
            [param.set_value(lasagne.init.GlorotUniform().sample(param.get_value(borrow=True).shape)) \
             for param in layer.get_params(trainable=True) if len(param.get_value(borrow=True).shape) > 1]
            [param.set_value(lasagne.init.Constant(0.).sample(param.get_value(borrow=True).shape)) \
             for param in layer.get_params(trainable=True) if len(param.get_value(borrow=True).shape) == 1]
    #предсказание нейронки (theano-преобразование)
    y_predicted = lasagne.layers.get_output(dense_output)

    #все веса нейронки (shared-переменные)
    all_weights = lasagne.layers.get_all_params(dense_output, trainable=True)
    print(all_weights, lasagne.layers.get_all_params(dense_output))

    #функция ошибки - средняя кроссэнтропия
    loss = lasagne.objectives.categorical_crossentropy(y_predicted,target_y).mean()
    #loss += lasagne.regularization.l2(0.001)
    accuracy = lasagne.objectives.categorical_accuracy(y_predicted,target_y).mean()

    #сразу посчитать словарь обновлённых значений с шагом по градиенту, как раньше
    updates = lasagne.updates.adam(loss, all_weights, learning_rate=learning_rate)

    #функция, которая обучает сеть на 1 шаг и возвращащет значение функции потерь и точности
    train_fun = theano.function([input_X,target_y],[loss,accuracy],updates=updates)
    accuracy_fun = theano.function([input_X,target_y],accuracy)
    return dense_output, train_fun, accuracy_fun

In [58]:
def run_from_model(X_train,y_train,X_val,y_val,X_test,y_test, **kwargs):
    batch_size = kwargs.get('batch_size', 200)
    widths = kwargs.get('widths', [1024, 1024])
    num_epochs = kwargs.get('num_epochs', 4)
    model_name_from = kwargs.get('model_name_from', 'default_dense_model')
    model_name = kwargs.get('model_name', 'default_changed_dense_model')
    snapshot_frequency = kwargs.get('snap_freq', 5)
    print_frequency = kwargs.get('print_freq', 10)
    learning_rate = kwargs.get('learning_rate', 1e-4)
    restart = kwargs.get('restart', True)
    drop_hidden = kwargs.get('drop_hidden', 0.5)
    fixed_cnn = kwargs.get('fixed_cnn', False)
    reset_trainable = kwargs.get('reset_trainable', True)
    network, train, acc = generate_train_acc(T.tensor4("X"),
                                             T.vector("target Y integer", dtype='int32'),
                                             widths,
                                             drop_hidden,
                                             learning_rate,
                                             model_name_from,
                                             fixed_cnn,
                                             reset_trainable)
    if os.path.isfile(os.path.join('models', model_name + '.npz')) and not restart:
        result = pickle.load(open(os.path.join('models', model_name + '.dict'), 'r'))
        start_epoch = len(result['train_err'])
    else:
        start_epoch = 0
        result = {}
        result['train_err'] = []
        result['train_acc'] = []
        result['val_acc'] = []
        result['epoch_times'] = []
    
    # Just profile if you need
    #pr = cProfile.Profile()
    #pr.enable()
    max_val_acc = 0.0
    for epoch in range(start_epoch, num_epochs + start_epoch):
        train_err = 0
        train_acc = 0
        train_batches = 0
        start_time = time.time()

        for batch in iterate_minibatches(X_train, y_train,batch_size):
            inputs, targets = batch
            train_err_batch, train_acc_batch= train(inputs, targets)
            train_err += train_err_batch
            #train_acc.append(train_acc_batch)
            train_acc += train_acc_batch
            train_batches += 1
        #print(train_acc)
        #train_acc = np.sum(train_acc)
        # And a full pass over the validation data:
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(X_val, y_val, batch_size):
            inputs, targets = batch
            val_acc += acc(inputs, targets)
            val_batches += 1

        # Then we print the results for this epoch:
        if epoch % print_frequency == 0:
            print("for dense")
            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, start_epoch + num_epochs, time.time() - start_time))
            print("  training loss (in-iteration):\t\t{:.6f}".format(train_err / train_batches))
            print("  train accuracy:\t\t{:.2f} %".format(
                train_acc / train_batches * 100))
            print("  validation accuracy:\t\t{:.2f} %".format(
                val_acc / val_batches * 100))
            sys.stdout.flush()
        result["train_err"].append(train_err / train_batches)
        result["train_acc"].append(train_acc / train_batches * 100)
        result["val_acc"].append(val_acc / val_batches * 100)
        if result["val_acc"][-1] > max_val_acc:
            np.savez(os.path.join('models', model_name + '_best.npz'), *lasagne.layers.get_all_param_values(network))
            max_val_acc = result["val_acc"][-1]
        result['epoch_times'].append(time.time() - start_time)
        
        if epoch % snapshot_frequency == 0 or epoch + 1 == num_epochs + start_epoch:
            np.savez(os.path.join('models', model_name + '.npz'), *lasagne.layers.get_all_param_values(network))
            with open(os.path.join('models', model_name + '.dict'), 'wb') as pickle_file:
                pickle.dump(result, pickle_file)
            
            
    # Just profile if you need
    #pr.disable()
    #pr.print_stats(sort='cumtime')

In [59]:
from mnist.mnist import load_dataset
X_train,y_train,X_val,y_val,X_test,y_test = load_dataset()
print(X_train.shape,y_train.shape)

((50000, 1, 28, 28), (50000,))


In [None]:
params = {
    'num_epochs': 300,
    'learning_rate': 1e-5,
    'snap_freq': 1,
    'print_freq': 5,
    'model_name_from': 'adam_1e-4_1024_dense_model_best',
    'model_name': 'adam_fixed_cnn_1e-4_1024_dense_model',
    'drop_hidden': 0.85,
    'fixed_cnn': True,
    'reset_trainable': True,
    'restart': True
}

run_from_model(X_train,y_train,X_val,y_val,X_test,y_test, **params)

([W, b, W, b, W, b], [W, b, W, b, W, b, W, b, W, b])
for dense
Epoch 1 of 300 took 12.347s
  training loss (in-iteration):		2.263318
  train accuracy:		15.20 %
  validation accuracy:		22.08 %
for dense
Epoch 6 of 300 took 12.312s
  training loss (in-iteration):		0.839576
  train accuracy:		80.31 %
  validation accuracy:		84.14 %
for dense
Epoch 11 of 300 took 12.318s
  training loss (in-iteration):		0.318835
  train accuracy:		92.18 %
  validation accuracy:		93.38 %
for dense
Epoch 16 of 300 took 12.315s
  training loss (in-iteration):		0.191302
  train accuracy:		95.13 %
  validation accuracy:		95.57 %
for dense
Epoch 21 of 300 took 12.314s
  training loss (in-iteration):		0.135469
  train accuracy:		96.44 %
  validation accuracy:		96.88 %
for dense
Epoch 26 of 300 took 12.317s
  training loss (in-iteration):		0.104594
  train accuracy:		97.23 %
  validation accuracy:		97.21 %
for dense
Epoch 31 of 300 took 12.311s
  training loss (in-iteration):		0.086367
  train accuracy:		97.71 %
 

In [None]:
ls models

In [39]:
def check_model(X_train,y_train,X_val,y_val,X_test,y_test, **kwargs):
    batch_size = kwargs.get('batch_size', 200)
    widths = kwargs.get('widths', [1024, 1024])
    num_epochs = kwargs.get('num_epochs', 4)
    model_name = kwargs.get('model_name', 'default_dense_model')
    snapshot_frequency = kwargs.get('snap_freq', 5)
    print_frequency = kwargs.get('print_freq', 10)
    learning_rate = kwargs.get('learning_rate', 1e-4)
    restart = kwargs.get('restart', True)
    drop_hidden = kwargs.get('drop_hidden', 0.5)
    fixed_cnn = kwargs.get('fixed_cnn', False)
    reset_trainable = kwargs.get('reset_trainable', True)
    network, train, acc = generate_train_acc(T.tensor4("X"),
                                             T.vector("target Y integer", dtype='int32'),
                                             widths,
                                             drop_hidden,
                                             learning_rate,
                                             model_name,
                                             fixed_cnn=False,
                                             reset_trainable=False)
    start_epoch = 0
    result = {}
    result['train_err'] = []
    result['train_acc'] = []
    result['val_acc'] = []
    result['epoch_times'] = []
    
    # Just profile if you need
    #pr = cProfile.Profile()
    #pr.enable()
    max_val_acc = 0.0
    for epoch in range(start_epoch, num_epochs + start_epoch):
        train_err = 0
        train_acc = 0
        train_batches = 0
        start_time = time.time()
        #for batch in iterate_minibatches(X_train, y_train,batch_size):
        #    inputs, targets = batch
        #    train_err_batch, train_acc_batch= train(inputs, targets)
        #    train_err += train_err_batch
        #    train_acc += train_acc_batch
        #    train_batches += 1

        # And a full pass over the validation data:
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(X_val, y_val, batch_size):
            inputs, targets = batch
            val_acc += acc(inputs, targets)
            val_batches += 1

        # Then we print the results for this epoch:
        if epoch % print_frequency == 0 or epoch > 0:
            print("for dense")
            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, start_epoch + num_epochs, time.time() - start_time))
            #print("  training loss (in-iteration):\t\t{:.6f}".format(train_err / train_batches))
            #print("  train accuracy:\t\t{:.2f} %".format(
            #    train_acc / train_batches * 100))
            print("  validation accuracy:\t\t{:.2f} %".format(
                val_acc / val_batches * 100))
            sys.stdout.flush()
        #result["train_err"].append(train_err / train_batches)
        #result["train_acc"].append(train_acc / train_batches * 100)
        #result["val_acc"].append(val_acc / val_batches * 100)
        #if result["val_acc"][-1] > max_val_acc:
        #    np.savez(os.path.join('models', model_name + '_best.npz'), *lasagne.layers.get_all_param_values(network))
        #    max_val_acc = result["val_acc"][-1]
        #result['epoch_times'].append(time.time() - start_time)
        
        #if epoch % snapshot_frequency == 0 or epoch + 1 == num_epochs + start_epoch:
        #    np.savez(os.path.join('models', model_name + '.npz'), *lasagne.layers.get_all_param_values(network))
        #    with open(os.path.join('models', model_name + '.dict'), 'wb') as pickle_file:
        #        pickle.dump(result, pickle_file)
        print('All')   
            
    # Just profile if you need
    #pr.disable()
    #pr.print_stats(sort='cumtime')

In [40]:
params = {
    'num_epochs': 300,
    'learning_rate': 1e-7,
    'snap_freq': 1,
    'print_freq': 5,
    'model_name': 'adam_1e-4_1024_dense_model_best',
    'drop_hidden': 0.8,
    'fixed_cnn': True,
    'reset_trainable': False,
    'restart': True
}

check_model(X_train,y_train,X_val,y_val,X_test,y_test, **params)

([W, b, W, b, W, b, W, b, W, b], [W, b, W, b, W, b, W, b, W, b])
for dense
Epoch 1 of 300 took 1.575s
  validation accuracy:		99.11 %
All
for dense
Epoch 2 of 300 took 1.568s
  validation accuracy:		99.06 %
All
for dense
Epoch 3 of 300 took 1.569s
  validation accuracy:		99.10 %
All
for dense
Epoch 4 of 300 took 1.568s
  validation accuracy:		99.15 %
All
for dense
Epoch 5 of 300 took 1.570s
  validation accuracy:		99.13 %
All
for dense
Epoch 6 of 300 took 1.570s
  validation accuracy:		99.10 %
All
for dense
Epoch 7 of 300 took 1.570s
  validation accuracy:		99.08 %
All
for dense
Epoch 8 of 300 took 1.572s
  validation accuracy:		99.07 %
All
for dense
Epoch 9 of 300 took 1.569s
  validation accuracy:		99.15 %
All
for dense
Epoch 10 of 300 took 1.568s
  validation accuracy:		99.16 %
All
for dense
Epoch 11 of 300 took 1.572s
  validation accuracy:		99.09 %
All
for dense
Epoch 12 of 300 took 1.573s
  validation accuracy:		99.19 %
All
for dense
Epoch 13 of 300 took 1.572s
  validation accur

KeyboardInterrupt: 

In [51]:
def run_kron_from_model(X_train,y_train,X_val,y_val,X_test,y_test, **kwargs):
    batch_size = kwargs.get('batch_size', 200)
    widths = kwargs.get('widths', [1024, 1024])
    num_epochs = kwargs.get('num_epochs', 4)
    model_name_from = kwargs.get('model_name_from', 'default_dense_model')
    model_name = kwargs.get('model_name', 'default_changed_dense_model')
    ranks = kwargs.get('ranks', arange(10))
    snapshot_frequency = kwargs.get('snap_freq', 5)
    print_frequency = kwargs.get('print_freq', 10)
    learning_rate = kwargs.get('learning_rate', 1e-4)
    restart = kwargs.get('restart', True)
    drop_hidden = kwargs.get('drop_hidden', 0.5)
    fixed_cnn = kwargs.get('fixed_cnn', False)
    reset_trainable = kwargs.get('reset_trainable', True)
    network, train, acc = generate_train_acc(T.tensor4("X"),
                                             T.vector("target Y integer", dtype='int32'),
                                             widths,
                                             drop_hidden,
                                             learning_rate,
                                             model_name_from,
                                             fixed_cnn,
                                             reset_trainable)
    if os.path.isfile(os.path.join('models', model_name + '.npz')) and not restart:
        result = pickle.load(open(os.path.join('models', model_name + '.dict'), 'r'))
        start_epoch = len(result['train_err'])
    else:
        start_epoch = 0
        result = {}
        result['train_err'] = []
        result['train_acc'] = []
        result['val_acc'] = []
        result['epoch_times'] = []
    
    # Just profile if you need
    #pr = cProfile.Profile()
    #pr.enable()
    max_val_acc = 0.0
    for epoch in range(start_epoch, num_epochs + start_epoch):
        train_err = 0
        train_acc = 0
        train_batches = 0
        start_time = time.time()

        for batch in iterate_minibatches(X_train, y_train,batch_size):
            inputs, targets = batch
            train_err_batch, train_acc_batch= train(inputs, targets)
            train_err += train_err_batch
            #train_acc.append(train_acc_batch)
            train_acc += train_acc_batch
            train_batches += 1
        #print(train_acc)
        #train_acc = np.sum(train_acc)
        # And a full pass over the validation data:
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(X_val, y_val, batch_size):
            inputs, targets = batch
            val_acc += acc(inputs, targets)
            val_batches += 1

        # Then we print the results for this epoch:
        if epoch % print_frequency == 0:
            print("for dense")
            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, start_epoch + num_epochs, time.time() - start_time))
            print("  training loss (in-iteration):\t\t{:.6f}".format(train_err / train_batches))
            print("  train accuracy:\t\t{:.2f} %".format(
                train_acc / train_batches * 100))
            print("  validation accuracy:\t\t{:.2f} %".format(
                val_acc / val_batches * 100))
            sys.stdout.flush()
        result["train_err"].append(train_err / train_batches)
        result["train_acc"].append(train_acc / train_batches * 100)
        result["val_acc"].append(val_acc / val_batches * 100)
        if result["val_acc"][-1] > max_val_acc:
            np.savez(os.path.join('models', model_name + '_best.npz'), *lasagne.layers.get_all_param_values(network))
            max_val_acc = result["val_acc"][-1]
        result['epoch_times'].append(time.time() - start_time)
        
        if epoch % snapshot_frequency == 0 or epoch + 1 == num_epochs + start_epoch:
            np.savez(os.path.join('models', model_name + '.npz'), *lasagne.layers.get_all_param_values(network))
            with open(os.path.join('models', model_name + '.dict'), 'wb') as pickle_file:
                pickle.dump(result, pickle_file)
            
            
    # Just profile if you need
    #pr.disable()
    #pr.print_stats(sort='cumtime')

NameError: name 'array' is not defined