# pylearn2 tutorial example: cifar_grbm_smd.yaml by Ian Goodfellow
#
# Read the README file before reading this file
#
# This is an example of a YAML file, which is the main way that an experimenter
# interacts with pylearn2.
#
# A YAML file is very similar to a Python dictionary, with a bit of extra
# syntax.
# The !obj tag allows us to create a specific class of object. The text after
# the : indicates what class should be loaded. This is followed by a pair of
# braces containing the arguments to that class's __init__ method.
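#
# For example, a tag like
#   !obj:pylearn2.models.rbm.GaussianBinaryRBM { nvis: 192, nhid: 400 }
# corresponds (roughly) to the Python call
#   pylearn2.models.rbm.GaussianBinaryRBM(nvis=192, nhid=400)
# This is just an illustrative sketch of how the tag is resolved; the
# actual model for this experiment is configured further down.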
#
# Here, we allocate a Train object, which represents the main loop of the
# training script. The train script will run this loop repeatedly. Each time
# through the loop, the model is trained on data from a training dataset, then
# saved to file.
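#
# A file like this is typically handed to pylearn2's training script,
# e.g. (assuming the usual repository layout) something like
#   python pylearn2/scripts/train.py cifar_grbm_smd.yaml
# which parses the YAML, builds the objects below, and runs the loop.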
!obj:pylearn2.train.Train {
# The !pkl tag is used to create an object from a pkl file. Here we retrieve
# the dataset made by make_dataset.py and use it as our training dataset.
dataset: !pkl: "cifar10_preprocessed_train.pkl",
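# (Loading with !pkl is roughly equivalent to unpickling the file
# yourself in Python, e.g. with the standard library:
#   import pickle
#   dataset = pickle.load(open("cifar10_preprocessed_train.pkl", "rb"))
# shown here only to illustrate what the tag does for us.)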
# Next we make the model to be trained. It is a Gaussian-binary RBM.
model: !obj:pylearn2.models.rbm.GaussianBinaryRBM {
# The RBM needs 192 visible units (its inputs are 8x8 patches with 3
# color channels, and 8 * 8 * 3 = 192)
nvis : 192,
# We'll use 400 hidden units for this RBM. That's a small number but we
# want this example script to train quickly.
nhid : 400,
# The elements of the weight matrices of the RBM will be drawn
# independently from U(-0.05, 0.05)
irange : 0.05,
# There are many ways to parameterize a GRBM. Here we use a
# parameterization that makes the correspondence to denoising
# autoencoders more clear.
energy_function_class : !obj:pylearn2.energy_functions.rbm_energy.grbm_type_1 {},
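# For reference, a common Gaussian-binary RBM energy function (the exact
# way grbm_type_1 parameterizes sigma may differ slightly) is
#   E(v, h) = sum_i (v_i - b_i)^2 / (2 sigma_i^2)
#             - sum_j c_j h_j
#             - sum_ij (v_i / sigma_i^2) W_ij h_j
# with visible biases b, hidden biases c, and weight matrix W.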
# Some learning algorithms can successfully estimate the standard
# deviation of the visible units of a GRBM; others cannot, and simply
# fix the standard deviation to 1. We're going to show off and learn
# the standard deviation.
learn_sigma : True,
# Learning works better if we provide a smart initialization for the
# parameters. Here we start sigma at 0.4, which is about the standard
# deviation of the training data. We start the biases on the hidden
# units at -2, which will make their activations fairly sparse.
init_sigma : .4,
init_bias_hid : -2.,
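# (With binary hidden units, a unit's activation probability is a
# sigmoid of its total input, so a bias of -2 means each hidden unit
# starts out active roughly sigmoid(-2) ~ 12% of the time.)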
# Some GRBM training algorithms can't handle the visible units being
# noisy and just use their mean for all computations. We will show off
# and not use that hack here.
mean_vis : False,
# One hack we do use is scaling back the gradient steps on the sigma
# parameter. This way we don't need to worry about sigma getting too
# small prematurely (if it shrinks too quickly, the learning signal
# gets weak).
sigma_lr_scale : 1e-3
},
# Next we need to specify the training algorithm that will be used to train
# the model. Here we use stochastic gradient descent.
algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
# The learning rate determines how big of steps the learning algorithm
# takes. Here we use fairly big steps initially because we have a
# learning rate adjustment scheme that will scale them down if
# necessary.
learning_rate : 1e-1,
# Each gradient step will be based on this many examples
batch_size : 5,
# We'll monitor our progress by looking at the first 20 batches of the
# training dataset. This gives an estimate of the training error. To be
# really exhaustive we could monitor the entire training set, or, to
# detect overfitting, we could monitor held-out data instead.
monitoring_batches : 20,
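# (With the batch size of 5 set above, 20 monitoring batches means the
# monitor averages over about 100 training examples, a cheap but
# somewhat noisy estimate.)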
monitoring_dataset : !pkl: "cifar10_preprocessed_train.pkl",
# Here we specify the objective function that stochastic gradient
# descent should minimize. In this case we use denoising score
# matching, which makes this RBM behave as a denoising autoencoder.
# See
# Pascal Vincent. "A Connection Between Score Matching and Denoising
# Autoencoders." Neural Computation, 2011
# for details.
cost : !obj:pylearn2.costs.ebm_estimation.SMD {
# Denoising score matching uses a corruption process to transform
# the raw data. Here we use additive Gaussian noise.
corruptor : !obj:pylearn2.corruption.GaussianCorruptor {
stdev : 0.4
},
},
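# For intuition: with additive Gaussian corruption of standard deviation
# s, the score of the corruption process has the closed form
#   d/dx_corrupted log q(x_corrupted | x) = (x - x_corrupted) / s^2
# so denoising score matching trains the model's score to point
# corrupted inputs back toward the clean data, which is exactly what a
# denoising autoencoder learns to do. (A sketch of the idea only; see
# the paper above for the precise objective.)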
# We'll use the monitoring dataset to figure out when to stop training.
#
# In this case, we stop if there is less than a 1% decrease in the
# training error in the last epoch. You'll notice that the learned
# features are a bit noisy. If you'd like nice smooth features you can
# make this criterion stricter so that the model will train for longer.
# (setting N to 10 should make the weights prettier, but will make it
# run a lot longer)
termination_criterion : !obj:pylearn2.termination_criteria.MonitorBased {
prop_decrease : 0.01,
N : 1,
},
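# (Concretely: with prop_decrease of 0.01 and N of 1, if the monitored
# objective was 2.00 after the previous epoch, it has to fall below
# about 1.98 this epoch or training stops. This is just a rough
# illustration of the criterion described above.)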
},
# Let's throw a learning rate adjuster into the training algorithm.
# To do this we'll use an "extension," which is basically an event
# handler that can be registered with the Train object.
# This particular one is triggered on each epoch.
# It will shrink the learning rate if the objective goes up and increase
# the learning rate if the objective decreases too slowly. This makes
# our learning rate hyperparameter less important to get right.
# This is not a very mathematically principled approach, but it works
# well in practice.
extensions : [!obj:pylearn2.training_algorithms.sgd.MonitorBasedLRAdjuster {}],
# Finally, request that the model be saved after each epoch.
save_freq : 1
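# (With the standard train.py script the trained model is typically
# written to a .pkl file named after this YAML file, e.g.
# cifar_grbm_smd.pkl; Train also accepts a save_path argument if you
# want to choose the filename explicitly.)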
}