diff --git a/examples/cifar10/cifar10.jl b/examples/cifar10/cifar10.jl
index 2ef638f..a4b3f95 100644
--- a/examples/cifar10/cifar10.jl
+++ b/examples/cifar10/cifar10.jl
@@ -47,16 +47,15 @@ lr_policy = LRPolicy.Staged(
   (5000, LRPolicy.Fixed(0.00001)),
 )
 solver_params = SolverParameters(max_iter=70000,
-    regu_coef=0.004, mom_policy=MomPolicy.Fixed(0.9), lr_policy=lr_policy)
+    regu_coef=0.004, mom_policy=MomPolicy.Fixed(0.9), lr_policy=lr_policy,
+    load_from="snapshots")
 solver = SGD(solver_params)
 
 # report training progress every 200 iterations
 add_coffee_break(solver, TrainingSummary(), every_n_iter=200)
 
 # save snapshots every 5000 iterations
-add_coffee_break(solver,
-    Snapshot("snapshots", auto_load=true),
-    every_n_iter=5000)
+add_coffee_break(solver, Snapshot("snapshots"), every_n_iter=5000)
 
 # show performance on test data every 1000 iterations
 test_net = Net("CIFAR10-test", backend, [data_tt_layer, common_layers..., acc_layer])
diff --git a/examples/cifar10/convert.jl b/examples/cifar10/convert.jl
index 018b42e..fa6171d 100644
--- a/examples/cifar10/convert.jl
+++ b/examples/cifar10/convert.jl
@@ -17,7 +17,7 @@ for (key, sources) in datasets
     dset_data = d_create(h5, "data", datatype(Float32),
        dataspace(width, height, channels, batch_size * length(sources)))
     dset_label = d_create(h5, "label", datatype(Float32),
-       dataspace(1,1,1, batch_size * length(sources)))
+       dataspace(1, batch_size * length(sources)))
 
     for n = 1:length(sources)
       open("cifar-10-batches-bin/$(sources[n])") do f
@@ -47,7 +47,7 @@ for (key, sources) in datasets
 
         index = (n-1)*batch_size+1:n*batch_size
         dset_data[:,:,:,index] = img
-        dset_label[:,:,:,index] = label
+        dset_label[:,index] = label
       end
     end
diff --git a/examples/mnist/mnist_dropout_fc.jl b/examples/mnist/mnist_dropout_fc.jl
index 2a55975..4b66407 100644
--- a/examples/mnist/mnist_dropout_fc.jl
+++ b/examples/mnist/mnist_dropout_fc.jl
@@ -68,25 +68,24 @@ drop_layers = [drop_input, drop_fc1, drop_fc2]
 # put training net together, note that the correct ordering will automatically be established by the constructor
 net = Net("MNIST-train", backend, [data_layer, common_layers..., drop_layers..., loss_layer])
 
+base_dir = "snapshots_dropout_fc"
 # we let the learning rate decrease by 0.998 in each epoch (=600 batches of size 100)
 # and let the momentum increase linearly from 0.5 to 0.9 over 500 epochs
 # which is equivalent to an increase step of 0.0008
 # training is done for 2000 epochs
 params = SolverParameters(max_iter=600*2000, regu_coef=0.0,
                           mom_policy=MomPolicy.Linear(0.5, 0.0008, 600, 0.9),
-                          lr_policy=LRPolicy.Step(0.1, 0.998, 600))
+                          lr_policy=LRPolicy.Step(0.1, 0.998, 600),
+                          load_from=base_dir)
 solver = SGD(params)
 
-base_dir = "snapshots_dropout_fc"
 setup_coffee_lounge(solver, save_into="$base_dir/statistics.jld", every_n_iter=5000)
 
 # report training progress every 100 iterations
 add_coffee_break(solver, TrainingSummary(), every_n_iter=100)
 
 # save snapshots every 5000 iterations
-add_coffee_break(solver,
-    Snapshot(base_dir, auto_load=true),
-    every_n_iter=5000)
+add_coffee_break(solver, Snapshot(base_dir), every_n_iter=5000)
 
 # show performance on test data every 600 iterations (one epoch)
 data_layer_test = HDF5DataLayer(name="test-data", source="data/test.txt", batch_size=100)