Skip to content

Commit

Permalink
[egs] rotation for image augmentation in CIFAR example (#1955)
Browse files Browse the repository at this point in the history
  • Loading branch information
YiwenShaoStephen authored and danpovey committed Nov 2, 2017
1 parent 3ea5340 commit 148c884
Show file tree
Hide file tree
Showing 3 changed files with 302 additions and 0 deletions.
142 changes: 142 additions & 0 deletions egs/cifar/v1/local/nnet3/run_resnet_1d.sh
@@ -0,0 +1,142 @@
#!/bin/bash

# 1d is as 1c but adding rotation in image augmentation.

# local/nnet3/compare.sh exp/resnet1c_cifar10 exp/resnet1d_cifar10
# System resnet1c_cifar10 resnet1d_cifar10
# final test accuracy: 0.9514 0.9537
# final train accuracy: 1 0.9966
# final test objf: -0.157244 -0.139607
# final train objf: -0.00751868 -0.0219607
# num-parameters: 1322730 1322730

# local/nnet3/compare.sh exp/resnet1c_cifar100 exp/resnet1d_cifar100
# System resnet1c_cifar100 resnet1d_cifar100
# final test accuracy: 0.7627 0.7687
# final train accuracy: 0.96 0.9276
# final test objf: -0.862205 -0.812203
# final train objf: -0.174973 -0.265734
# num-parameters: 1345860 1345860
# steps/info/nnet3_dir_info.pl exp/resnet1d_cifar10{,0}
# exp/resnet1d_cifar10: num-iters=133 nj=1..2 num-params=1.3M dim=96->10 combine=-0.04->-0.03 loglike:train/valid[87,132,final]=(-0.153,-0.044,-0.022/-0.25,-0.173,-0.140) accuracy:train/valid[87,132,final]=(0.946,0.9880,0.9966/0.921,0.946,0.954)
# exp/resnet1d_cifar100: num-iters=133 nj=1..2 num-params=1.3M dim=96->100 combine=-0.33->-0.29 loglike:train/valid[87,132,final]=(-0.81,-0.37,-0.27/-1.15,-0.95,-0.81) accuracy:train/valid[87,132,final]=(0.760,0.897,0.928/0.68,0.737,0.769)

# Set -e here so that we catch if any executable fails immediately
# (-e: exit on the first failing command; -u: treat unset variables as an
# error; -o pipefail: a pipeline fails if any of its stages fails).
set -euo pipefail



# training options
# stage: steps guarded by "[ $stage -le N ]" below are skipped when stage > N.
stage=0
# train_stage: forwarded as --stage to steps/nnet3/train_raw_dnn.py.
train_stage=-10
# dataset: selects the egs directory exp/${dataset}_egs2 and is part of the
# output directory name.
dataset=cifar10
# srand: forwarded as --trainer.srand to the training script.
srand=0
# reporting_email: forwarded as --reporting.email; empty by default.
reporting_email=
# affix: experiment suffix; the output directory is exp/resnet${affix}_${dataset}.
affix=1d


# End configuration section.
echo "$0 $@" # Print the command line for logging

# cmd.sh is expected to define $train_cmd (used in the training stage below);
# path.sh and parse_options.sh are project helpers not visible here --
# presumably they set up PATH and let --name value command-line flags override
# the variables above (standard Kaldi convention; confirm).
. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

# Abort early when Kaldi was built without CUDA: the training stage below
# always runs with --use-gpu=true.
if ! cuda-compiled; then
  # The exit is a separate statement on purpose.  With "cat ... && exit 1" a
  # failing cat (e.g. closed stdout) would skip the exit, and "set -e" does
  # not trigger for a failure in the non-final part of an && list, so the
  # script would silently continue without CUDA.
  cat <<EOF
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
  exit 1
fi



# Output directory of this experiment, e.g. exp/resnet1d_cifar10.
dir=exp/resnet${affix}_${dataset}

# Directory holding the training examples for the chosen dataset.
egs=exp/${dataset}_egs2

# All expansions below are quoted so that a --dataset/--affix value containing
# whitespace or glob characters cannot break the tests or the file list.
if [ ! -d "$egs" ]; then
  echo "$0: expected directory $egs to exist. Run the get_egs.sh commands in the"
  echo " run.sh before this script."
  exit 1
fi

# check that the expected files are in the egs directory.

for f in "$egs/egs.1.ark" "$egs/train_diagnostic.egs" "$egs/valid_diagnostic.egs" "$egs/combine.egs" \
  "$egs/info/feat_dim" "$egs/info/left_context" "$egs/info/right_context" \
  "$egs/info/output_dim"; do
  if [ ! -e "$f" ]; then
    echo "$0: expected file $f to exist."
    exit 1;
  fi
done


# Creates both $dir and $dir/log, so no separate "mkdir -p $dir" is needed.
mkdir -p "$dir/log"


# Stage 1: write the xconfig network description and expand it into nnet3
# config files under $dir/configs.
if [ "$stage" -le 1 ]; then
  echo "$0: creating neural net configs using the xconfig parser";

  # Number of output classes, as recorded in the egs directory.
  num_targets=$(cat "$egs/info/output_dim")

  # Note: we hardcode in the CNN config that we are dealing with 32x32x3 color
  # images (height 32 with 3 channels gives the input dim of 96 used below).


  # Number of filters for the three groups of layers (height 32, 16 and 8),
  # and the bottleneck size used by the res-blocks of the last group.
  nf1=48
  nf2=96
  nf3=256
  nb3=128

  # Option strings shared between layers; they are expanded inside the
  # (unquoted) here-document below.
  a="num-minibatches-history=40.0"
  common="$a required-time-offsets=0 height-offsets=-1,0,1"
  res_opts="$a bypass-source=batchnorm"

  mkdir -p "$dir/configs"
  cat <<EOF > "$dir/configs/network.xconfig"
input dim=96 name=input
conv-layer name=conv1 $a height-in=32 height-out=32 time-offsets=-1,0,1 required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=$nf1
res-block name=res2 num-filters=$nf1 height=32 time-period=1 $res_opts
res-block name=res3 num-filters=$nf1 height=32 time-period=1 $res_opts
conv-layer name=conv4 height-in=32 height-out=16 height-subsample-out=2 time-offsets=-1,0,1 $common num-filters-out=$nf2
res-block name=res5 num-filters=$nf2 height=16 time-period=2 $res_opts
res-block name=res6 num-filters=$nf2 height=16 time-period=2 $res_opts
conv-layer name=conv7 height-in=16 height-out=8 height-subsample-out=2 time-offsets=-2,0,2 $common num-filters-out=$nf3
res-block name=res8 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res9 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res10 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
channel-average-layer name=channel-average input=Append(2,6,10,14,18,22,24,28) dim=$nf3
output-layer name=output learning-rate-factor=0.1 dim=$num_targets
EOF
  steps/nnet3/xconfig_to_configs.py --xconfig-file "$dir/configs/network.xconfig" --config-dir "$dir/configs/"
fi


# Stage 2: train the network on the GPU with image augmentation enabled.
if [ "$stage" -le 2 ]; then

  # Augmentation applied to the training images: random horizontal flip,
  # horizontal/vertical shifts of up to 10% of the image size, and (new in
  # this recipe) a rotation of at most 30 degrees applied with probability 0.5.
  augmentation_opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --rotation-degree=30 --rotation-prob=0.5 --num-channels=3"

  # The arguments are collected in an array so that every expansion is quoted
  # (values with whitespace stay a single argument) and options can be
  # commented individually.
  train_opts=(
    --stage="$train_stage"
    --cmd="$train_cmd"
    --image.augmentation-opts="$augmentation_opts"
    --trainer.srand="$srand"
    --trainer.max-param-change=2.0
    --trainer.num-epochs=100
    --egs.frames-per-eg=1
    --trainer.optimization.num-jobs-initial=1
    --trainer.optimization.num-jobs-final=2
    --trainer.optimization.initial-effective-lrate=0.003
    --trainer.optimization.final-effective-lrate=0.0003
    --trainer.optimization.minibatch-size=256,128,64
    --trainer.optimization.proportional-shrink=50.0
    --trainer.shuffle-buffer-size=2000
    --egs.dir="$egs"
    --use-gpu=true
    --reporting.email="$reporting_email"
    --dir="$dir"
  )

  steps/nnet3/train_raw_dnn.py "${train_opts[@]}" || exit 1;
fi


exit 0;
142 changes: 142 additions & 0 deletions egs/cifar/v1/local/nnet3/run_resnet_1e.sh
@@ -0,0 +1,142 @@
#!/bin/bash

# 1e is as 1d but with more filters and epochs.

# local/nnet3/compare.sh exp/resnet1d_cifar10/ exp/resnet1e_cifar10/
# System resnet1d_cifar10 resnet1e_cifar10
# final test accuracy: 0.9537 0.9583
# final train accuracy: 0.9966 0.9994
# final test objf: -0.139607 -0.124945
# final train objf: -0.0219607 -0.00603407
# num-parameters: 1322730 3465194

# local/nnet3/compare.sh exp/resnet1d_cifar100 exp/resnet1e_cifar100
# System resnet1d_cifar100 resnet1e_cifar100
# final test accuracy: 0.7687 0.7914
# final train accuracy: 0.9276 0.9922
# final test objf: -0.812203 -0.786857
# final train objf: -0.265734 -0.0514912
# num-parameters: 1345860 3511364
# steps/info/nnet3_dir_info.pl exp/resnet1e_cifar10{,0}
# exp/resnet1e_cifar10: num-iters=186 nj=1..2 num-params=3.5M dim=96->10 combine=-0.01->-0.01 loglike:train/valid[123,185,final]=(-0.109,-0.026,-0.0060/-0.21,-0.167,-0.125) accuracy:train/valid[123,185,final]=(0.963,0.9936,0.9994/0.930,0.949,0.958)
# exp/resnet1e_cifar100/: num-iters=186 nj=1..2 num-params=3.5M dim=96->100 combine=-0.09->-0.07 loglike:train/valid[123,185,final]=(-0.53,-0.109,-0.051/-1.06,-0.93,-0.79) accuracy:train/valid[123,185,final]=(0.844,0.9730,0.9922/0.713,0.760,0.791)

# Set -e here so that we catch if any executable fails immediately
# (-e: exit on the first failing command; -u: treat unset variables as an
# error; -o pipefail: a pipeline fails if any of its stages fails).
set -euo pipefail



# training options
# stage: steps guarded by "[ $stage -le N ]" below are skipped when stage > N.
stage=0
# train_stage: forwarded as --stage to steps/nnet3/train_raw_dnn.py.
train_stage=-10
# dataset: selects the egs directory exp/${dataset}_egs2 and is part of the
# output directory name.
dataset=cifar10
# srand: forwarded as --trainer.srand to the training script.
srand=0
# reporting_email: forwarded as --reporting.email; empty by default.
reporting_email=
# affix: experiment suffix; the output directory is exp/resnet${affix}_${dataset}.
affix=1e


# End configuration section.
echo "$0 $@" # Print the command line for logging

# cmd.sh is expected to define $train_cmd (used in the training stage below);
# path.sh and parse_options.sh are project helpers not visible here --
# presumably they set up PATH and let --name value command-line flags override
# the variables above (standard Kaldi convention; confirm).
. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

# Abort early when Kaldi was built without CUDA: the training stage below
# always runs with --use-gpu=true.
if ! cuda-compiled; then
  # The exit is a separate statement on purpose.  With "cat ... && exit 1" a
  # failing cat (e.g. closed stdout) would skip the exit, and "set -e" does
  # not trigger for a failure in the non-final part of an && list, so the
  # script would silently continue without CUDA.
  cat <<EOF
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
  exit 1
fi



# Output directory of this experiment, e.g. exp/resnet1e_cifar10.
dir=exp/resnet${affix}_${dataset}

# Directory holding the training examples for the chosen dataset.
egs=exp/${dataset}_egs2

# All expansions below are quoted so that a --dataset/--affix value containing
# whitespace or glob characters cannot break the tests or the file list.
if [ ! -d "$egs" ]; then
  echo "$0: expected directory $egs to exist. Run the get_egs.sh commands in the"
  echo " run.sh before this script."
  exit 1
fi

# check that the expected files are in the egs directory.

for f in "$egs/egs.1.ark" "$egs/train_diagnostic.egs" "$egs/valid_diagnostic.egs" "$egs/combine.egs" \
  "$egs/info/feat_dim" "$egs/info/left_context" "$egs/info/right_context" \
  "$egs/info/output_dim"; do
  if [ ! -e "$f" ]; then
    echo "$0: expected file $f to exist."
    exit 1;
  fi
done


# Creates both $dir and $dir/log, so no separate "mkdir -p $dir" is needed.
mkdir -p "$dir/log"


# Stage 1: write the xconfig network description and expand it into nnet3
# config files under $dir/configs.
if [ "$stage" -le 1 ]; then
  echo "$0: creating neural net configs using the xconfig parser";

  # Number of output classes, as recorded in the egs directory.
  num_targets=$(cat "$egs/info/output_dim")

  # Note: we hardcode in the CNN config that we are dealing with 32x32x3 color
  # images (height 32 with 3 channels gives the input dim of 96 used below).


  # Number of filters for the three groups of layers (height 32, 16 and 8),
  # and the bottleneck size used by the res-blocks of the last group.
  nf1=48
  nf2=96
  nf3=512
  nb3=256

  # Option strings shared between layers; they are expanded inside the
  # (unquoted) here-document below.
  a="num-minibatches-history=40.0"
  common="$a required-time-offsets=0 height-offsets=-1,0,1"
  res_opts="$a bypass-source=batchnorm"

  mkdir -p "$dir/configs"
  cat <<EOF > "$dir/configs/network.xconfig"
input dim=96 name=input
conv-layer name=conv1 $a height-in=32 height-out=32 time-offsets=-1,0,1 required-time-offsets=0 height-offsets=-1,0,1 num-filters-out=$nf1
res-block name=res2 num-filters=$nf1 height=32 time-period=1 $res_opts
res-block name=res3 num-filters=$nf1 height=32 time-period=1 $res_opts
conv-layer name=conv4 height-in=32 height-out=16 height-subsample-out=2 time-offsets=-1,0,1 $common num-filters-out=$nf2
res-block name=res5 num-filters=$nf2 height=16 time-period=2 $res_opts
res-block name=res6 num-filters=$nf2 height=16 time-period=2 $res_opts
conv-layer name=conv7 height-in=16 height-out=8 height-subsample-out=2 time-offsets=-2,0,2 $common num-filters-out=$nf3
res-block name=res8 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res9 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
res-block name=res10 num-filters=$nf3 num-bottleneck-filters=$nb3 height=8 time-period=4 $res_opts
channel-average-layer name=channel-average input=Append(2,6,10,14,18,22,24,28) dim=$nf3
output-layer name=output learning-rate-factor=0.1 dim=$num_targets
EOF
  steps/nnet3/xconfig_to_configs.py --xconfig-file "$dir/configs/network.xconfig" --config-dir "$dir/configs/"
fi


# Stage 2: train the network on the GPU with image augmentation enabled.
if [ "$stage" -le 2 ]; then

  # Augmentation applied to the training images: random horizontal flip,
  # horizontal/vertical shifts of up to 10% of the image size, and a rotation
  # of at most 30 degrees applied with probability 0.5.
  augmentation_opts="--horizontal-flip-prob=0.5 --horizontal-shift=0.1 --vertical-shift=0.1 --rotation-degree=30 --rotation-prob=0.5 --num-channels=3"

  # The arguments are collected in an array so that every expansion is quoted
  # (values with whitespace stay a single argument) and options can be
  # commented individually.
  train_opts=(
    --stage="$train_stage"
    --cmd="$train_cmd"
    --image.augmentation-opts="$augmentation_opts"
    --trainer.srand="$srand"
    --trainer.max-param-change=2.0
    --trainer.num-epochs=140
    --egs.frames-per-eg=1
    --trainer.optimization.num-jobs-initial=1
    --trainer.optimization.num-jobs-final=2
    --trainer.optimization.initial-effective-lrate=0.003
    --trainer.optimization.final-effective-lrate=0.0003
    --trainer.optimization.minibatch-size=256,128,64
    --trainer.optimization.proportional-shrink=50.0
    --trainer.shuffle-buffer-size=2000
    --egs.dir="$egs"
    --use-gpu=true
    --reporting.email="$reporting_email"
    --dir="$dir"
  )

  steps/nnet3/train_raw_dnn.py "${train_opts[@]}" || exit 1;
fi


exit 0;
18 changes: 18 additions & 0 deletions src/nnet3bin/nnet3-egs-augment-image.cc
Expand Up @@ -35,13 +35,17 @@ struct ImageAugmentationConfig {
BaseFloat horizontal_flip_prob;
BaseFloat horizontal_shift;
BaseFloat vertical_shift;
BaseFloat rotation_degree;
BaseFloat rotation_prob;
std::string fill_mode_string;

ImageAugmentationConfig():
num_channels(1),
horizontal_flip_prob(0.0),
horizontal_shift(0.0),
vertical_shift(0.0),
rotation_degree(0.0),
rotation_prob(0.0),
fill_mode_string("nearest") { }


Expand All @@ -57,6 +61,10 @@ struct ImageAugmentationConfig {
po->Register("vertical-shift", &vertical_shift,
"Maximum allowed vertical shift as proportion of image "
"height. Padding is with closest pixel.");
po->Register("rotation-degree", &rotation_degree,
"Maximum allowed degree to rotate the image");
po->Register("rotation-prob", &rotation_prob,
"Probability of doing rotation");
po->Register("fill-mode", &fill_mode_string, "Mode for dealing with "
"points outside the image boundary when applying transformation. "
"Choices = {nearest, reflect}");
Expand All @@ -68,6 +76,8 @@ struct ImageAugmentationConfig {
horizontal_flip_prob <= 1);
KALDI_ASSERT(horizontal_shift >= 0 && horizontal_shift <= 1);
KALDI_ASSERT(vertical_shift >= 0 && vertical_shift <= 1);
KALDI_ASSERT(rotation_degree >=0 && rotation_degree <= 180);
KALDI_ASSERT(rotation_prob >=0 && rotation_prob <= 1);
KALDI_ASSERT(fill_mode_string == "nearest" || fill_mode_string == "reflect");
}

Expand Down Expand Up @@ -231,6 +241,14 @@ void PerturbImage(const ImageAugmentationConfig &config,
// [ cos(theta) -sin(theta) 0
// sin(theta) cos(theta) 0
// 0 0 1 ]
if (RandUniform() <= config.rotation_prob) {
BaseFloat theta = (2 * config.rotation_degree * RandUniform() -
config.rotation_degree) / 180.0 * M_PI;
rotation_mat(0, 0) = cos(theta);
rotation_mat(0, 1) = -sin(theta);
rotation_mat(1, 0) = sin(theta);
rotation_mat(1, 1) = cos(theta);
}

Matrix<BaseFloat> shear_mat(3, 3, kUndefined);
shear_mat.SetUnit();
Expand Down

0 comments on commit 148c884

Please sign in to comment.