
Commit

update latest numbers
ppwwyyxx committed Apr 21, 2016
1 parent 9380b5f commit b5a238a
Showing 3 changed files with 8 additions and 7 deletions.
1 change: 1 addition & 0 deletions examples/ResNet/README.md
@@ -2,5 +2,6 @@
 ## ResNet
 
 Implements the paper "Deep Residual Learning for Image Recognition", [http://arxiv.org/abs/1512.03385](http://arxiv.org/abs/1512.03385)
+with the variants proposed in "Identity Mappings in Deep Residual Networks", [https://arxiv.org/abs/1603.05027](https://arxiv.org/abs/1603.05027).
 
 ![cifar10](https://github.com/ppwwyyxx/tensorpack/raw/master/examples/ResNet/cifar10-resnet.png)
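For readers skimming the diff: the "Identity Mappings" variant cited above rearranges each residual unit into BN → ReLU → conv (pre-activation), leaving the shortcut as a pure identity path. Below is a minimal sketch of one such unit, written in PyTorch purely for illustration; the repository itself is TensorFlow/tensorpack, and none of these names come from it.

```python
# Sketch (not the repo's code) of a pre-activation residual unit from
# "Identity Mappings in Deep Residual Networks": BN -> ReLU -> conv, twice,
# with the skip connection kept as a plain identity.
import torch
import torch.nn as nn
import torch.nn.functional as F

class PreActBlock(nn.Module):
    def __init__(self, channels: int):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv1 = nn.Conv2d(channels, channels, 3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, 3, padding=1, bias=False)

    def forward(self, x):
        out = self.conv1(F.relu(self.bn1(x)))
        out = self.conv2(F.relu(self.bn2(out)))
        return x + out  # identity shortcut: no BN/ReLU on the skip path
```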
10 changes: 5 additions & 5 deletions examples/ResNet/cifar10-resnet.py
@@ -24,10 +24,10 @@
 Identity Mappings in Deep Residual Networks, arxiv:1603.05027
 I can reproduce the results for
-n=5, about 7.2% val error after 93k step with 2 TitanX (6.8it/s)
-n=18, about 6.05% val error after 62k step with 2 TitanX (about 10hr)
-n=30: a 182-layer network, about 5.5% val error after 51k step with 2 GPUs
-This model uses the whole training set instead of a 95:5 train-val split.
+n=5, about 7.1% val error after 67k step with 2 TitanX (6.1it/s)
+n=18, about 6.0% val error after 62k step with 2 TitanX (about 10hr)
+n=30: a 182-layer network, about 5.6% val error after 51k step with 2 GPUs
+This model uses the whole training set instead of a train-val split.
 """
 
 BATCH_SIZE = 128
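For reference, the n in this docstring follows the CIFAR-10 recipe from the original ResNet paper: the network has 6n+2 weighted layers (3 stages of n two-conv residual blocks, plus the first conv and the final FC layer), which matches the "182-layer network" claim for n=30. A quick sanity check:

```python
# Depth of the CIFAR-10 ResNet as a function of n: 3 stages of n
# two-conv residual blocks, plus the initial conv and the final FC layer.
def resnet_depth(n: int) -> int:
    return 6 * n + 2

for n in (5, 18, 30):
    print(n, resnet_depth(n))  # 5 -> 32, 18 -> 110, 30 -> 182
```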
@@ -168,7 +168,7 @@ def get_config():
             [(1, 0.1), (82, 0.01), (123, 0.001), (300, 0.0002)])
         ]),
         session_config=sess_config,
-        model=Model(n=5),
+        model=Model(n=18),
         step_per_epoch=step_per_epoch,
         max_epoch=500,
     )
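The schedule in the hunk above is a piecewise-constant learning-rate table: each (epoch, value) pair takes effect from that epoch onward, so training runs at 0.1 until epoch 82, then 0.01, and so on, matching the step decays used in the paper. (That reading of tensorpack's ScheduledHyperParamSetter is an assumption on my part.) A stand-alone sketch of the lookup:

```python
# Piecewise-constant schedule lookup: each (epoch, value) pair applies
# from that epoch onward, until the next pair takes over.
SCHEDULE = [(1, 0.1), (82, 0.01), (123, 0.001), (300, 0.0002)]

def learning_rate(epoch: int, schedule=SCHEDULE) -> float:
    lr = schedule[0][1]
    for start_epoch, value in schedule:
        if epoch >= start_epoch:
            lr = value
    return lr

assert learning_rate(1) == 0.1
assert learning_rate(82) == 0.01
assert learning_rate(200) == 0.001
assert learning_rate(400) == 0.0002
```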
4 changes: 2 additions & 2 deletions tensorpack/models/batch_norm.py
@@ -12,10 +12,10 @@
 
 # http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
 # TF batch_norm only works for 4D tensor right now: #804
-# decay: being too close to 1 leads to slow start-up, but ends up better
+# decay: being too close to 1 leads to slow start-up. torch use 0.9.
 # eps: torch: 1e-5. Lasagne: 1e-4
 @layer_register(log_shape=False)
-def BatchNorm(x, use_local_stat=True, decay=0.999, epsilon=1e-5):
+def BatchNorm(x, use_local_stat=True, decay=0.9, epsilon=1e-5):
     """
     Batch normalization layer as described in:
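The decay changed here governs the exponential moving average of batch statistics kept for inference: running = decay * running + (1 - decay) * batch_stat. With decay = 0.999 the running estimates need on the order of a thousand updates to move away from their initialization, which is the "slow start-up" the comment describes; 0.9 warms up in tens of updates. A toy illustration (the numbers are made up):

```python
# Exponential moving average as used for BatchNorm running statistics:
# running = decay * running + (1 - decay) * batch_value.
def ema_update(running: float, batch_value: float, decay: float) -> float:
    return decay * running + (1 - decay) * batch_value

# Track a constant "true" statistic of 1.0 from a 0.0 initialization.
mean_slow, mean_fast = 0.0, 0.0
for _ in range(100):
    mean_slow = ema_update(mean_slow, 1.0, decay=0.999)
    mean_fast = ema_update(mean_fast, 1.0, decay=0.9)
print(round(mean_slow, 3), round(mean_fast, 3))  # ~0.095 vs ~1.0
```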
