Incorporated ottonemo's review suggestion.
Also:
- Typos.
- Removed not used stuff.
juripaern0007 authored and ottonemo committed Dec 8, 2017
1 parent b1402d4 commit 95891a1
134 changes: 66 additions & 68 deletions notebooks/MNIST.ipynb
@@ -91,7 +91,7 @@
"source": [
"## Preprocessing Data\n",
"Each image of the MNIST dataset is encoded in a 784 dimensional vector, representing a 28 x 28 pixel image. Each pixel has a value between 0 and 255, corresponding to the grey-value of a pixel.<br />\n",
"If the above ```featch_mldata``` is used to load MNIST, ```data``` and ```target``` are encoded as ```uint8```, which cannot be processed by Skorch. Skorch accepts only ```float32``` or ```int64``` as types. As a consequence ```data``` is transformed to ```float32``` and scaled to [0, 1]. ```target``` is transformed to ```int64```. "
"The above ```featch_mldata``` method to load MNIST returns ```data``` and ```target``` as ```uint8```, which cannot be processed by PyTorch. PyTorch accepts only ```float32``` or ```int64``` as types. As a consequence ```data``` is transformed to ```float32``` and ```target``` to ```int64```. "
]
},
{
@@ -104,6 +104,13 @@
"y = mnist.target.astype('int64')"
]
},
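The rest of this preprocessing cell is collapsed in the diff above; the following is a minimal sketch of the full conversion, assuming `mnist` was loaded with scikit-learn's `fetch_mldata` as earlier in the notebook:

```python
# Sketch only: load MNIST and convert to the dtypes PyTorch expects.
from sklearn.datasets import fetch_mldata

mnist = fetch_mldata('MNIST original')
X = mnist.data.astype('float32')   # uint8 -> float32 for the inputs
y = mnist.target.astype('int64')   # uint8 -> int64 for the class labels
```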
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As we will use ReLU as activation in combination with softmax over the output layer, we need to scale `X` down. An often use range is [0, 1]."
]
},
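The scaling cell itself is collapsed in this diff; a minimal sketch of the step, assuming the raw pixel values lie in [0, 255] as stated above:

```python
# Sketch only: scale pixel values from [0, 255] down to [0, 1] in place.
X /= 255.0
```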
{
"cell_type": "code",
"execution_count": 6,
@@ -121,7 +128,7 @@
{
"data": {
"text/plain": [
"(0.0, 0.13092543, 0.30844888, 1.0)"
"(0.0, 1.0)"
]
},
"execution_count": 7,
@@ -130,7 +137,14 @@
}
],
"source": [
"X.min(), X.mean(), X.std(), X.max()"
"X.min(), X.max()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: data is not normalized."
]
},
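For contrast, a sketch of what standardizing to zero mean and unit variance would look like (the notebook deliberately skips this step):

```python
# Sketch only, not used in the notebook: standardize to zero mean and
# unit variance instead of plain [0, 1] scaling.
X_std = (X - X.mean()) / X.std()
```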
{
@@ -176,7 +190,7 @@
"metadata": {},
"source": [
"## Build Neural Network with Torch\n",
"Simple, fully connected neural network with one hidden layer. Input layer has 784 dimensions (28x28), hidden layer has 98 (= 784 / 8) neurons, output layer 10 neurons, representing digits 0 - 9."
"Simple, fully connected neural network with one hidden layer. Input layer has 784 dimensions (28x28), hidden layer has 98 (= 784 / 8) and output layer 10 neurons, representing digits 0 - 9."
]
},
{
@@ -249,20 +263,17 @@
" input_dim=mnist_dim,\n",
" hidden_dim=hidden_dim,\n",
" output_dim=output_dim,\n",
" nonlin=F.relu,\n",
" dropout=0.5,\n",
" ):\n",
" super(ClassifierModule, self).__init__()\n",
" self.nonlin = nonlin\n",
" self.dropout = nn.Dropout(dropout)\n",
"\n",
" self.hidden = nn.Linear(input_dim, hidden_dim)\n",
" self.output = nn.Linear(hidden_dim, output_dim)\n",
"\n",
" def forward(self, X, **kwargs):\n",
" X = self.nonlin(self.hidden(X))\n",
" X = F.relu(self.hidden(X))\n",
" X = self.dropout(X)\n",
" #X = F.relu(self.hidden(X))\n",
" X = F.softmax(self.output(X))\n",
" return X"
]
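The cell that constructs `net` is collapsed in this diff. Below is a hypothetical sketch of how such a skorch classifier is typically built: `max_epochs=20` matches the 20-epoch training log that follows, while `lr=0.1` is an assumption.

```python
# Hypothetical sketch: the actual construction cell is collapsed above.
# max_epochs=20 matches the training log below; lr=0.1 is assumed.
from skorch.net import NeuralNetClassifier

net = NeuralNetClassifier(
    ClassifierModule,
    max_epochs=20,
    lr=0.1,
)
```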
@@ -308,41 +319,31 @@
"text": [
" epoch train_loss valid_acc valid_loss dur\n",
"------- ------------ ----------- ------------ ------\n",
" 1 \u001b[36m0.8588\u001b[0m \u001b[32m0.9036\u001b[0m \u001b[35m0.3782\u001b[0m 1.3989\n",
" 2 \u001b[36m0.4356\u001b[0m \u001b[32m0.9226\u001b[0m \u001b[35m0.2868\u001b[0m 1.6909\n",
" 3 \u001b[36m0.3639\u001b[0m \u001b[32m0.9328\u001b[0m \u001b[35m0.2432\u001b[0m 1.6691\n",
" 4 \u001b[36m0.3271\u001b[0m \u001b[32m0.9404\u001b[0m \u001b[35m0.2174\u001b[0m 1.5587\n",
" 5 \u001b[36m0.2963\u001b[0m \u001b[32m0.9465\u001b[0m \u001b[35m0.1957\u001b[0m 1.6026\n",
" 6 \u001b[36m0.2745\u001b[0m \u001b[32m0.9499\u001b[0m \u001b[35m0.1798\u001b[0m 1.5944\n",
" 7 \u001b[36m0.2576\u001b[0m \u001b[32m0.9510\u001b[0m \u001b[35m0.1730\u001b[0m 1.5852\n",
" 8 \u001b[36m0.2460\u001b[0m \u001b[32m0.9550\u001b[0m \u001b[35m0.1579\u001b[0m 1.6465\n",
" 9 \u001b[36m0.2332\u001b[0m \u001b[32m0.9560\u001b[0m \u001b[35m0.1498\u001b[0m 1.3179\n",
" 10 \u001b[36m0.2254\u001b[0m \u001b[32m0.9594\u001b[0m \u001b[35m0.1429\u001b[0m 1.6584\n",
" 11 \u001b[36m0.2186\u001b[0m \u001b[32m0.9606\u001b[0m \u001b[35m0.1371\u001b[0m 1.9561\n",
" 12 \u001b[36m0.2100\u001b[0m \u001b[32m0.9609\u001b[0m \u001b[35m0.1322\u001b[0m 1.6223\n",
" 13 \u001b[36m0.2030\u001b[0m \u001b[32m0.9619\u001b[0m \u001b[35m0.1290\u001b[0m 1.4144\n",
" 14 \u001b[36m0.1924\u001b[0m 0.9619 \u001b[35m0.1254\u001b[0m 1.5964\n",
" 15 \u001b[36m0.1892\u001b[0m \u001b[32m0.9628\u001b[0m 0.1260 1.6148\n",
" 16 \u001b[36m0.1874\u001b[0m \u001b[32m0.9639\u001b[0m \u001b[35m0.1187\u001b[0m 1.6000\n",
" 17 \u001b[36m0.1829\u001b[0m \u001b[32m0.9649\u001b[0m \u001b[35m0.1164\u001b[0m 1.5877\n",
" 18 \u001b[36m0.1785\u001b[0m \u001b[32m0.9658\u001b[0m \u001b[35m0.1158\u001b[0m 1.5540\n",
" 19 \u001b[36m0.1760\u001b[0m \u001b[32m0.9670\u001b[0m \u001b[35m0.1117\u001b[0m 1.5644\n",
" 20 \u001b[36m0.1719\u001b[0m 0.9669 \u001b[35m0.1105\u001b[0m 1.6020\n"
" 1 \u001b[36m0.8588\u001b[0m \u001b[32m0.9036\u001b[0m \u001b[35m0.3782\u001b[0m 1.5758\n",
" 2 \u001b[36m0.4356\u001b[0m \u001b[32m0.9226\u001b[0m \u001b[35m0.2868\u001b[0m 1.6797\n",
" 3 \u001b[36m0.3639\u001b[0m \u001b[32m0.9328\u001b[0m \u001b[35m0.2432\u001b[0m 1.5908\n",
" 4 \u001b[36m0.3271\u001b[0m \u001b[32m0.9404\u001b[0m \u001b[35m0.2174\u001b[0m 1.3834\n",
" 5 \u001b[36m0.2963\u001b[0m \u001b[32m0.9465\u001b[0m \u001b[35m0.1957\u001b[0m 1.5864\n",
" 6 \u001b[36m0.2745\u001b[0m \u001b[32m0.9499\u001b[0m \u001b[35m0.1798\u001b[0m 1.5951\n",
" 7 \u001b[36m0.2576\u001b[0m \u001b[32m0.9510\u001b[0m \u001b[35m0.1730\u001b[0m 1.6888\n",
" 8 \u001b[36m0.2460\u001b[0m \u001b[32m0.9550\u001b[0m \u001b[35m0.1579\u001b[0m 1.7371\n",
" 9 \u001b[36m0.2332\u001b[0m \u001b[32m0.9560\u001b[0m \u001b[35m0.1498\u001b[0m 1.7204\n",
" 10 \u001b[36m0.2254\u001b[0m \u001b[32m0.9594\u001b[0m \u001b[35m0.1429\u001b[0m 1.5211\n",
" 11 \u001b[36m0.2186\u001b[0m \u001b[32m0.9606\u001b[0m \u001b[35m0.1371\u001b[0m 1.5950\n",
" 12 \u001b[36m0.2100\u001b[0m \u001b[32m0.9609\u001b[0m \u001b[35m0.1322\u001b[0m 1.5359\n",
" 13 \u001b[36m0.2030\u001b[0m \u001b[32m0.9619\u001b[0m \u001b[35m0.1290\u001b[0m 1.5880\n",
" 14 \u001b[36m0.1924\u001b[0m 0.9619 \u001b[35m0.1254\u001b[0m 1.6800\n",
" 15 \u001b[36m0.1892\u001b[0m \u001b[32m0.9628\u001b[0m 0.1260 1.7016\n",
" 16 \u001b[36m0.1874\u001b[0m \u001b[32m0.9639\u001b[0m \u001b[35m0.1187\u001b[0m 1.4930\n",
" 17 \u001b[36m0.1829\u001b[0m \u001b[32m0.9649\u001b[0m \u001b[35m0.1164\u001b[0m 1.6020\n",
" 18 \u001b[36m0.1785\u001b[0m \u001b[32m0.9658\u001b[0m \u001b[35m0.1158\u001b[0m 1.8048\n",
" 19 \u001b[36m0.1760\u001b[0m \u001b[32m0.9670\u001b[0m \u001b[35m0.1117\u001b[0m 1.5888\n",
" 20 \u001b[36m0.1719\u001b[0m 0.9669 \u001b[35m0.1105\u001b[0m 1.6014\n"
]
},
{
"data": {
"text/plain": [
"<skorch.net.NeuralNetClassifier at 0x7f6239f7c278>"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"net.fit(X_train, y_train)"
"net.fit(X_train, y_train);"
]
},
{
@@ -385,14 +386,21 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"An accuracy of nearly 97% for network with only one hidden layer is not too bad"
"An accuracy of nearly 97% for a network with only one hidden layer is not too bad"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Convolutional Network"
"# Convolutional Network\n",
"PyTorch expects a 4 dimensional tensor as input for its 2D convolution layer. The dimensions represent:\n",
"* Batch size\n",
"* Number of channel\n",
"* Height\n",
"* Width\n",
"\n",
"As initial batch size the number of examples needs to be provided. MNIST data has only one channel. As stated above, each MNIST vector represents a 28x28 pixel image. Hence, the resulting shape for PyTorch tensor needs to be (x, 1, 28, 28). "
]
},
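The reshaping cell itself is collapsed in this diff; a minimal sketch of producing the 4D input, using the `XCnn` name the notebook uses later:

```python
# Sketch only: reshape the flat 784-dimensional vectors into
# (n_examples, channels, height, width) for the convolution layers.
XCnn = X.reshape(-1, 1, 28, 28)
```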
{
@@ -471,7 +479,7 @@
" def forward(self, x):\n",
" x = F.relu(F.max_pool2d(self.conv1(x), 2))\n",
" x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))\n",
" x = x.view(-1, x.size(1) * x.size(2) * x.size(3)) # flatten over channel, width and height = 1600\n",
" x = x.view(-1, x.size(1) * x.size(2) * x.size(3)) # flatten over channel, height and width = 1600\n",
" x = F.relu(self.fc1(x))\n",
" x = F.dropout(x, training=self.training)\n",
" x = self.fc2(x)\n",
@@ -505,36 +513,26 @@
"text": [
" epoch train_loss valid_acc valid_loss dur\n",
"------- ------------ ----------- ------------ -------\n",
" 1 \u001b[36m0.4442\u001b[0m \u001b[32m0.9747\u001b[0m \u001b[35m0.0861\u001b[0m 20.1571\n",
" 2 \u001b[36m0.1471\u001b[0m \u001b[32m0.9802\u001b[0m \u001b[35m0.0631\u001b[0m 19.8584\n",
" 3 \u001b[36m0.1094\u001b[0m \u001b[32m0.9845\u001b[0m \u001b[35m0.0515\u001b[0m 20.5175\n",
" 4 \u001b[36m0.0931\u001b[0m \u001b[32m0.9849\u001b[0m \u001b[35m0.0482\u001b[0m 20.1864\n",
" 5 \u001b[36m0.0814\u001b[0m \u001b[32m0.9866\u001b[0m \u001b[35m0.0388\u001b[0m 19.7448\n",
" 6 \u001b[36m0.0727\u001b[0m \u001b[32m0.9871\u001b[0m \u001b[35m0.0365\u001b[0m 20.1205\n",
" 7 \u001b[36m0.0684\u001b[0m \u001b[32m0.9882\u001b[0m \u001b[35m0.0357\u001b[0m 20.2718\n",
" 8 \u001b[36m0.0645\u001b[0m \u001b[32m0.9887\u001b[0m \u001b[35m0.0331\u001b[0m 20.6907\n",
" 9 \u001b[36m0.0575\u001b[0m 0.9880 0.0344 20.1551\n",
" 10 \u001b[36m0.0566\u001b[0m \u001b[32m0.9894\u001b[0m \u001b[35m0.0315\u001b[0m 20.5150\n",
" 11 \u001b[36m0.0523\u001b[0m 0.9891 \u001b[35m0.0312\u001b[0m 20.4343\n",
" 12 0.0543 \u001b[32m0.9900\u001b[0m \u001b[35m0.0305\u001b[0m 20.1476\n",
" 13 \u001b[36m0.0505\u001b[0m 0.9895 \u001b[35m0.0300\u001b[0m 19.8757\n",
" 14 \u001b[36m0.0484\u001b[0m 0.9895 0.0306 20.1783\n",
" 15 \u001b[36m0.0445\u001b[0m 0.9893 0.0323 19.9885\n"
" 1 \u001b[36m0.4442\u001b[0m \u001b[32m0.9747\u001b[0m \u001b[35m0.0861\u001b[0m 20.5060\n",
" 2 \u001b[36m0.1471\u001b[0m \u001b[32m0.9802\u001b[0m \u001b[35m0.0631\u001b[0m 20.4043\n",
" 3 \u001b[36m0.1094\u001b[0m \u001b[32m0.9845\u001b[0m \u001b[35m0.0515\u001b[0m 20.2561\n",
" 4 \u001b[36m0.0931\u001b[0m \u001b[32m0.9849\u001b[0m \u001b[35m0.0482\u001b[0m 20.4430\n",
" 5 \u001b[36m0.0814\u001b[0m \u001b[32m0.9866\u001b[0m \u001b[35m0.0388\u001b[0m 20.6197\n",
" 6 \u001b[36m0.0727\u001b[0m \u001b[32m0.9871\u001b[0m \u001b[35m0.0365\u001b[0m 20.5876\n",
" 7 \u001b[36m0.0684\u001b[0m \u001b[32m0.9882\u001b[0m \u001b[35m0.0357\u001b[0m 20.3337\n",
" 8 \u001b[36m0.0645\u001b[0m \u001b[32m0.9887\u001b[0m \u001b[35m0.0331\u001b[0m 20.0876\n",
" 9 \u001b[36m0.0575\u001b[0m 0.9880 0.0344 20.3122\n",
" 10 \u001b[36m0.0566\u001b[0m \u001b[32m0.9894\u001b[0m \u001b[35m0.0315\u001b[0m 20.3069\n",
" 11 \u001b[36m0.0523\u001b[0m 0.9891 \u001b[35m0.0312\u001b[0m 20.5467\n",
" 12 0.0543 \u001b[32m0.9900\u001b[0m \u001b[35m0.0305\u001b[0m 20.2296\n",
" 13 \u001b[36m0.0505\u001b[0m 0.9895 \u001b[35m0.0300\u001b[0m 20.2744\n",
" 14 \u001b[36m0.0484\u001b[0m 0.9895 0.0306 20.3480\n",
" 15 \u001b[36m0.0445\u001b[0m 0.9893 0.0323 20.4953\n"
]
},
{
"data": {
"text/plain": [
"<skorch.net.NeuralNetClassifier at 0x7f6239f7ccc0>"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cnn.fit(XCnn_train, y_train)"
"cnn.fit(XCnn_train, y_train);"
]
},
{
