Refactor notebook for training neural networks (#379)
ivynasantino committed Apr 7, 2020
1 parent 5c53bc2 commit c4b3e7a
Showing 1 changed file with 88 additions and 112 deletions.
200 changes: 88 additions & 112 deletions docs/examples/usecases/train_neural_network.ipynb
@@ -38,15 +38,28 @@
"First, we'll load the dataset from `scikit-learn`. The Iris Dataset contains 3 classes for each of the iris species (_iris setosa_, _iris virginica_, and _iris versicolor_). It has 50 samples per class with 150 samples in total, making it a very balanced dataset. Each sample is characterized by four features (or dimensions): sepal length, sepal width, petal length, petal width."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load the iris dataset"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"data = load_iris()"
]
},
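{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick sanity check (added for illustration, assuming `numpy` is already imported as `np` alongside `load_iris`), we can confirm the shape and class balance described above:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative check: 150 samples with 4 features, 50 samples per class\n",
"print(data.data.shape)           # expected: (150, 4)\n",
"print(np.bincount(data.target))  # expected: [50 50 50]"
]
},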
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Load the iris dataset\n",
"data = load_iris()\n",
"\n",
"# Store the features as X and the labels as y\n",
"X = data.data\n",
"y = data.target"
@@ -91,9 +104,63 @@
"Now, let's write the forward propagation procedure as our objective function. Let $X$ be the input, $z_l$ the pre-activation at layer $l$, and $a_l$ the activation for layer $l$:"
]
},
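{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, the cells below implement the following computation (a sketch reconstructed from the code, using the layer sizes defined in the next section):\n",
"\n",
"$$ z_1 = X W_1 + b_1 \\qquad a_1 = \\tanh(z_1) \\qquad z_2 = a_1 W_2 + b_2 $$\n",
"\n",
"The logits $z_2$ are pushed through a softmax, and the loss is the mean negative log-likelihood over the $N$ samples:\n",
"\n",
"$$ p_{n,k} = \\frac{e^{z_{2,nk}}}{\\sum_j e^{z_{2,nj}}} \\qquad \\mathcal{L} = -\\frac{1}{N} \\sum_{n=1}^{N} \\log p_{n,y_n} $$"
]
},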
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Neural network architecture"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"n_inputs = 4\n",
"n_hidden = 20\n",
"n_classes = 3\n",
"\n",
"num_samples = 150"
]
},
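{
"cell_type": "markdown",
"metadata": {},
"source": [
"With this architecture the flattened parameter vector has $(4 \\times 20) + 20 + (20 \\times 3) + 3 = 163$ entries, covering $W_1$, $b_1$, $W_2$, and $b_2$, which is exactly the layout that the slicing in `logits_function` below assumes."
]
},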
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"def logits_function(p):\n",
" \"\"\" Calculate roll-back the weights and biases\n",
" \n",
" Inputs\n",
" ------\n",
" p: np.ndarray\n",
" The dimensions should include an unrolled version of the \n",
" weights and biases.\n",
" \n",
" Returns\n",
" -------\n",
" numpy.ndarray of logits for layer 2\n",
" \n",
" \"\"\"\n",
" # Roll-back the weights and biases\n",
" W1 = p[0:80].reshape((n_inputs,n_hidden))\n",
" b1 = p[80:100].reshape((n_hidden,))\n",
" W2 = p[100:160].reshape((n_hidden,n_classes))\n",
" b2 = p[160:163].reshape((n_classes,))\n",
" \n",
" # Perform forward propagation\n",
" z1 = X.dot(W1) + b1 # Pre-activation in Layer 1\n",
" a1 = np.tanh(z1) # Activation in Layer 1\n",
" logits = a1.dot(W2) + b2 # Pre-activation in Layer 2\n",
" return logits # Logits for Layer 2\n",
" "
]
},
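{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick shape check (added for illustration, not part of the original notebook): any parameter vector of the right length should yield one row of logits per sample."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative check: a random unrolled parameter vector of length 163\n",
"p_test = np.random.randn((n_inputs * n_hidden) + n_hidden + (n_hidden * n_classes) + n_classes)\n",
"logits_function(p_test).shape  # expected: (150, 3)"
]
},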
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
@@ -102,8 +169,7 @@
" \"\"\"Forward propagation as objective function\n",
" \n",
" This computes for the forward propagation of the neural network, as\n",
" well as the loss. It receives a set of parameters that must be \n",
" rolled-back into the corresponding weights and biases.\n",
" well as the loss. \n",
" \n",
" Inputs\n",
" ------\n",
@@ -116,31 +182,17 @@
" float\n",
" The computed negative log-likelihood loss given the parameters\n",
" \"\"\"\n",
" # Neural network architecture\n",
" n_inputs = 4\n",
" n_hidden = 20\n",
" n_classes = 3\n",
" \n",
" # Roll-back the weights and biases\n",
" W1 = params[0:80].reshape((n_inputs,n_hidden))\n",
" b1 = params[80:100].reshape((n_hidden,))\n",
" W2 = params[100:160].reshape((n_hidden,n_classes))\n",
" b2 = params[160:163].reshape((n_classes,))\n",
" \n",
" # Perform forward propagation\n",
" z1 = X.dot(W1) + b1 # Pre-activation in Layer 1\n",
" a1 = np.tanh(z1) # Activation in Layer 1\n",
" z2 = a1.dot(W2) + b2 # Pre-activation in Layer 2\n",
" logits = z2 # Logits for Layer 2\n",
" logits = logits_funciton(params)\n",
" \n",
" # Compute for the softmax of the logits\n",
" exp_scores = np.exp(logits)\n",
" probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True) \n",
" \n",
" # Compute for the negative log likelihood\n",
" N = 150 # Number of samples\n",
" corect_logprobs = -np.log(probs[range(N), y])\n",
" loss = np.sum(corect_logprobs) / N\n",
"\n",
" corect_logprobs = -np.log(probs[range(num_samples), y])\n",
" loss = np.sum(corect_logprobs) / num_samples\n",
" \n",
" return loss\n"
]
@@ -154,7 +206,7 @@
},
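{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before running PSO we can also verify (again, an illustrative check that is not part of the original notebook) that `forward_prop` maps a 163-dimensional parameter vector to a single scalar loss:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative check: the objective should return one positive float\n",
"forward_prop(np.random.randn(163))"
]
},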
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
@@ -188,74 +240,16 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2019-05-18 15:41:57,877 - pyswarms.single.global_best - INFO - Optimize for 1000 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9}\n",
"pyswarms.single.global_best: 100%|██████████|1000/1000, best_cost=0.0165\n",
"2019-05-18 15:42:31,581 - pyswarms.single.global_best - INFO - Optimization finished | best cost: 0.016531416872331766, best pos: [ 5.85443277e-01 3.18983187e-01 4.87616029e+00 6.92819686e-01\n",
" 5.91348094e-01 -1.47072755e+00 -3.87333270e-01 2.28078097e-01\n",
" 5.64760071e-01 -7.56088270e-01 6.87331448e-01 2.88151297e-02\n",
" 3.31349600e-01 -1.59924694e+00 4.67166210e-02 -1.12181711e+00\n",
" -1.46116999e+00 8.28885848e-02 -5.87848222e-01 4.84650773e-01\n",
" 7.25055845e-01 1.60319161e+00 1.01734077e+00 6.77151277e-01\n",
" 4.68472188e-01 -2.03064036e+00 6.04853725e-01 8.69137842e-01\n",
" 3.37968541e-02 7.35966984e+00 3.77854618e-01 5.39704445e-01\n",
" -3.18805135e-01 3.98886841e-01 1.35014252e+00 6.30151934e-01\n",
" -9.89671205e+00 5.30611778e-01 -3.94029130e-01 1.28325712e+00\n",
" 4.00896282e-01 1.44504447e+00 4.80281265e-02 -2.23726897e+00\n",
" -1.30647406e+00 -1.22876659e+00 1.05121523e+00 1.65897010e-02\n",
" -3.01271564e-01 7.11549766e-01 -4.30742366e-01 -8.79857761e-01\n",
" 4.65148904e-01 2.68559029e+00 -8.19525838e-01 1.54207613e+00\n",
" 1.11130818e+00 -1.00668214e+00 1.34198293e+00 1.37349242e+00\n",
" 8.63633177e-01 7.71218636e-01 -3.24648088e+00 -2.77803089e-01\n",
" 5.61820577e-01 3.60558056e-01 -1.84423615e-01 7.31299682e-01\n",
" -2.70701588e-01 7.40742860e-01 -2.04198406e-01 -3.40162714e+00\n",
" -1.63934080e+00 4.25175361e-01 -5.64146954e-01 9.32478016e-03\n",
" 3.21352295e+01 -1.70581217e-02 2.82477110e-03 -6.18616886e-01\n",
" 1.38075445e+00 4.79120594e-01 -1.38986251e+00 1.08695023e+00\n",
" -3.27381335e-01 -1.88859617e+00 5.74757420e-03 3.17290632e-01\n",
" -3.38194174e-01 -7.56263717e-01 -1.99762849e-01 1.48606896e+00\n",
" 1.33930708e+00 6.64423514e-01 -3.54654570e-01 -5.97853941e-02\n",
" -2.37687453e+01 2.72228437e+00 1.06655131e+00 6.82690854e-01\n",
" 2.31485658e+00 -1.52431864e+00 -3.59092541e-01 2.86005282e+00\n",
" 6.34208138e-01 -4.77038443e-02 -2.15260265e-01 1.02182659e+00\n",
" 9.64123957e-03 1.81348424e+00 1.23999239e+00 -8.64817004e-01\n",
" 7.99587790e-01 -6.65369364e-01 -1.19005701e+00 6.56194773e-01\n",
" -7.91411474e-01 -1.28616596e+00 -1.17059974e-01 1.58063399e+00\n",
" -5.29152828e-02 7.20523887e-01 -7.31449869e-01 -1.91568507e+00\n",
" 8.65212737e-02 2.90227497e-01 -1.89333200e+00 5.57458851e-01\n",
" 3.49374578e-01 -1.87458746e-01 -9.91365971e-01 3.19524629e-01\n",
" 2.94593616e-01 1.04852932e+00 -8.21094379e-01 -8.56035315e-02\n",
" -1.11249671e-02 -1.51713779e+00 -1.72314444e-01 -2.63524712e+00\n",
" 4.20163362e-01 -1.69099999e+00 1.12468986e+00 7.20661344e-01\n",
" -6.21266399e-01 -2.27307898e+00 -1.14414646e+00 2.82661220e+00\n",
" -5.48558830e-01 6.14762373e-01 6.30690474e+00 2.65412287e+00\n",
" 2.57065658e+00 -1.15195847e+01 -1.70542094e+00 9.64484168e-01\n",
" 7.66919284e-01 6.93106240e-01 2.34685030e+00 1.86609454e+00\n",
" -7.14644033e-01 7.41293434e-01 -2.36930354e-01]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 23.8 s, sys: 488 ms, total: 24.3 s\n",
"Wall time: 33.7 s\n"
]
}
],
"outputs": [],
"source": [
"%%time\n",
"# Initialize swarm\n",
"options = {'c1': 0.5, 'c2': 0.3, 'w':0.9}\n",
"\n",
"# Call instance of PSO\n",
"dimensions = (4 * 20) + (20 * 3) + 20 + 3 \n",
"dimensions = (n_inputs * n_hidden) + (n_hidden * n_classes) + n_hidden + n_classes \n",
"optimizer = ps.single.GlobalBestPSO(n_particles=100, dimensions=dimensions, options=options)\n",
"\n",
"# Perform optimization\n",
@@ -274,39 +268,21 @@
},
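{
"cell_type": "markdown",
"metadata": {},
"source": [
"In `pyswarms`, `optimizer.optimize` returns a tuple of the best cost and the best position found by the swarm; that best position `pos` is the trained parameter vector we roll back into weights and biases below."
]
},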
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"def predict(X, pos):\n",
"def predict(pos):\n",
" \"\"\"\n",
" Use the trained weights to perform class predictions.\n",
" \n",
" Inputs\n",
" ------\n",
" X: numpy.ndarray\n",
" Input Iris dataset\n",
" pos: numpy.ndarray\n",
" Position matrix found by the swarm. Will be rolled\n",
" into weights and biases.\n",
" \"\"\"\n",
" # Neural network architecture\n",
" n_inputs = 4\n",
" n_hidden = 20\n",
" n_classes = 3\n",
" \n",
" # Roll-back the weights and biases\n",
" W1 = pos[0:80].reshape((n_inputs,n_hidden))\n",
" b1 = pos[80:100].reshape((n_hidden,))\n",
" W2 = pos[100:160].reshape((n_hidden,n_classes))\n",
" b2 = pos[160:163].reshape((n_classes,))\n",
" \n",
" # Perform forward propagation\n",
" z1 = X.dot(W1) + b1 # Pre-activation in Layer 1\n",
" a1 = np.tanh(z1) # Activation in Layer 1\n",
" z2 = a1.dot(W2) + b2 # Pre-activation in Layer 2\n",
" logits = z2 # Logits for Layer 2\n",
" \n",
" logits = logits_funciton(pos)\n",
" y_pred = np.argmax(logits, axis=1)\n",
" return y_pred"
]
@@ -320,7 +296,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 21,
"metadata": {},
"outputs": [
{
@@ -329,13 +305,13 @@
"0.9866666666666667"
]
},
"execution_count": 7,
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(predict(X, pos) == y).mean()"
"(predict(pos) == y).mean()"
]
}
],
@@ -355,7 +331,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.7.6"
}
},
"nbformat": 4,