Refactor notebook for training neural networks (#379)
ivynasantino committed Apr 7, 2020
1 parent 5c53bc2 commit c4b3e7a
Showing 1 changed file with 88 additions and 112 deletions.
200 changes: 88 additions & 112 deletions docs/examples/usecases/train_neural_network.ipynb
@@ -38,15 +38,28 @@
"First, we'll load the dataset from `scikit-learn`. The Iris Dataset contains 3 classes for each of the iris species (_iris setosa_, _iris virginica_, and _iris versicolor_). It has 50 samples per class with 150 samples in total, making it a very balanced dataset. Each sample is characterized by four features (or dimensions): sepal length, sepal width, petal length, petal width."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load the iris dataset"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"data = load_iris()"
]
},
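{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick sanity check (added for illustration, assuming `numpy` is already imported as `np` alongside `load_iris`), we can confirm the shape and class balance described above:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative check: 150 samples with 4 features, 50 samples per class\n",
"print(data.data.shape)           # expected: (150, 4)\n",
"print(np.bincount(data.target))  # expected: [50 50 50]"
]
},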
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Load the iris dataset\n",
"data = load_iris()\n",
"\n",
"# Store the features as X and the labels as y\n",
"X = data.data\n",
"y = data.target"
@@ -91,9 +104,63 @@
"Now, let's write the forward propagation procedure as our objective function. Let $X$ be the input, $z_l$ the pre-activation at layer $l$, and $a_l$ the activation for layer $l$:"
]
},
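{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, the cells below implement the following computation (a sketch reconstructed from the code, using the layer sizes defined in the next section):\n",
"\n",
"$$ z_1 = X W_1 + b_1 \\qquad a_1 = \\tanh(z_1) \\qquad z_2 = a_1 W_2 + b_2 $$\n",
"\n",
"The logits $z_2$ are pushed through a softmax, and the loss is the mean negative log-likelihood over the $N$ samples:\n",
"\n",
"$$ p_{n,k} = \\frac{e^{z_{2,nk}}}{\\sum_j e^{z_{2,nj}}} \\qquad \\mathcal{L} = -\\frac{1}{N} \\sum_{n=1}^{N} \\log p_{n,y_n} $$"
]
},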
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Neural network architecture"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"n_inputs = 4\n",
"n_hidden = 20\n",
"n_classes = 3\n",
"\n",
"num_samples = 150"
]
},
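{
"cell_type": "markdown",
"metadata": {},
"source": [
"With this architecture the flattened parameter vector has $(4 \\times 20) + 20 + (20 \\times 3) + 3 = 163$ entries, covering $W_1$, $b_1$, $W_2$, and $b_2$, which is exactly the layout that the slicing in `logits_function` below assumes."
]
},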
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"def logits_function(p):\n",
" \"\"\" Calculate roll-back the weights and biases\n",
" \n",
" Inputs\n",
" ------\n",
" p: np.ndarray\n",
" The dimensions should include an unrolled version of the \n",
" weights and biases.\n",
" \n",
" Returns\n",
" -------\n",
" numpy.ndarray of logits for layer 2\n",
" \n",
" \"\"\"\n",
" # Roll-back the weights and biases\n",
" W1 = p[0:80].reshape((n_inputs,n_hidden))\n",
" b1 = p[80:100].reshape((n_hidden,))\n",
" W2 = p[100:160].reshape((n_hidden,n_classes))\n",
" b2 = p[160:163].reshape((n_classes,))\n",
" \n",
" # Perform forward propagation\n",
" z1 = X.dot(W1) + b1 # Pre-activation in Layer 1\n",
" a1 = np.tanh(z1) # Activation in Layer 1\n",
" logits = a1.dot(W2) + b2 # Pre-activation in Layer 2\n",
" return logits # Logits for Layer 2\n",
" "
]
},
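{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick shape check (added for illustration, not part of the original notebook): any parameter vector of the right length should yield one row of logits per sample."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative check: a random unrolled parameter vector of length 163\n",
"p_test = np.random.randn((n_inputs * n_hidden) + n_hidden + (n_hidden * n_classes) + n_classes)\n",
"logits_function(p_test).shape  # expected: (150, 3)"
]
},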
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
@@ -102,8 +169,7 @@
" \"\"\"Forward propagation as objective function\n",
" \n",
" This computes for the forward propagation of the neural network, as\n",
" well as the loss. It receives a set of parameters that must be \n",
" rolled-back into the corresponding weights and biases.\n",
" well as the loss. \n",
" \n",
" Inputs\n",
" ------\n",
@@ -116,31 +182,17 @@
" float\n",
" The computed negative log-likelihood loss given the parameters\n",
" \"\"\"\n",
" # Neural network architecture\n",
" n_inputs = 4\n",
" n_hidden = 20\n",
" n_classes = 3\n",
" \n",
" # Roll-back the weights and biases\n",
" W1 = params[0:80].reshape((n_inputs,n_hidden))\n",
" b1 = params[80:100].reshape((n_hidden,))\n",
" W2 = params[100:160].reshape((n_hidden,n_classes))\n",
" b2 = params[160:163].reshape((n_classes,))\n",
" \n",
" # Perform forward propagation\n",
" z1 = X.dot(W1) + b1 # Pre-activation in Layer 1\n",
" a1 = np.tanh(z1) # Activation in Layer 1\n",
" z2 = a1.dot(W2) + b2 # Pre-activation in Layer 2\n",
" logits = z2 # Logits for Layer 2\n",
" logits = logits_funciton(params)\n",
" \n",
" # Compute for the softmax of the logits\n",
" exp_scores = np.exp(logits)\n",
" probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True) \n",
" \n",
" # Compute for the negative log likelihood\n",
" N = 150 # Number of samples\n",
" corect_logprobs = -np.log(probs[range(N), y])\n",
" loss = np.sum(corect_logprobs) / N\n",
"\n",
" corect_logprobs = -np.log(probs[range(num_samples), y])\n",
" loss = np.sum(corect_logprobs) / num_samples\n",
" \n",
" return loss\n"
]
@@ -154,7 +206,7 @@
},
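{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before running PSO we can also verify (again, an illustrative check that is not part of the original notebook) that `forward_prop` maps a 163-dimensional parameter vector to a single scalar loss:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative check: the objective should return one positive float\n",
"forward_prop(np.random.randn(163))"
]
},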
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
@@ -188,74 +240,16 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2019-05-18 15:41:57,877 - pyswarms.single.global_best - INFO - Optimize for 1000 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9}\n",
"pyswarms.single.global_best: 100%|██████████|1000/1000, best_cost=0.0165\n",
"2019-05-18 15:42:31,581 - pyswarms.single.global_best - INFO - Optimization finished | best cost: 0.016531416872331766, best pos: [ 5.85443277e-01 3.18983187e-01 4.87616029e+00 6.92819686e-01\n",
" 5.91348094e-01 -1.47072755e+00 -3.87333270e-01 2.28078097e-01\n",
" 5.64760071e-01 -7.56088270e-01 6.87331448e-01 2.88151297e-02\n",
" 3.31349600e-01 -1.59924694e+00 4.67166210e-02 -1.12181711e+00\n",
" -1.46116999e+00 8.28885848e-02 -5.87848222e-01 4.84650773e-01\n",
" 7.25055845e-01 1.60319161e+00 1.01734077e+00 6.77151277e-01\n",
" 4.68472188e-01 -2.03064036e+00 6.04853725e-01 8.69137842e-01\n",
" 3.37968541e-02 7.35966984e+00 3.77854618e-01 5.39704445e-01\n",
" -3.18805135e-01 3.98886841e-01 1.35014252e+00 6.30151934e-01\n",
" -9.89671205e+00 5.30611778e-01 -3.94029130e-01 1.28325712e+00\n",
" 4.00896282e-01 1.44504447e+00 4.80281265e-02 -2.23726897e+00\n",
" -1.30647406e+00 -1.22876659e+00 1.05121523e+00 1.65897010e-02\n",
" -3.01271564e-01 7.11549766e-01 -4.30742366e-01 -8.79857761e-01\n",
" 4.65148904e-01 2.68559029e+00 -8.19525838e-01 1.54207613e+00\n",
" 1.11130818e+00 -1.00668214e+00 1.34198293e+00 1.37349242e+00\n",
" 8.63633177e-01 7.71218636e-01 -3.24648088e+00 -2.77803089e-01\n",
" 5.61820577e-01 3.60558056e-01 -1.84423615e-01 7.31299682e-01\n",
" -2.70701588e-01 7.40742860e-01 -2.04198406e-01 -3.40162714e+00\n",
" -1.63934080e+00 4.25175361e-01 -5.64146954e-01 9.32478016e-03\n",
" 3.21352295e+01 -1.70581217e-02 2.82477110e-03 -6.18616886e-01\n",
" 1.38075445e+00 4.79120594e-01 -1.38986251e+00 1.08695023e+00\n",
" -3.27381335e-01 -1.88859617e+00 5.74757420e-03 3.17290632e-01\n",
" -3.38194174e-01 -7.56263717e-01 -1.99762849e-01 1.48606896e+00\n",
" 1.33930708e+00 6.64423514e-01 -3.54654570e-01 -5.97853941e-02\n",
" -2.37687453e+01 2.72228437e+00 1.06655131e+00 6.82690854e-01\n",
" 2.31485658e+00 -1.52431864e+00 -3.59092541e-01 2.86005282e+00\n",
" 6.34208138e-01 -4.77038443e-02 -2.15260265e-01 1.02182659e+00\n",
" 9.64123957e-03 1.81348424e+00 1.23999239e+00 -8.64817004e-01\n",
" 7.99587790e-01 -6.65369364e-01 -1.19005701e+00 6.56194773e-01\n",
" -7.91411474e-01 -1.28616596e+00 -1.17059974e-01 1.58063399e+00\n",
" -5.29152828e-02 7.20523887e-01 -7.31449869e-01 -1.91568507e+00\n",
" 8.65212737e-02 2.90227497e-01 -1.89333200e+00 5.57458851e-01\n",
" 3.49374578e-01 -1.87458746e-01 -9.91365971e-01 3.19524629e-01\n",
" 2.94593616e-01 1.04852932e+00 -8.21094379e-01 -8.56035315e-02\n",
" -1.11249671e-02 -1.51713779e+00 -1.72314444e-01 -2.63524712e+00\n",
" 4.20163362e-01 -1.69099999e+00 1.12468986e+00 7.20661344e-01\n",
" -6.21266399e-01 -2.27307898e+00 -1.14414646e+00 2.82661220e+00\n",
" -5.48558830e-01 6.14762373e-01 6.30690474e+00 2.65412287e+00\n",
" 2.57065658e+00 -1.15195847e+01 -1.70542094e+00 9.64484168e-01\n",
" 7.66919284e-01 6.93106240e-01 2.34685030e+00 1.86609454e+00\n",
" -7.14644033e-01 7.41293434e-01 -2.36930354e-01]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 23.8 s, sys: 488 ms, total: 24.3 s\n",
"Wall time: 33.7 s\n"
]
}
],
"outputs": [],
"source": [
"%%time\n",
"# Initialize swarm\n",
"options = {'c1': 0.5, 'c2': 0.3, 'w':0.9}\n",
"\n",
"# Call instance of PSO\n",
"dimensions = (4 * 20) + (20 * 3) + 20 + 3 \n",
"dimensions = (n_inputs * n_hidden) + (n_hidden * n_classes) + n_hidden + n_classes \n",
"optimizer = ps.single.GlobalBestPSO(n_particles=100, dimensions=dimensions, options=options)\n",
"\n",
"# Perform optimization\n",
@@ -274,39 +268,21 @@
},
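{
"cell_type": "markdown",
"metadata": {},
"source": [
"In `pyswarms`, `optimizer.optimize` returns a tuple of the best cost and the best position found by the swarm; that best position `pos` is the trained parameter vector we roll back into weights and biases below."
]
},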
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"def predict(X, pos):\n",
"def predict(pos):\n",
" \"\"\"\n",
" Use the trained weights to perform class predictions.\n",
" \n",
" Inputs\n",
" ------\n",
" X: numpy.ndarray\n",
" Input Iris dataset\n",
" pos: numpy.ndarray\n",
" Position matrix found by the swarm. Will be rolled\n",
" into weights and biases.\n",
" \"\"\"\n",
" # Neural network architecture\n",
" n_inputs = 4\n",
" n_hidden = 20\n",
" n_classes = 3\n",
" \n",
" # Roll-back the weights and biases\n",
" W1 = pos[0:80].reshape((n_inputs,n_hidden))\n",
" b1 = pos[80:100].reshape((n_hidden,))\n",
" W2 = pos[100:160].reshape((n_hidden,n_classes))\n",
" b2 = pos[160:163].reshape((n_classes,))\n",
" \n",
" # Perform forward propagation\n",
" z1 = X.dot(W1) + b1 # Pre-activation in Layer 1\n",
" a1 = np.tanh(z1) # Activation in Layer 1\n",
" z2 = a1.dot(W2) + b2 # Pre-activation in Layer 2\n",
" logits = z2 # Logits for Layer 2\n",
" \n",
" logits = logits_funciton(pos)\n",
" y_pred = np.argmax(logits, axis=1)\n",
" return y_pred"
]
@@ -320,7 +296,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 21,
"metadata": {},
"outputs": [
{
@@ -329,13 +305,13 @@
"0.9866666666666667"
]
},
"execution_count": 7,
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(predict(X, pos) == y).mean()"
"(predict(pos) == y).mean()"
]
}
],
@@ -355,7 +331,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.7.6"
}
},
"nbformat": 4,