Stateful LSTMs

sachinruk · Aug 22, 2017 · f34bdcc · f34bdcc
1 parent 4421485
commit f34bdcc
Show file tree

Hide file tree

Showing 8 changed files with 1,695 additions and 14 deletions.
diff --git a/Dockerfile.backup b/Dockerfile.backup
@@ -0,0 +1,9 @@
+FROM sachinruk/ds_base
+
+RUN apt-get update && apt-get install -y graphviz
+RUN pip install graphviz xgboost
+
+VOLUME /notebook
+WORKDIR /notebook
+EXPOSE 8888
+CMD jupyter notebook --allow-root --no-browser --ip=0.0.0.0 --NotebookApp.token=
diff --git a/Lesson 13 - Transfer Learning - Solutions.ipynb b/Lesson 13 - Transfer Learning - Solutions.ipynb
@@ -27,10 +27,13 @@
       "Fetching package metadata ...........\n",
       "Solving package specifications: .\n",
       "\n",
-      "# All requested packages already installed.\n",
-      "# packages in environment at /root/miniconda3:\n",
-      "#\n",
-      "pillow                    4.2.1                    py36_0  \n"
+      "Package plan for installation in environment /root/miniconda3:\n",
+      "\n",
+      "The following packages will be UPDATED:\n",
+      "\n",
+      "    conda: 4.3.23-py36_0 --> 4.3.24-py36_0\n",
+      "\n",
+      "conda-4.3.24-p 100% |################################| Time: 0:00:00   2.84 MB/s\n"
      ]
     }
    ],
@@ -189,6 +192,26 @@
     "From the dataset we randomly choose 20000 images and moves them to training and the rest to testing folders. "
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([1, 3, 4, 2, 0])"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.random.permutation(5)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 5,
@@ -203,7 +226,7 @@
     "\n",
     "# randomly choose 20000 as training and testing cases\n",
     "permutation = np.random.permutation(len_data)\n",
-    "train_set = [files[i] for i in permutation[:]]\n",
+    "train_set = [files[i] for i in permutation[:train_examples]]\n",
     "test_set = [files[i] for i in permutation[-test_examples:]]\n",
     "train_labels = labels[permutation[:train_examples]]\n",
     "test_labels = labels[permutation[-test_examples:]]\n",
@@ -773,7 +796,9 @@
   {
    "cell_type": "code",
    "execution_count": 7,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "from keras import applications\n",
@@ -854,6 +879,38 @@
     "It is however important to notice that I am **not** training in this block. I am predicting using a truncated VGG16 net. See how I set the `include_top=False` parameter above. VGG16 was originally trained on the CIFAR10 dataset so that it would predict 10 classes. Now that we are truncating it and only using all but the top few layers (lyer closes to the prediction), it outputs a (3,3,512) image in our case."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "with open('bottleneck_features_train.npy','rb') as f:\n",
+    "    bottleneck_features_train = pickle.load(f)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(19872, 3, 3, 512)"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "bottleneck_features_train.shape"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 9,

diff --git a/Lesson 13 - Transfer Learning.ipynb b/Lesson 13 - Transfer Learning.ipynb
@@ -184,7 +184,7 @@
     "\n",
     "# randomly choose 20000 as training and testing cases\n",
     "permutation = np.random.permutation(len_data)\n",
-    "train_set = [files[i] for i in permutation[:]]\n",
+    "train_set = [files[i] for i in permutation[:][:train_examples]]\n",
     "test_set = [files[i] for i in permutation[-test_examples:]]\n",
     "train_labels = labels[permutation[:train_examples]]\n",
     "test_labels = labels[permutation[-test_examples:]]\n",

diff --git a/Lesson 15 - LSTM Trump Tweets - Solutions.ipynb b/Lesson 15 - LSTM Trump Tweets - Solutions.ipynb
@@ -17,13 +17,17 @@
     "\n",
     "Cons:\n",
     "- Cannot deal with missing time steps.\n",
-    "- Time steps must be discretised and not continuous."
+    "- Time steps must be discretised and not continuous.\n",
+    "\n",
+    "![trump](./images/trump.jpg)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 10,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "%matplotlib inline\n",
@@ -261,6 +265,158 @@
     "text_num = [[char2int['<GO>']]+[char2int[c] for c in tweet]+ [char2int['<END>']] for tweet in trump_tweets]"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'\\n': 26,\n",
+       " ' ': 42,\n",
+       " '!': 66,\n",
+       " '\"': 13,\n",
+       " '#': 39,\n",
+       " '$': 40,\n",
+       " '%': 29,\n",
+       " '&': 88,\n",
+       " \"'\": 22,\n",
+       " '(': 10,\n",
+       " ')': 96,\n",
+       " '*': 64,\n",
+       " '+': 55,\n",
+       " ',': 23,\n",
+       " '-': 79,\n",
+       " '.': 52,\n",
+       " '/': 1,\n",
+       " '0': 107,\n",
+       " '1': 114,\n",
+       " '2': 104,\n",
+       " '3': 2,\n",
+       " '4': 90,\n",
+       " '5': 36,\n",
+       " '6': 128,\n",
+       " '7': 75,\n",
+       " '8': 91,\n",
+       " '9': 119,\n",
+       " ':': 32,\n",
+       " ';': 71,\n",
+       " '<END>': 130,\n",
+       " '<GO>': 131,\n",
+       " '<PAD>': 132,\n",
+       " '=': 35,\n",
+       " '?': 126,\n",
+       " '@': 84,\n",
+       " '[': 112,\n",
+       " '\\\\': 86,\n",
+       " ']': 14,\n",
+       " '_': 101,\n",
+       " '`': 73,\n",
+       " 'a': 85,\n",
+       " 'b': 115,\n",
+       " 'c': 123,\n",
+       " 'd': 63,\n",
+       " 'e': 120,\n",
+       " 'f': 24,\n",
+       " 'g': 45,\n",
+       " 'h': 6,\n",
+       " 'i': 121,\n",
+       " 'j': 11,\n",
+       " 'k': 65,\n",
+       " 'l': 59,\n",
+       " 'm': 48,\n",
+       " 'n': 118,\n",
+       " 'o': 20,\n",
+       " 'p': 76,\n",
+       " 'q': 106,\n",
+       " 'r': 111,\n",
+       " 's': 127,\n",
+       " 't': 67,\n",
+       " 'u': 62,\n",
+       " 'v': 49,\n",
+       " 'w': 99,\n",
+       " 'x': 37,\n",
+       " 'y': 9,\n",
+       " 'z': 89,\n",
+       " '{': 100,\n",
+       " '|': 72,\n",
+       " '}': 15,\n",
+       " '~': 69,\n",
+       " '\\xa0': 28,\n",
+       " '£': 122,\n",
+       " '«': 68,\n",
+       " '®': 102,\n",
+       " '´': 19,\n",
+       " 'º': 30,\n",
+       " '»': 12,\n",
+       " 'á': 70,\n",
+       " 'â': 94,\n",
+       " 'è': 4,\n",
+       " 'é': 129,\n",
+       " 'í': 25,\n",
+       " 'ñ': 17,\n",
+       " 'ø': 77,\n",
+       " 'ú': 34,\n",
+       " 'ğ': 105,\n",
+       " 'ı': 0,\n",
+       " 'ĺ': 16,\n",
+       " 'ō': 74,\n",
+       " 'ễ': 5,\n",
+       " '\\u200b': 41,\n",
+       " '\\u200e': 31,\n",
+       " '\\u200f': 51,\n",
+       " '–': 117,\n",
+       " '—': 43,\n",
+       " '―': 44,\n",
+       " '‘': 81,\n",
+       " '’': 54,\n",
+       " '“': 7,\n",
+       " '”': 108,\n",
+       " '•': 58,\n",
+       " '…': 97,\n",
+       " '′': 80,\n",
+       " '‼': 38,\n",
+       " '€': 33,\n",
+       " '●': 124,\n",
+       " '☀': 21,\n",
+       " '☁': 60,\n",
+       " '★': 18,\n",
+       " '☆': 110,\n",
+       " '☉': 57,\n",
+       " '☑': 113,\n",
+       " '☝': 61,\n",
+       " '☞': 125,\n",
+       " '☹': 53,\n",
+       " '☺': 98,\n",
+       " '♡': 87,\n",
+       " '♥': 93,\n",
+       " '⚾': 95,\n",
+       " '✅': 82,\n",
+       " '✈': 78,\n",
+       " '✊': 109,\n",
+       " '✔': 83,\n",
+       " '✨': 47,\n",
+       " '❌': 8,\n",
+       " '❤': 56,\n",
+       " '➡': 27,\n",
+       " '⬅': 92,\n",
+       " '《': 103,\n",
+       " '️': 50,\n",
+       " 'ｒ': 3,\n",
+       " 'ｔ': 116,\n",
+       " '\\U0010fc00': 46}"
+      ]
+     },
+     "execution_count": 50,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "char2int"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 25,
@@ -465,7 +621,9 @@
   {
    "cell_type": "code",
    "execution_count": 34,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# if not 'model' in vars():\n",

diff --git a/Lesson 15 - LSTM Trump Tweets.ipynb b/Lesson 15 - LSTM Trump Tweets.ipynb
@@ -17,7 +17,8 @@
     "\n",
     "Cons:\n",
     "- Cannot deal with missing time steps.\n",
-    "- Time steps must be discretised and not continuous."
+    "- Time steps must be discretised and not continuous.\n",
+    "![trump](./images/trump.jpg)"
    ]
   },
   {
@@ -367,7 +368,7 @@
     "\n",
     "Why did we keep appending to the sequence and predicting? Why not use simply the last letter. If we were to do this, we would lose information that comes from the previous letter via the hidden state and cell memory. Keep in mind that each LSTM unit has 3 inputs, the x, the hidden state, and the cell memory. \n",
     "\n",
-    "Also important to notice that the Cell Memory is not used in connecting to the Dense layer"
+    "Also important to notice that the Cell Memory is not used in connecting to the Dense layer, only the hidden state."
    ]
   },
   {