Permalink
Browse files

Stateful LSTMs

  • Loading branch information...
sachinruk committed Aug 22, 2017
1 parent 4421485 commit f34bdcc912beddb91a96b823683e15446ab0e5a8
View
@@ -0,0 +1,9 @@
# Base image with the data-science stack preinstalled.
FROM sachinruk/ds_base

# System graphviz plus its Python bindings and xgboost.
# Clean the apt lists in the same layer so the cache doesn't bloat the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends graphviz && \
    rm -rf /var/lib/apt/lists/*
RUN pip install --no-cache-dir graphviz xgboost

# Mount point for the user's notebooks; also the working directory Jupyter serves.
VOLUME /notebook
WORKDIR /notebook

# Jupyter notebook server default port.
EXPOSE 8888

# Exec-form CMD so Jupyter runs as PID 1 and receives SIGTERM on `docker stop`
# (shell form wraps it in /bin/sh, which swallows signals).
# NOTE(review): empty --NotebookApp.token= disables authentication entirely —
# only safe when the published port is restricted to localhost/a trusted network.
CMD ["jupyter", "notebook", "--allow-root", "--no-browser", "--ip=0.0.0.0", "--NotebookApp.token="]
@@ -27,10 +27,13 @@
"Fetching package metadata ...........\n",
"Solving package specifications: .\n",
"\n",
"# All requested packages already installed.\n",
"# packages in environment at /root/miniconda3:\n",
"#\n",
"pillow 4.2.1 py36_0 \n"
"Package plan for installation in environment /root/miniconda3:\n",
"\n",
"The following packages will be UPDATED:\n",
"\n",
" conda: 4.3.23-py36_0 --> 4.3.24-py36_0\n",
"\n",
"conda-4.3.24-p 100% |################################| Time: 0:00:00 2.84 MB/s\n"
]
}
],
@@ -189,6 +192,26 @@
"From the dataset we randomly choose 20000 images and move them to the training folder, and the rest to the testing folder. "
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1, 3, 4, 2, 0])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.random.permutation(5)"
]
},
{
"cell_type": "code",
"execution_count": 5,
@@ -203,7 +226,7 @@
"\n",
"# randomly choose 20000 as training and testing cases\n",
"permutation = np.random.permutation(len_data)\n",
"train_set = [files[i] for i in permutation[:]]\n",
"train_set = [files[i] for i in permutation[:train_examples]]\n",
"test_set = [files[i] for i in permutation[-test_examples:]]\n",
"train_labels = labels[permutation[:train_examples]]\n",
"test_labels = labels[permutation[-test_examples:]]\n",
@@ -773,7 +796,9 @@
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from keras import applications\n",
@@ -854,6 +879,38 @@
"It is however important to notice that I am **not** training in this block. I am predicting using a truncated VGG16 net. See how I set the `include_top=False` parameter above. VGG16 was originally trained on the ImageNet dataset so that it would predict 1000 classes. Now that we are truncating it and only using all but the top few layers (the layers closest to the prediction), it outputs a (3,3,512) image in our case."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"with open('bottleneck_features_train.npy','rb') as f:\n",
" bottleneck_features_train = pickle.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(19872, 3, 3, 512)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bottleneck_features_train.shape"
]
},
{
"cell_type": "code",
"execution_count": 9,
@@ -184,7 +184,7 @@
"\n",
"# randomly choose 20000 as training and testing cases\n",
"permutation = np.random.permutation(len_data)\n",
"train_set = [files[i] for i in permutation[:]]\n",
"train_set = [files[i] for i in permutation[:][:train_examples]]\n",
"test_set = [files[i] for i in permutation[-test_examples:]]\n",
"train_labels = labels[permutation[:train_examples]]\n",
"test_labels = labels[permutation[-test_examples:]]\n",
@@ -17,13 +17,17 @@
"\n",
"Cons:\n",
"- Cannot deal with missing time steps.\n",
"- Time steps must be discretised and not continuous."
"- Time steps must be discretised and not continuous.\n",
"\n",
"![trump](./images/trump.jpg)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%matplotlib inline\n",
@@ -261,6 +265,158 @@
"text_num = [[char2int['<GO>']]+[char2int[c] for c in tweet]+ [char2int['<END>']] for tweet in trump_tweets]"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'\\n': 26,\n",
" ' ': 42,\n",
" '!': 66,\n",
" '\"': 13,\n",
" '#': 39,\n",
" '$': 40,\n",
" '%': 29,\n",
" '&': 88,\n",
" \"'\": 22,\n",
" '(': 10,\n",
" ')': 96,\n",
" '*': 64,\n",
" '+': 55,\n",
" ',': 23,\n",
" '-': 79,\n",
" '.': 52,\n",
" '/': 1,\n",
" '0': 107,\n",
" '1': 114,\n",
" '2': 104,\n",
" '3': 2,\n",
" '4': 90,\n",
" '5': 36,\n",
" '6': 128,\n",
" '7': 75,\n",
" '8': 91,\n",
" '9': 119,\n",
" ':': 32,\n",
" ';': 71,\n",
" '<END>': 130,\n",
" '<GO>': 131,\n",
" '<PAD>': 132,\n",
" '=': 35,\n",
" '?': 126,\n",
" '@': 84,\n",
" '[': 112,\n",
" '\\\\': 86,\n",
" ']': 14,\n",
" '_': 101,\n",
" '`': 73,\n",
" 'a': 85,\n",
" 'b': 115,\n",
" 'c': 123,\n",
" 'd': 63,\n",
" 'e': 120,\n",
" 'f': 24,\n",
" 'g': 45,\n",
" 'h': 6,\n",
" 'i': 121,\n",
" 'j': 11,\n",
" 'k': 65,\n",
" 'l': 59,\n",
" 'm': 48,\n",
" 'n': 118,\n",
" 'o': 20,\n",
" 'p': 76,\n",
" 'q': 106,\n",
" 'r': 111,\n",
" 's': 127,\n",
" 't': 67,\n",
" 'u': 62,\n",
" 'v': 49,\n",
" 'w': 99,\n",
" 'x': 37,\n",
" 'y': 9,\n",
" 'z': 89,\n",
" '{': 100,\n",
" '|': 72,\n",
" '}': 15,\n",
" '~': 69,\n",
" '\\xa0': 28,\n",
" '£': 122,\n",
" '«': 68,\n",
" '®': 102,\n",
" '´': 19,\n",
" 'º': 30,\n",
" '»': 12,\n",
" 'á': 70,\n",
" 'â': 94,\n",
" 'è': 4,\n",
" 'é': 129,\n",
" 'í': 25,\n",
" 'ñ': 17,\n",
" 'ø': 77,\n",
" 'ú': 34,\n",
" 'ğ': 105,\n",
" 'ı': 0,\n",
" 'ĺ': 16,\n",
" 'ō': 74,\n",
" 'ễ': 5,\n",
" '\\u200b': 41,\n",
" '\\u200e': 31,\n",
" '\\u200f': 51,\n",
" '–': 117,\n",
" '—': 43,\n",
" '―': 44,\n",
" '‘': 81,\n",
" '’': 54,\n",
" '“': 7,\n",
" '”': 108,\n",
" '•': 58,\n",
" '…': 97,\n",
" '′': 80,\n",
" '‼': 38,\n",
" '€': 33,\n",
" '●': 124,\n",
" '☀': 21,\n",
" '☁': 60,\n",
" '★': 18,\n",
" '☆': 110,\n",
" '☉': 57,\n",
" '☑': 113,\n",
" '☝': 61,\n",
" '☞': 125,\n",
" '☹': 53,\n",
" '☺': 98,\n",
" '♡': 87,\n",
" '♥': 93,\n",
" '⚾': 95,\n",
" '✅': 82,\n",
" '✈': 78,\n",
" '✊': 109,\n",
" '✔': 83,\n",
" '✨': 47,\n",
" '❌': 8,\n",
" '❤': 56,\n",
" '➡': 27,\n",
" '⬅': 92,\n",
" '《': 103,\n",
" '️': 50,\n",
" 'r': 3,\n",
" 't': 116,\n",
" '\\U0010fc00': 46}"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"char2int"
]
},
{
"cell_type": "code",
"execution_count": 25,
@@ -465,7 +621,9 @@
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# if not 'model' in vars():\n",
@@ -17,7 +17,8 @@
"\n",
"Cons:\n",
"- Cannot deal with missing time steps.\n",
"- Time steps must be discretised and not continuous."
"- Time steps must be discretised and not continuous.\n",
"![trump](./images/trump.jpg)"
]
},
{
@@ -367,7 +368,7 @@
"\n",
"Why did we keep appending to the sequence and predicting? Why not use simply the last letter. If we were to do this, we would lose information that comes from the previous letter via the hidden state and cell memory. Keep in mind that each LSTM unit has 3 inputs, the x, the hidden state, and the cell memory. \n",
"\n",
"Also important to notice that the Cell Memory is not used in connecting to the Dense layer"
"Also important to notice that the Cell Memory is not used in connecting to the Dense layer, only the hidden state."
]
},
{
Oops, something went wrong.

0 comments on commit f34bdcc

Please sign in to comment.