In [27]:
import numpy
from keras.datasets import imdb
from keras.models import Sequential, Model
from keras.layers import Dense
from keras.layers import LSTM, Convolution1D, Flatten, Dropout
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence

In [2]:
top_words = 10000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)



Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz

In [3]:
print (X_train[0], y_train[0])

[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32] 1


In [5]:
for a in X_train[:10]:
    print (len(a))

218
189
141
550
147
43
123
562
233
130


In [6]:
max_review_length = 1600
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)

In [8]:
print (X_train[0], len(X_train[0]))

[  0   0   0 ...,  19 178  32] 1600


In [17]:
embedding_vector_length = 300
model = Sequential()
model.add(Embedding(top_words, embedding_vector_length, input_length=max_review_length))

In [18]:
model.add(Convolution1D(64, 3, border_mode='same'))
model.add(Convolution1D(32, 3, border_mode='same'))
model.add(Convolution1D(16, 3, border_mode='same'))
model.add(Flatten())
model.add(Dropout(0.2))
model.add(Dense(180,activation='sigmoid'))
model.add(Dropout(0.2))
model.add(Dense(1,activation='sigmoid'))

  """Entry point for launching an IPython kernel.
  
  This is separate from the ipykernel package so we can avoid doing imports until


In [19]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [12]:
model.fit(X_train, y_train, nb_epoch=3, verbose = 1, batch_size = 64)



Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f1d4448f278>

In [15]:
score = model.evaluate(X_test[:100], y_test[:100], verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.465346926227
Test accuracy: 0.86


In [16]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 1600, 300)         3000000   
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 1600, 64)          57664     
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 1600, 32)          6176      
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 1600, 16)          1552      
_________________________________________________________________
flatten_1 (Flatten)          (None, 25600)             0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 25600)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 180)               4608180   
__________

In [24]:
from keras.layers import Input, merge
query = Input(shape = (1600, ))

In [25]:
e = Embedding(top_words, embedding_vector_length, input_length=max_review_length)

In [26]:
out_e = e(query)

In [28]:
model_tmp = Model(input = query, output = out_e)

  """Entry point for launching an IPython kernel.


In [33]:
model_tmp.compile(optimizer = "adadelta", loss = "binary_crossentropy")
from keras import backend

get_repr = backend.function([query], out_e)

In [37]:
repr = get_repr([X_train[0].reshape(1600,1)])

In [38]:
repr.shape

(1600, 1, 300)

In [39]:
X_train[0]

array([  0,   0,   0, ...,  19, 178,  32], dtype=int32)

In [40]:
repr[0]

array([[ 0.04689165,  0.00164434, -0.0414691 , -0.01268752,  0.01458974,
         0.0194481 , -0.01376501, -0.00874614,  0.0016255 ,  0.01300043,
         0.00913168, -0.03762851,  0.04514131,  0.00753866,  0.0113394 ,
         0.01148813, -0.04490088, -0.0014638 , -0.04716635,  0.03980613,
        -0.00968654, -0.03714398, -0.00895769, -0.00449478, -0.00194495,
        -0.01090954, -0.03410237,  0.0360073 , -0.02807523,  0.04654723,
        -0.04427888, -0.03530645, -0.04637638,  0.04398745,  0.04276075,
        -0.01069003, -0.00922956,  0.0437774 , -0.01333702, -0.04761531,
         0.04720573,  0.00678061, -0.02969839,  0.02867307, -0.03363846,
        -0.04719247,  0.03243979, -0.0342715 ,  0.03748943, -0.00708717,
        -0.00425385, -0.00998966, -0.03635811,  0.0221416 , -0.00365049,
        -0.02227639, -0.03176845,  0.01458277, -0.0209214 , -0.04383406,
        -0.01147091,  0.03769008,  0.03926635,  0.02770606,  0.04895506,
        -0.01544254,  0.00985148,  0.02622911, -0.0

In [41]:
repr[1]

array([[ 0.04689165,  0.00164434, -0.0414691 , -0.01268752,  0.01458974,
         0.0194481 , -0.01376501, -0.00874614,  0.0016255 ,  0.01300043,
         0.00913168, -0.03762851,  0.04514131,  0.00753866,  0.0113394 ,
         0.01148813, -0.04490088, -0.0014638 , -0.04716635,  0.03980613,
        -0.00968654, -0.03714398, -0.00895769, -0.00449478, -0.00194495,
        -0.01090954, -0.03410237,  0.0360073 , -0.02807523,  0.04654723,
        -0.04427888, -0.03530645, -0.04637638,  0.04398745,  0.04276075,
        -0.01069003, -0.00922956,  0.0437774 , -0.01333702, -0.04761531,
         0.04720573,  0.00678061, -0.02969839,  0.02867307, -0.03363846,
        -0.04719247,  0.03243979, -0.0342715 ,  0.03748943, -0.00708717,
        -0.00425385, -0.00998966, -0.03635811,  0.0221416 , -0.00365049,
        -0.02227639, -0.03176845,  0.01458277, -0.0209214 , -0.04383406,
        -0.01147091,  0.03769008,  0.03926635,  0.02770606,  0.04895506,
        -0.01544254,  0.00985148,  0.02622911, -0.0

In [42]:
repr[0] == repr[1]

array([[ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
      