In [108]:
import keras
from keras.datasets import mnist
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Dropout, Flatten, Embedding
from keras.layers import Conv1D, MaxPooling1D
from keras.layers.merge import Concatenate
from keras import backend as K
import numpy as np
import matplotlib.pyplot as plt
import pickle
import random

In [20]:
mode = '-nonstatic'
word_vectors = '-rand'

In [52]:
with open("mr.p", 'rb') as f:
    x = pickle.load(f, encoding='latin1')

# W - word2vec, W2 - rand
revs, W, W2, word_idx_map, vocab = x[0], x[1], x[2], x[3], x[4]
vocab_size = len(vocab)
print('Data successfully loaded')

if mode == "-nonstatic":
    print ("model architecture: CNN-non-static")
    non_static=True
elif mode=="-static":
    print ("model architecture: CNN-static")
    non_static=False
    
if word_vectors=="-rand":
    print ("using: random vectors")
    U = W2
elif word_vectors=="-word2vec":
    print ("using: word2vec vectors")
    U = W

Data successfully loaded
model architecture: CNN-non-static
using: random vectors


In [70]:
revs[0]['num_words']

25

In [75]:
max_sentence_len = -1

for i in range(len(revs)):
    num = revs[i]['num_words']
    max_sentence_len = max(num, max_sentence_len)
    
print('Max sentence length:', max_sentence_len)

Max sentence length: 56


In [86]:
word_idx_map[',']

9596

In [82]:
revs[100]

{'num_words': 42,
 'split': 3,
 'text': 'this latest installment of the horror film franchise that is apparently as invulnerable as its trademark villain has arrived for an incongruous summer playoff , demonstrating yet again that the era of the intelligent , well made b movie is long gone',
 'y': 0}

In [97]:
N = len(revs)
N_train = int(np.round(N * 0.1))
N_test = int(N - N_train)

X = np.zeros((N, max_sentence_len), np.uint16)
Y = np.zeros((N,1), np.uint8)

random.shuffle(revs)
for i in range(len(revs)):
    list_wrds = revs[i]['text'].split()
    for j in range(len(list_wrds)):
        X[i][j] = word_idx_map[list_wrds[j]]
    Y[i] = revs[i]['y']

x_train = X[0:N_train]
y_train = Y[0:N_train]

x_test = X[N_train:N]
y_test = Y[N_train:N]

In [124]:
batch_size = 50
filter_sizes = [3,4,5]
num_filters = 100
dropout_prob = (0.5, 0.8)
hidden_dims = 50

l2_reg = 0.3
embedding_dim = 300

model_input = Input(shape= (max_sentence_len,))

z = Embedding(vocab_size+1, embedding_dim, input_length=max_sentence_len, name="embedding")(model_input)
z = Dropout(dropout_prob[0])(z)
    
z1 = Conv1D( filters=100, kernel_size=3, padding="valid", activation="relu", strides=1)(z)
z1 = MaxPooling1D(pool_size=2)(z1)
z1 = Flatten()(z1)

z2 = Conv1D( filters=100, kernel_size=4, padding="valid", activation="relu", strides=1)(z)
z2 = MaxPooling1D(pool_size=2)(z2)
z2 = Flatten()(z2)

z3 = Conv1D( filters=100, kernel_size=5, padding="valid", activation="relu", strides=1)(z)
z3 = MaxPooling1D(pool_size=2)(z3)
z3 = Flatten()(z3)

# Concatenate the output of all convolution layers
z = Concatenate()([z1, z2, z3])
z = Dropout(dropout_prob[1])(z)

z = Dense(hidden_dims, activation="relu")(z)
model_output = Dense(1, activation="sigmoid")(z)
    
model = Model(model_input, model_output)
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [125]:
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_12 (InputLayer)            (None, 56)            0                                            
____________________________________________________________________________________________________
embedding (Embedding)            (None, 56, 300)       5629800                                      
____________________________________________________________________________________________________
dropout_13 (Dropout)             (None, 56, 300)       0                                            
____________________________________________________________________________________________________
conv1d_18 (Conv1D)               (None, 54, 100)       90100                                        
___________________________________________________________________________________________

In [116]:
W.shape

(18766, 300)

In [127]:
embedding_layer = model.get_layer("embedding")
embedding_layer.set_weights([W])

model.fit(x_train, y_train, 
          batch_size = batch_size, 
          epochs=10,
          validation_data=(x_test, y_test), verbose=2)

Train on 1066 samples, validate on 9596 samples
Epoch 1/10


KeyboardInterrupt: 