In [None]:
# https://machinelearningmastery.com/sequence-classification-lstm-recurrent-neural-networks-python-keras/
import json
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

In [17]:
# Load the vectorised samples and their class labels from the JSON dump.
filename = 'text_and_label_all/json_vectorized_categorical.json'

# JSON text is UTF-8 by specification; do not depend on the platform default.
with open(filename, encoding='utf-8') as json_file:
    data = json.load(json_file)

# Only the record values are needed; the top-level keys are not used.
records = list(data.values())

# The token-id sequences may differ in length, so store them as a 1-D object
# array of Python lists. Filling it element by element avoids calling
# np.array() on ragged input, which raises ValueError on NumPy >= 1.24.
X = np.empty(len(records), dtype=object)
for i, record in enumerate(records):
    X[i] = record["vectorized"]

# One-hot encode the labels into 5 classes (the models end in Dense(5)).
# to_categorical expects integer class indices in [0, num_classes).
y = to_categorical(np.array([record["label"] for record in records]), num_classes=5)

# Quick sanity check of the first sample and its encoded label.
print(X[0])
print(y[0])

[12, 0, 0, 9, 0, 6, 15, 0, 0, 0, 8, 18, 20, 4, 0, 0, 0, 19, 0, 0, 17, 0, 1, 24, 7, 0, 23, 16, 0, 13, 14, 21, 22, 5, 5005, 5002, 5004, 5011, 5003, 5010, 5007, 5009, 5008, 5006, 5000, 5001]
[0. 0. 1. 0. 0.]


In [27]:
# Vocabulary size passed to the Embedding layers as their input dimension.
# NOTE(review): must be strictly greater than the largest token id in X —
# TODO confirm 5057 satisfies this for the current dataset.
top_words = 5057

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.20, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Report feature/label shapes for the train partition, then the test partition.
for features, labels in ((X_train, y_train), (X_test, y_test)):
    print(features.shape, labels.shape)

(11447,) (11447, 5)
(2862,) (2862, 5)


In [28]:
# Bring every sequence to exactly max_review_length ids: longer ones are
# truncated and shorter ones padded, using pad_sequences' default settings.
max_review_length = 512
X_train, X_test = [
    sequence.pad_sequences(part, maxlen=max_review_length)
    for part in (X_train, X_test)
]

In [29]:
# Baseline classifier: Embedding -> LSTM(100) -> softmax over the 5 classes.
embedding_vecor_length = 32
model = Sequential()
# Maps each token id (< top_words) to a 32-dimensional vector.
model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))
model.add(LSTM(100))
model.add(Dense(5, activation='softmax'))
# categorical_crossentropy matches the one-hot encoded labels.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in print()
# only appended a stray "None" line to the cell output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_8 (Embedding)      (None, 512, 32)           163200    
_________________________________________________________________
lstm_7 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dense_7 (Dense)              (None, 5)                 505       
Total params: 216,905
Trainable params: 216,905
Non-trainable params: 0
_________________________________________________________________
None


In [11]:
# Train the current model for 6 epochs with mini-batches of 16 samples.
model.fit(X_train, y_train, batch_size=16, epochs=6)

# Score on the held-out split. The model was compiled with
# metrics=['accuracy'], so evaluate() returns [loss, accuracy].
scores = model.evaluate(X_test, y_test, verbose=0)
test_accuracy_pct = scores[1] * 100
print("Accuracy: %.2f%%" % test_accuracy_pct)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Accuracy: 31.97%


In [32]:
# Second classifier: Embedding -> Conv1D -> MaxPooling1D -> LSTM(100) -> softmax.
embedding_vecor_length = 32
model = Sequential()
# Maps each token id (< top_words) to a 32-dimensional vector.
model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))
# padding='same' keeps the sequence length; pooling by 2 then halves it
# before the sequence reaches the LSTM.
model.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(LSTM(100))
model.add(Dense(5, activation='softmax'))
# categorical_crossentropy matches the one-hot encoded labels.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in print()
# only appended a stray "None" line to the cell output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_10 (Embedding)     (None, 512, 32)           163200    
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 512, 32)           3104      
_________________________________________________________________
max_pooling1d_5 (MaxPooling1 (None, 256, 32)           0         
_________________________________________________________________
lstm_9 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dense_9 (Dense)              (None, 5)                 505       
Total params: 220,009
Trainable params: 220,009
Non-trainable params: 0
_________________________________________________________________
None


In [33]:
# Fit the most recently built model: 6 passes over the training data, 16 samples per batch.
model.fit(X_train, y_train, batch_size=16, epochs=6)

# Evaluate silently on the test partition; index 1 of the result is the
# accuracy metric requested at compile time (index 0 is the loss).
scores = model.evaluate(X_test, y_test, verbose=0)
accuracy_percent = scores[1] * 100
print("Accuracy: %.2f%%" % accuracy_percent)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Accuracy: 36.41%
