In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import keras
from sklearn.feature_extraction.text import CountVectorizer
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from keras.wrappers.scikit_learn import KerasClassifier
import re

from sklearn.preprocessing import LabelEncoder

data = pd.read_csv('Sentiment.csv')
# Keeping only the neccessary columns
data = data[['text','sentiment']]
data = data[data.sentiment != 'Neutral']

data = data[data.sentiment != "Neutral"]
data['text'] = data['text'].apply(lambda x: x.lower())
data['text'] = data['text'].apply((lambda x: re.sub('[^a-zA-z0-9\s]', '', x)))

print(data[data['sentiment'] == 'Positive'].size)
print(data[data['sentiment'] == 'Negative'].size)

for idx, row in data.iterrows():
    row[0] = row[0].replace('rt', ' ')

max_fatures = 2000
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values)
print(X)
X = pad_sequences(X)
print(X)
embed_dim = 128
lstm_out = 196

tbCallBack= keras.callbacks.TensorBoard(log_dir='./Graph', write_graph=True, write_images=True)


def createmodel():
    model = Sequential()
    model.add(Embedding(max_fatures, embed_dim,input_length = X.shape[1]))
    model.add(SpatialDropout1D(0.4))
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(2,activation='softmax'))
    model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
    return model
# print(model.summary())

labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['sentiment'])
y = to_categorical(integer_encoded)
X_train, X_test, Y_train, Y_test = train_test_split(X,y, test_size = 0.33, random_state = 42)
print(X_train.shape,Y_train.shape)
print(X_test.shape,Y_test.shape)

model = createmodel()
model.fit(X_train, Y_train, epochs = 4, batch_size=40, verbose = 2, callbacks=[tbCallBack])
twt = ['A lot of good things are happening. We are respected again throughout the world, and that\'s a great thing']

score,acc = model.evaluate(X_test,Y_test,verbose=2,batch_size=40)
print(score)
print(acc)

#save to disk
model1_json = model.to_json()
with open('model1.json', 'w') as json_file:
    json_file.write(model1_json)
model.save_weights('model1.h5')

4472
16986
[[363, 122, 1, 703, 2, 39, 58, 237, 37, 210, 6, 174, 1761, 12, 1324, 1409, 743], [16, 284, 252, 5, 821, 102, 167, 26, 136, 6, 1, 173, 12, 2, 233, 724, 17], [1261, 2, 303, 23, 1943, 1, 1632, 216, 12, 1, 704, 6, 185, 207, 371, 670], [127, 17, 53, 263, 410, 9, 82, 304, 441, 1325, 1762, 1150, 62, 1944, 194, 2, 51], [9, 1151, 167, 8, 21, 1326, 63, 9, 614, 188, 21, 189, 4, 34, 1, 563, 19, 822, 2, 44, 744], [12, 1, 168, 96, 547, 34, 1, 103, 589, 52, 59, 1763, 7, 17, 615, 2], [39, 149, 26, 823, 13, 2, 926], [23, 12, 46, 16], [36, 1945, 47, 4, 860, 20, 69, 2, 174, 548, 6, 1, 58, 275, 335], [31, 1208, 53, 453, 22, 54, 146, 1, 2, 21], [671, 9, 528, 305, 169, 55, 12, 1327, 182, 13, 47, 2], [306, 372, 1, 6, 893, 14, 92, 466, 270, 5, 271, 2], [266, 467, 966, 14, 453, 22, 107, 12, 17, 2, 475], [616, 52, 186, 132, 175, 109, 29, 1, 2, 51, 277, 45, 6, 109], [770, 894, 5, 582, 12, 1328, 140, 1012, 5, 1, 42, 154, 37, 1, 770, 1, 603, 434, 795, 14, 1, 49, 2], [1410, 118, 1329, 14, 1633, 14, 1764,

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/4
 - 18s - loss: 0.4494 - accuracy: 0.8077
Epoch 2/4
 - 17s - loss: 0.3262 - accuracy: 0.8620
Epoch 3/4
 - 17s - loss: 0.2834 - accuracy: 0.8805
Epoch 4/4
 - 17s - loss: 0.2542 - accuracy: 0.8941
0.3628239914610776
0.8424174189567566
