In [1]:
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
import re
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV


In [2]:
data = pd.read_csv(r'/content/Sentiment.csv')
# Keeping only the neccessary columns
data = data[['text','sentiment']]

data['text'] = data['text'].apply(lambda x: x.lower())
data['text'] = data['text'].apply((lambda x: re.sub('[^a-zA-z0-9\s]', '', x)))

for idx, row in data.iterrows():
    row[0] = row[0].replace('rt', ' ')

max_fatures = 2000
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values)

X = pad_sequences(X)

In [3]:
embed_dim = 128
lstm_out = 196
def createmodel():
    model = Sequential()
    model.add(Embedding(max_fatures, embed_dim,input_length = X.shape[1]))
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(3,activation='softmax'))
    model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
    return model
# print(model.summary())

labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['sentiment'])
y = to_categorical(integer_encoded)
X_train, X_test, Y_train, Y_test = train_test_split(X,y, test_size = 0.33, random_state = 42)

In [5]:
# Hyper Tune the model

model = KerasClassifier(build_fn=createmodel,verbose=0)
batch_size = [32]
epochs = [5]
# optimizer = ['SGD','RMSprop','Adagrad','Adam']
# activation = ['softmax','relu','tanh','sigmoid']
# dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]

param_grid = dict(batch_size= batch_size,
                  epochs = epochs,
#                   optimizer = optimizer,
#                   activation = activation,
#                   dropout_rate= dropout_rate
                 )

grid = GridSearchCV(estimator= model,
                   param_grid= param_grid)
grid_result = grid.fit(X_train, Y_train)

  model = KerasClassifier(build_fn=createmodel,verbose=0)


In [6]:
print("Best params : %f using %s"%(grid_result.best_score_,grid_result.best_params_))

Best params : 0.614769 using {'batch_size': 32, 'epochs': 5}


In [16]:
batch_size = grid_result.best_params_['batch_size']
epochs = grid_result.best_params_['epochs']
model = createmodel()
model.fit(X_train, Y_train, epochs = epochs, batch_size=batch_size, verbose = 2)

Epoch 1/5
153/153 - 28s - loss: 0.9274 - accuracy: 0.5914 - 28s/epoch - 181ms/step
Epoch 2/5
153/153 - 24s - loss: 0.7527 - accuracy: 0.6714 - 24s/epoch - 157ms/step
Epoch 3/5
153/153 - 25s - loss: 0.6208 - accuracy: 0.7368 - 25s/epoch - 164ms/step
Epoch 4/5
153/153 - 24s - loss: 0.5324 - accuracy: 0.7752 - 24s/epoch - 159ms/step
Epoch 5/5
153/153 - 25s - loss: 0.4507 - accuracy: 0.8125 - 25s/epoch - 166ms/step


<keras.callbacks.History at 0x7ec30bb845e0>

In [17]:
score,acc = model.evaluate(X_test,Y_test,verbose=2,batch_size=batch_size)
print(score)
print(acc)



76/76 - 2s - loss: 1.0519 - accuracy: 0.6137 - 2s/epoch - 20ms/step
1.0519311428070068
0.6136552691459656


In [21]:
# test example case

tweet = "A lot of good things are happening. We are respected again throughout the world, and that's a great thing.@realDonaldTrump"

tweet = tweet.replace('rt',"")

tweet_bg = tokenizer.texts_to_sequences(tweet)

tweet_padded = pad_sequences(tweet_bg)

In [24]:
tweet_sent_pred = model.predict(tweet_padded)



In [28]:
tweet_sent_pred

array([[0.41703022, 0.3064663 , 0.2765035 ],
       [0.32242325, 0.3128235 , 0.36475328],
       [0.32242325, 0.3128235 , 0.36475328],
       [0.3789304 , 0.19307324, 0.4279963 ],
       [0.12173779, 0.37894967, 0.4993125 ],
       [0.32242325, 0.3128235 , 0.36475328],
       [0.3789304 , 0.19307324, 0.4279963 ],
       [0.32242325, 0.3128235 , 0.36475328],
       [0.32242325, 0.3128235 , 0.36475328],
       [0.95334864, 0.01811399, 0.02853725],
       [0.3789304 , 0.19307324, 0.4279963 ],
       [0.3789304 , 0.19307324, 0.4279963 ],
       [0.14395496, 0.36503345, 0.49101168],
       [0.32242325, 0.3128235 , 0.36475328],
       [0.12173779, 0.37894967, 0.4993125 ],
       [0.23224777, 0.15724312, 0.610509  ],
       [0.24883147, 0.3977808 , 0.35338768],
       [0.35977954, 0.44575936, 0.19446114],
       [0.95334864, 0.01811399, 0.02853725],
       [0.35451624, 0.39537922, 0.25010446],
       [0.32242325, 0.3128235 , 0.36475328],
       [0.41703022, 0.3064663 , 0.2765035 ],
       [0.

In [29]:
model.save("model_sent.h5")