In [4]:
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import re

from sklearn.preprocessing import LabelEncoder

data = pd.read_csv("Sentiment.csv")
# Keeping only the neccessary columns
data = data[['text','sentiment']]

data['text'] = data['text'].apply(lambda x: x.lower())
data['text'] = data['text'].apply((lambda x: re.sub('[^a-zA-z0-9\s]', '', x)))

for idx, row in data.iterrows():
    row[0] = row[0].replace('rt', ' ')

max_fatures = 2000
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values)

X = pad_sequences(X)

embed_dim = 128
lstm_out = 196
def createmodel():
    model = Sequential()
    model.add(Embedding(max_fatures, embed_dim,input_length = X.shape[1]))
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(3,activation='softmax'))
    model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
    return model
# print(model.summary())

labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['sentiment'])
y = to_categorical(integer_encoded)
X_train, X_test, Y_train, Y_test = train_test_split(X,y, test_size = 0.33, random_state = 42)

batch_size = 32
model = createmodel()
model.fit(X_train, Y_train, epochs = 1, batch_size=batch_size, verbose = 2)
score,acc = model.evaluate(X_test,Y_test,verbose=2,batch_size=batch_size)
print(score)
print(acc)
print(model.metrics_names)

291/291 - 27s - loss: 0.8222 - accuracy: 0.6482 - 27s/epoch - 93ms/step
144/144 - 2s - loss: 0.7488 - accuracy: 0.6739 - 2s/epoch - 13ms/step
0.7488127946853638
0.6738750338554382
['loss', 'accuracy']


In [5]:
model.save('sentiment_model.h5')

  saving_api.save_model(


In [6]:
from keras.models import load_model
import numpy as np

loaded_model = load_model('sentiment_model.h5')

new_text = ["A lot of good things are happening. We are respected again throughout the world, and that's a great thing.@realDonaldTrump"]
new_text = tokenizer.texts_to_sequences(new_text)
new_text = pad_sequences(new_text, maxlen=X.shape[1], dtype='int32', value=0)
sentiment_prob = loaded_model.predict(new_text, batch_size=1, verbose=2)[0]

sentiment_classes = ['Negative', 'Neutral', 'Positive']
sentiment_pred = sentiment_classes[np.argmax(sentiment_prob)]

print("Predicted sentiment: ", sentiment_pred)
print("Predicted probabilities: ", sentiment_prob)



1/1 - 0s - 189ms/epoch - 189ms/step
Predicted sentiment:  Positive
Predicted probabilities:  [0.40098596 0.16382574 0.4351882 ]


In [7]:
pip install scikeras

Collecting scikeras
  Downloading scikeras-0.12.0-py3-none-any.whl (27 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.12.0


In [None]:
from scikeras.wrappers import KerasClassifier
#from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.optimizers import Adam

def create_model(units=196, dropout=0.2, learning_rate=0.001):
    model = Sequential()
    model.add(Embedding(max_fatures, embed_dim,input_length = X.shape[1]))
    model.add(LSTM(units, dropout=dropout, recurrent_dropout=dropout))
    model.add(Dense(3, activation='softmax'))
    optimizer = Adam(lr=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

model = KerasClassifier(build_fn=createmodel,verbose=2) #initiating model to test performance by applying multiple hyper parameters
batch_size= [10, 20, 40] #hyper parameter batch_size
epochs = [1, 2] #hyper parameter no. of epochs
param_grid= {'batch_size':batch_size, 'epochs':epochs} #creating dictionary for batch size, no. of epochs
grid  = GridSearchCV(estimator=model, param_grid=param_grid) #Applying dictionary with hyper parameters
grid_result= grid.fit(X_train,Y_train) #Fitting the model
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_)) #best score, best hyper parameters

  X, y = self._initialize(X, y)


744/744 - 55s - loss: 0.8164 - accuracy: 0.6512 - 55s/epoch - 73ms/step
186/186 - 1s - 1s/epoch - 8ms/step


  X, y = self._initialize(X, y)


744/744 - 55s - loss: 0.8204 - accuracy: 0.6480 - 55s/epoch - 74ms/step
186/186 - 1s - 1s/epoch - 8ms/step


  X, y = self._initialize(X, y)


744/744 - 55s - loss: 0.8263 - accuracy: 0.6442 - 55s/epoch - 74ms/step
186/186 - 1s - 1s/epoch - 8ms/step


  X, y = self._initialize(X, y)


744/744 - 62s - loss: 0.8241 - accuracy: 0.6461 - 62s/epoch - 83ms/step
186/186 - 1s - 1s/epoch - 8ms/step


  X, y = self._initialize(X, y)


744/744 - 57s - loss: 0.8163 - accuracy: 0.6456 - 57s/epoch - 77ms/step
186/186 - 1s - 1s/epoch - 8ms/step
Epoch 1/2


  X, y = self._initialize(X, y)


744/744 - 53s - loss: 0.8256 - accuracy: 0.6431 - 53s/epoch - 72ms/step
Epoch 2/2
744/744 - 52s - loss: 0.6808 - accuracy: 0.7144 - 52s/epoch - 70ms/step
186/186 - 1s - 1s/epoch - 7ms/step
Epoch 1/2


  X, y = self._initialize(X, y)


744/744 - 53s - loss: 0.8271 - accuracy: 0.6462 - 53s/epoch - 72ms/step
Epoch 2/2
744/744 - 50s - loss: 0.6841 - accuracy: 0.7111 - 50s/epoch - 68ms/step
186/186 - 1s - 1s/epoch - 8ms/step
Epoch 1/2


  X, y = self._initialize(X, y)


744/744 - 57s - loss: 0.8274 - accuracy: 0.6478 - 57s/epoch - 76ms/step
Epoch 2/2
744/744 - 53s - loss: 0.6752 - accuracy: 0.7147 - 53s/epoch - 72ms/step
186/186 - 2s - 2s/epoch - 9ms/step


  X, y = self._initialize(X, y)


Epoch 1/2
744/744 - 54s - loss: 0.8246 - accuracy: 0.6471 - 54s/epoch - 73ms/step
Epoch 2/2
