In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.callbacks import ModelCheckpoint

# Load the dataset
data = pd.read_csv('Sentiment.csv')

# Preprocessing
X = data['text'].values
y = data['sentiment'].values

# Encode labels to numerical values
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X)
X = tokenizer.texts_to_sequences(X)
X = pad_sequences(X, maxlen=100)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build LSTM model
def create_model(optimizer='adam'):
    model = Sequential()
    model.add(Embedding(input_dim=5000, output_dim=128, input_length=100))
    model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

# Hyperparameter tuning
optimizers = ['adam', 'rmsprop']
best_accuracy = 0
best_optimizer = ''

for optimizer in optimizers:
    model = create_model(optimizer)
    checkpoint = ModelCheckpoint('model.keras', monitor='val_loss', save_best_only=True)
    model.fit(X_train, y_train, validation_split=0.2, epochs=5, batch_size=64, callbacks=[checkpoint])
    accuracy = model.evaluate(X_test, y_test, verbose=0)[1]
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_optimizer = optimizer

print(f"Best accuracy: {best_accuracy} with optimizer: {best_optimizer}")

# Load the saved model and predict on new text data
saved_model = load_model('model.keras')
new_text = ["A lot of good things are happening. We are respected again throughout the world, and that's a great thing.@realDonaldTrump"]
new_text_seq = tokenizer.texts_to_sequences(new_text)
new_text_pad = pad_sequences(new_text_seq, maxlen=100)
prediction = saved_model.predict(new_text_pad)
print(f"Prediction: {prediction[0][0]}")


Epoch 1/5




[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 296ms/step - accuracy: 0.4705 - loss: 0.5841 - val_accuracy: 0.5351 - val_loss: -0.2878
Epoch 2/5
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 317ms/step - accuracy: 0.6197 - loss: -1.2020 - val_accuracy: 0.5527 - val_loss: -0.9492
Epoch 3/5
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 298ms/step - accuracy: 0.6217 - loss: -3.0504 - val_accuracy: 0.5851 - val_loss: -1.9809
Epoch 4/5
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 295ms/step - accuracy: 0.6336 - loss: -4.0125 - val_accuracy: 0.5887 - val_loss: -2.4593
Epoch 5/5
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 293ms/step - accuracy: 0.6435 - loss: -7.3315 - val_accuracy: 0.5523 - val_loss: -2.1986
Epoch 1/5
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 292ms/step - accuracy: 0.3492 - loss: 0.6407 - val_accuracy: 0.4396 - val_loss: 0.3723
Epoch 2/5
[1m139