In [8]:
# training_and_saving.py

!pip install tensorflow
import pandas as pd
import re
from tensorflow.keras.preprocessing.text import Tokenizer # Use tensorflow.keras
from tensorflow.keras.preprocessing.sequence import pad_sequences # Use tensorflow.keras
from tensorflow.keras.models import Sequential # Use tensorflow.keras
from tensorflow.keras.layers import Dense, Embedding, LSTM # Use tensorflow.keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical # Use tensorflow.keras
from sklearn.preprocessing import LabelEncoder
import numpy as np
import pickle

# Load and clean data
data = pd.read_csv('/content/Data.csv')
# Keep only the two columns used below. .copy() detaches the selection from
# the frame returned by read_csv, so the assignment to data['text'] is not
# reported by pandas as a write to a slice (SettingWithCopyWarning).
data = data[['text', 'sentiment']].copy()

# Cleaning steps, in order: lowercase, drop every character that is not a
# lowercase letter, a digit or whitespace, then replace the substring 'rt'
# with a space.
# - The pattern is a raw string so that \s reaches the regex engine instead of
#   being read as an (invalid) string escape.
# - The class spells a-z rather than the range A-z: A-z also lets
#   [ \ ] ^ _ ` through, whereas clean_text() in the prediction cell removes
#   them. Removing them here keeps training and inference text identical.
# NOTE(review): replace('rt', ' ') is a substring replacement, so it also hits
# 'rt' inside words ("party" -> "pa y"). It is kept because clean_text() does
# the same; change both together and retrain.
non_alnum = re.compile(r'[^a-z0-9\s]')
data['text'] = data['text'].apply(
    lambda tweet: non_alnum.sub('', tweet.lower()).replace('rt', ' ')
)

# --- Features: integer-encode each tweet, then pad to a common length ---
max_fatures = 2000  # vocabulary cap handed to Tokenizer(num_words=...)
corpus = data['text'].values
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts(corpus)
X = pad_sequences(tokenizer.texts_to_sequences(corpus))

# --- Targets: sentiment string -> integer id -> categorical matrix ---
labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['sentiment'])
y = to_categorical(integer_encoded)

# --- Hold out 33% of the rows for evaluation; the split itself is seeded ---
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.33,
)

# LSTM model definition
embed_dim = 128  # second positional argument of the Embedding layer
lstm_out = 196   # first positional argument of the LSTM layer


def createmodel():
    """Build and compile the sentiment classifier.

    Reads the module-level ``max_fatures``, ``embed_dim``, ``lstm_out`` and
    ``X`` (``X.shape[1]`` is passed to the Embedding layer as ``input_length``).

    Returns:
        A compiled ``Sequential`` model made of an Embedding layer, an LSTM
        layer (dropout and recurrent dropout of 0.2) and a 3-unit softmax
        Dense layer, using categorical cross-entropy, the 'adam' optimizer
        and an accuracy metric.
    """
    # NOTE(review): the output layer is fixed at 3 units, so this presumably
    # expects exactly three sentiment classes in the data -- confirm.
    stack = [
        Embedding(max_fatures, embed_dim, input_length=X.shape[1]),
        LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2),
        Dense(3, activation='softmax'),
    ]
    network = Sequential(stack)
    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Train model
# Seed the random number generators before the model is built so that weight
# initialisation, dropout and batch shuffling start from the same state on
# every run. (The train/test split is already seeded via random_state.)
from tensorflow.keras.utils import set_random_seed
set_random_seed(42)

batch_size = 32
model = createmodel()
model.fit(X_train, Y_train, epochs=1, batch_size=batch_size, verbose=2)

# Evaluate on the held-out rows. The model is compiled with one loss and one
# metric, so evaluate() yields the loss followed by the accuracy.
score, acc = model.evaluate(X_test, Y_test, verbose=2, batch_size=batch_size)
print("Test loss:", score)
print("Test accuracy:", acc)

# Save model and tokenizer
# The fitted tokenizer and label encoder are stored next to the model because
# the prediction cell needs all three to turn raw text into a sentiment label.
# The file names are read back verbatim by the prediction cell.
model.save("sentiment_model.h5")
with open("tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)
with open("label_encoder.pkl", "wb") as f:
    pickle.dump(labelencoder, f)






291/291 - 45s - 156ms/step - accuracy: 0.6423 - loss: 0.8307
144/144 - 4s - 30ms/step - accuracy: 0.6468 - loss: 0.8559




Test loss: 0.8558618426322937
Test accuracy: 0.646789014339447


In [9]:
# predict_new_text.py

from keras.models import load_model
import pickle
from keras.preprocessing.sequence import pad_sequences
import numpy as np
import re

# Restore the three artifacts written by the training cell.
def _load_pickle(path):
    """Return the object unpickled from the file at ``path``.

    Unpickling can execute arbitrary code, so only use this on files that
    were produced by a trusted run of the training cell.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


model = load_model("sentiment_model.h5")
tokenizer = _load_pickle("tokenizer.pkl")
labelencoder = _load_pickle("label_encoder.pkl")

# Function to clean new input text
# Compiled once at definition time. A raw string is used so that \s is passed
# to the regex engine rather than being read as an (invalid) string escape.
_NON_ALNUM = re.compile(r'[^a-zA-Z0-9\s]')


def clean_text(text):
    """Lowercase and strip a piece of raw text before tokenisation.

    Steps, in order: lowercase the text; remove every character that is not
    in ``a-z``, ``A-Z``, ``0-9`` or whitespace; replace each occurrence of
    the substring ``'rt'`` with a single space.

    Args:
        text: The raw input string.

    Returns:
        The cleaned string. Whitespace is not collapsed, so removals and the
        ``'rt'`` replacement can leave runs of spaces.

    Note:
        The ``'rt'`` step is a plain substring replacement, so it also fires
        inside words ("party" -> "pa y"). It is kept unchanged because the
        training cell applies the same replacement before the tokenizer is
        fitted; change it only together with the training-side cleaning.
    """
    return _NON_ALNUM.sub('', text.lower()).replace('rt', ' ')

# Score one example tweet with the reloaded model.
new_text = "A lot of good things are happening. We are respected again throughout the world, and that's a great thing .@realDonaldTrump"

# Clean -> integer-encode -> pad to the sequence length the model reports.
cleaned_text = clean_text(new_text)
seq = tokenizer.texts_to_sequences([cleaned_text])
expected_len = model.input_shape[1]
padded = pad_sequences(seq, maxlen=expected_len)

# Take the highest-scoring class index for each input row and map it back to
# its sentiment string through the label encoder.
pred = model.predict(padded)
predicted_class = pred.argmax(axis=1)
label = labelencoder.inverse_transform(predicted_class)

print(f"Predicted sentiment: {label[0]}")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 469ms/step
Predicted sentiment: Negative


In [30]:
!pip install scikeras




In [35]:
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.optimizers import Adam

# Load and clean dataset
data = pd.read_csv('/content/Data.csv')
# Keep only the two columns used below. .copy() detaches the selection from
# the frame returned by read_csv, so the assignment to data['text'] is not
# reported by pandas as a write to a slice (SettingWithCopyWarning).
data = data[['text', 'sentiment']].copy()

# Cleaning steps, in order: lowercase, drop characters outside the class
# below, then replace every 'rt' substring with a space.
# The pattern is a raw string so that \s reaches the regex engine instead of
# being read as an (invalid) string escape.
# NOTE(review): the range A-z (lowercase z) also admits [ \ ] ^ _ ` ; A-Z was
# probably intended. It is left unchanged because the sample text in the
# prediction step at the end of this cell is cleaned with the same class --
# fix both places together.
# NOTE(review): replace('rt', ' ') also rewrites 'rt' inside words
# ("party" -> "pa y"); any change must be mirrored at prediction time.
keep_pattern = re.compile(r'[^a-zA-z0-9\s]')
data['text'] = data['text'].apply(
    lambda tweet: keep_pattern.sub('', tweet.lower()).replace('rt', ' ')
)

# --- Features: integer-encode each tweet, then pad to a common length ---
max_features = 2000  # vocabulary cap handed to Tokenizer(num_words=...)
texts = data['text'].values
tokenizer = Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(texts)
X = pad_sequences(tokenizer.texts_to_sequences(texts))

# --- Targets: sentiment string -> integer id -> categorical matrix ---
labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['sentiment'])
y = to_categorical(integer_encoded)

# --- Hold out 33% of the rows for evaluation; the split itself is seeded ---
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.33,
)

# Model factory
def create_model(embed_dim=128, lstm_out=196):
    """Build and compile the sentiment classifier.

    Reads the module-level ``max_features`` and ``X`` (``X.shape[1]`` is
    passed to the Embedding layer as ``input_length``).

    Args:
        embed_dim: Second positional argument of the Embedding layer.
        lstm_out: First positional argument of the LSTM layer.

    Returns:
        A compiled ``Sequential`` model made of an Embedding layer, an LSTM
        layer (dropout and recurrent dropout of 0.2) and a 3-unit softmax
        Dense layer, using categorical cross-entropy, a default ``Adam()``
        optimizer and an accuracy metric.
    """
    # NOTE(review): the output layer is fixed at 3 units, so this presumably
    # expects exactly three sentiment classes in the data -- confirm.
    network = Sequential([
        Embedding(max_features, embed_dim, input_length=X.shape[1]),
        LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2),
        Dense(3, activation='softmax'),
    ])
    network.compile(
        optimizer=Adam(),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Hyperparameters for tuning
embed_dim = 128
lstm_out = 196
batch_size = 32
epochs = 2

# Seed the random number generators before the model is built so that weight
# initialisation, dropout and batch shuffling start from the same state on
# every run. (The train/test split is already seeded via random_state.)
from tensorflow.keras.utils import set_random_seed
set_random_seed(42)

# Create model instance
model = create_model(embed_dim=embed_dim, lstm_out=lstm_out)

# Train the model
model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, verbose=1)

# Evaluate the model on the held-out rows. The model is compiled with one
# loss and one metric, so `score` is the test loss and `acc` the accuracy.
score, acc = model.evaluate(X_test, Y_test, batch_size=batch_size)
print(f"Test score: {score}, Test accuracy: {acc}")

# Use the trained model to predict new data
sample_text = "A lot of good things are happening. We are respected again throughout the world, and that's a great thing .@realDonaldTrump"

# Clean the sample with the same three steps applied to the training text
# earlier in this cell: lowercase, strip characters outside the class, then
# replace 'rt' with a space. The 'rt' step was previously skipped here, so any
# input containing "rt" was tokenised differently from the text the tokenizer
# was fitted on. The character class (including its A-z range) is kept
# identical to the training-side pattern so both see the same characters.
sample_text = re.sub(r'[^a-zA-z0-9\s]', '', sample_text.lower()).replace('rt', ' ')
sample_seq = tokenizer.texts_to_sequences([sample_text])
sample_pad = pad_sequences(sample_seq, maxlen=X.shape[1])

# Make prediction
prediction = model.predict(sample_pad)
# Reduce along the class axis so each input row gets its own class index;
# a bare argmax() would index into the flattened array, which is only correct
# when there is exactly one row.
predicted_class = labelencoder.inverse_transform(prediction.argmax(axis=1))
print(f"Predicted sentiment: {predicted_class[0]}")


Epoch 1/2




[1m291/291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 144ms/step - accuracy: 0.6074 - loss: 0.9004
Epoch 2/2
[1m291/291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 149ms/step - accuracy: 0.7040 - loss: 0.6955
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 20ms/step - accuracy: 0.6773 - loss: 0.7521
Test score: 0.7534351944923401, Test accuracy: 0.6734381914138794
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 438ms/step
Predicted sentiment: Positive
