In [None]:
import pickle
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout

In [None]:
# Read the labelled tweets and pull the raw text and label columns out as arrays.
df = pd.read_excel("/content/twitter_training.xlsx")
texts, labels = (df[column].values for column in ('text', 'Label'))

In [None]:
# Blank out missing tweets *before* casting to str. Casting first turns NaN into the
# literal string 'nan', after which the null check can never match and the placeholder
# text 'nan' would be fed to the tokenizer as if it were a real tweet.
texts = np.where(pd.isnull(texts), '', texts).astype(str)

In [None]:
# Map each distinct label value to an integer id. The fitted encoder is kept so that
# predicted ids can later be turned back into labels with inverse_transform().
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

In [None]:
# Persist the fitted label encoder next to the notebook so the same id <-> label mapping
# can be reloaded later.
with open('label_encoder.pickle', 'wb') as encoder_file:
    pickle.dump(label_encoder, encoder_file, pickle.HIGHEST_PROTOCOL)

In [None]:
# Hold out 20% of the samples; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    encoded_labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Build the vocabulary from the training split only, then convert both splits to
# sequences of integer token ids with that same vocabulary.
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(X_train)
X_train_seq, X_test_seq = (
    tokenizer.texts_to_sequences(split) for split in (X_train, X_test)
)

In [None]:
# Persist the fitted tokenizer so new text can be converted with the same vocabulary.
with open('tokenizer.pickle', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file, pickle.HIGHEST_PROTOCOL)

In [None]:
# Every sequence is padded/truncated to this many token ids so the model sees
# fixed-width input.
max_length = 100
X_train_padded, X_test_padded = (
    pad_sequences(sequences, maxlen=max_length)
    for sequences in (X_train_seq, X_test_seq)
)

In [None]:
# Start an empty layer stack; the following cells append layers to it one by one.
# NOTE: re-running any of those model.add(...) cells without re-running this one
# appends a duplicate layer to the same model object.
model = Sequential()


In [None]:
# Token-id -> dense vector lookup. input_dim mirrors the tokenizer's num_words (10000),
# and each id is embedded into 128 dimensions.
model.add(
    Embedding(
        input_dim=10000,
        output_dim=128,
        input_length=max_length,
    )
)



In [None]:
# 1-D convolution over the token axis: 128 filters, each spanning 5 consecutive positions.
model.add(
    Conv1D(
        filters=128,
        kernel_size=5,
        activation='relu',
    )
)

In [None]:
# Collapse the sequence axis by keeping each filter's maximum activation, giving one
# fixed-size feature vector per input regardless of where a pattern occurred.
model.add(GlobalMaxPooling1D())

In [None]:
# Fully connected hidden layer, followed by dropout that zeroes half of its activations
# during training as regularisation.
model.add(Dense(units=64, activation='relu'))
model.add(Dropout(rate=0.5))

In [None]:
# Size the softmax output from the fitted encoder instead of hard-coding 4, so the layer
# always has exactly one unit per encoded label id (which the sparse categorical
# cross-entropy loss used for training requires). For the expected classes
# (Positive, Negative, Neutral, Irrelevant) this is still 4.
num_classes = len(label_encoder.classes_)
model.add(Dense(num_classes, activation='softmax'))

In [None]:
# The sparse loss variant is used because the targets are integer class ids,
# not one-hot vectors.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Keep the returned History (per-epoch loss/accuracy) instead of discarding it, so the
# learning curves can be inspected afterwards; assigning it also stops the bare History
# repr from being echoed as the cell's output.
# NOTE(review): the held-out test split doubles as validation data here, so the val_*
# numbers are not independent of the later model.evaluate() call — consider carving out
# a separate validation split.
history = model.fit(
    X_train_padded,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_test_padded, y_test),
)

Epoch 1/10
[1m1868/1868[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 61ms/step - accuracy: 0.5215 - loss: 1.0967 - val_accuracy: 0.7758 - val_loss: 0.5947
Epoch 2/10
[1m1868/1868[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 60ms/step - accuracy: 0.8468 - loss: 0.4328 - val_accuracy: 0.8466 - val_loss: 0.4142
Epoch 3/10
[1m1868/1868[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 60ms/step - accuracy: 0.9229 - loss: 0.2095 - val_accuracy: 0.8600 - val_loss: 0.4106
Epoch 4/10
[1m1868/1868[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 61ms/step - accuracy: 0.9415 - loss: 0.1503 - val_accuracy: 0.8624 - val_loss: 0.4710
Epoch 5/10
[1m1868/1868[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 59ms/step - accuracy: 0.9484 - loss: 0.1290 - val_accuracy: 0.8646 - val_loss: 0.4481
Epoch 6/10
[1m1868/1868[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 60ms/step - accuracy: 0.9520 - loss: 0.1178 - val_accuracy: 0.8680 - val_loss: 0.474

<keras.src.callbacks.history.History at 0x7e83212b82b0>

In [None]:
# Write the trained model to 'sentiment_model.h5' in the current working directory.
# NOTE(review): the .h5 extension selects Keras' legacy HDF5 format; recent Keras releases
# recommend the native '.keras' format. Confirm what loads this file before renaming it.
model.save('sentiment_model.h5')



In [None]:
# evaluate() returns the loss followed by the metrics passed to compile() (accuracy);
# only the accuracy is reported, rounded to two decimals.
test_loss, test_accuracy = model.evaluate(x=X_test_padded, y=y_test)
print(f'Test Accuracy: {test_accuracy:.2f}')

[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - accuracy: 0.8714 - loss: 0.5548
Test Accuracy: 0.87


In [None]:
def predict_sentiment(statement):
    """Classify a single piece of text and return its decoded label.

    The text is converted to token ids with the notebook's fitted ``tokenizer``, padded to
    ``max_length``, scored by ``model``, and the index of the highest score is mapped back
    to its original label through ``label_encoder``.

    Args:
        statement: The text to classify.

    Returns:
        The label of the highest-scoring class for ``statement``.
    """
    # The tokenizer works on batches, so wrap the single text in a one-element list.
    token_ids = tokenizer.texts_to_sequences([statement])
    model_input = pad_sequences(token_ids, maxlen=max_length)

    class_scores = model.predict(model_input)

    # One row of scores per input text: take the best-scoring column of each row,
    # then translate that class index back into its label.
    best_index = np.argmax(class_scores, axis=1)
    return label_encoder.inverse_transform(best_index)[0]

# Interactive smoke test: classify one sentence typed by the user.
# NOTE: input() blocks until text is entered, so this cell pauses a "Run All".
input_statement = input("Enter a sentence to check its sentiment: ")  # Prompt for the text to classify
predicted_sentiment = predict_sentiment(input_statement)
print(f'The predicted sentiment is: {predicted_sentiment}')

Enter a sentence to check its sentiment: i hate u
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
The predicted sentiment is: Negative
