In [None]:
!pip install tensorflow




In [None]:
# Import necessary libraries
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from google.colab import drive

# Mount Google Drive so the CSV stored there is readable from this runtime.
drive.mount('/content/drive')

# Load the dataset
dataset_path = "/content/drive/My Drive/twi.csv"  # Path in Google Colab
required_columns = ['text', 'sentiment']

raw_data = pd.read_csv(dataset_path)

# Fail early with a readable message if the CSV does not have the expected schema.
missing_columns = [column for column in required_columns if column not in raw_data.columns]
if missing_columns:
    raise KeyError(f"{dataset_path} is missing expected column(s): {missing_columns}")

# Missing cells are loaded as float NaN, which cannot be tokenized as text and
# cannot serve as a training label, so drop those rows before going further.
data = raw_data.dropna(subset=required_columns).reset_index(drop=True)
if data.empty:
    raise ValueError(f"{dataset_path} contains no rows with both text and sentiment")

# The model defined later ends in a single sigmoid unit trained with
# binary cross-entropy, so labels must already be encoded as 0 / 1.
if not data['sentiment'].isin([0, 1]).all():
    raise ValueError("'sentiment' must contain only 0/1 labels for binary classification")

# Separate features (text) and labels (sentiment)
texts = data['text'].astype(str).values  # force str so numeric-looking cells tokenize too
labels = data['sentiment'].values

# Tokenize and pad sequences
vocab_size = 10000  # Vocabulary size for the tokenizer
max_length = 50  # Maximum length of each sequence
oov_token = "<OOV>"  # Out of vocabulary token

# Split the raw texts BEFORE fitting the tokenizer so that the vocabulary is
# learned from training data only (no test-set leakage). `stratify` keeps the
# class ratio equal in both splits, which matters for imbalanced labels; it
# requires at least two samples of every class.
train_texts, test_texts, y_train, y_test = train_test_split(
    texts, labels, test_size=0.2, random_state=42, stratify=labels
)

tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_token)
tokenizer.fit_on_texts(train_texts)


def encode_texts(raw_texts):
    """Convert raw strings into post-padded integer sequences of length `max_length`.

    Uses the module-level `tokenizer` (fitted on the training texts only), so
    words unseen during fitting are mapped to the `oov_token` index.
    """
    return pad_sequences(
        tokenizer.texts_to_sequences(raw_texts), maxlen=max_length, padding='post'
    )


# Split the dataset into training and testing sets
X_train = encode_texts(train_texts)
X_test = encode_texts(test_texts)

# Full-corpus encodings, kept under their original names for any later cell
# that still refers to them.
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')

# Sanity check: the two splits together must cover every input row exactly once.
assert len(X_train) + len(X_test) == len(texts)

# Seed Python, NumPy and TensorFlow RNGs so weight initialisation, dropout and
# batch shuffling are repeatable across "Restart & Run All".
# NOTE(review): some GPU kernels may still be non-deterministic.
tf.keras.utils.set_random_seed(42)

# Define the model
# The input shape is declared with an explicit Input layer; the Embedding
# `input_length` argument is deprecated in Keras 3 and only triggers a warning.
model = Sequential([
    tf.keras.Input(shape=(max_length,)),
    Embedding(vocab_size, 16),  # 16-dimensional learned word vectors
    Bidirectional(LSTM(64, return_sequences=True)),  # emits one vector per timestep for the next LSTM
    Dropout(0.5),
    Bidirectional(LSTM(32)),  # collapses the sequence into a single vector
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])

# Compile the model
# A single sigmoid output pairs with binary cross-entropy; labels must be 0 / 1.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
epochs = 10  # Number of epochs
batch_size = 32  # Batch size

# Keyword arguments for `fit`, gathered in one place so they are easy to tweak.
# Note that the held-out test split doubles as the validation set here, so the
# per-epoch val_* numbers are computed on the same data as the final test metrics.
fit_options = {
    "validation_data": (X_test, y_test),
    "epochs": epochs,
    "batch_size": batch_size,
    "verbose": 1,
}

# `history` keeps the per-epoch metrics returned by Keras.
history = model.fit(X_train, y_train, **fit_options)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# Generate a classification report
# Sigmoid scores above 0.5 count as class 1. `y_pred` keeps the (n, 1) shape
# returned by `predict`; it is flattened only for the report, which expects
# 1-D label arrays.
y_pred = (model.predict(X_test) > 0.5).astype("int32")
print(classification_report(y_test, y_pred.ravel()))

# Save the model
# NOTE: ".h5" is the legacy HDF5 format; the file name is kept unchanged so
# existing consumers of this artifact keep working.
model.save("twitter_sentiment_model.h5")

# Persist the fitted tokenizer next to the model: without the exact
# word-to-index mapping the saved network cannot be applied to new text.
with open("twitter_sentiment_tokenizer.json", "w", encoding="utf-8") as tokenizer_file:
    tokenizer_file.write(tokenizer.to_json())




Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




Epoch 1/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 330ms/step - accuracy: 0.7679 - loss: 0.6788 - val_accuracy: 0.9070 - val_loss: 0.5843
Epoch 2/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 92ms/step - accuracy: 0.8520 - loss: 0.5606 - val_accuracy: 0.9070 - val_loss: 0.3369
Epoch 3/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 91ms/step - accuracy: 0.8642 - loss: 0.4260 - val_accuracy: 0.9070 - val_loss: 0.3061
Epoch 4/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 166ms/step - accuracy: 0.8380 - loss: 0.4727 - val_accuracy: 0.9070 - val_loss: 0.3128
Epoch 5/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 178ms/step - accuracy: 0.8657 - loss: 0.4083 - val_accuracy: 0.9070 - val_loss: 0.3217
Epoch 6/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 175ms/step - accuracy: 0.8720 - loss: 0.3653 - val_accuracy: 0.9070 - val_loss: 0.2850
Epoch 7/10
[1m6/6[0m [32m━━━━━━━━━━━━━



              precision    recall  f1-score   support

           0       1.00      0.97      0.99        39
           1       0.80      1.00      0.89         4

    accuracy                           0.98        43
   macro avg       0.90      0.99      0.94        43
weighted avg       0.98      0.98      0.98        43



In [None]:
def sentiment_from_score(score, threshold=0.5):
    """Map a sigmoid score to the sentiment name printed by this notebook.

    Scores strictly above `threshold` are reported as "Happy", everything
    else as "Sad".

    NOTE(review): this assumes label 1 in the training data means positive
    sentiment - confirm against the dataset. The recorded run labelled an
    insult as "Happy", so either the mapping is inverted or the model is
    unreliable on short / out-of-vocabulary inputs.
    """
    return "Happy" if score > threshold else "Sad"


# Test with custom text
custom_text = ["fuck you"]

# New text must go through the same tokenizer and padding as the training data.
custom_seq = tokenizer.texts_to_sequences(custom_text)
custom_padded = pad_sequences(custom_seq, maxlen=max_length, padding='post')
custom_prediction = model.predict(custom_padded)

# Report every entry of `custom_text`, not just the first one
# (`custom_prediction` holds one row with a single score per input text).
for score_row in custom_prediction:
    print(f"Sentiment: {sentiment_from_score(score_row[0])}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
Sentiment: Happy
