In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Bidirectional, Dropout, SpatialDropout1D
from tensorflow.keras.callbacks import EarlyStopping

df = pd.read_csv('twitter_training.csv', header=None, names=['id', 'company', 'sentiment', 'text'])
df['text'] = df['text'].fillna('').str.lower().replace(r'[^a-z0-9\s]', '', regex=True)
df = df[df['text'].str.strip() != ''].reset_index(drop=True)

max_words = 20000 
max_len = 100
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(df['text'])
X = pad_sequences(tokenizer.texts_to_sequences(df['text']), maxlen=max_len)

le = LabelEncoder()
y_encoded = le.fit_transform(df['sentiment']) 
num_classes = len(le.classes_) 
y = tf.keras.utils.to_categorical(y_encoded, num_classes) 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = Sequential([
    Embedding(input_dim=max_words, output_dim=128, input_length=max_len),
    SpatialDropout1D(0.3),
    Bidirectional(GRU(64, return_sequences=True)),
    Bidirectional(GRU(32)),
    Dropout(0.5),
    Dense(64, activation='relu'),
    
    Dense(num_classes, activation='softmax') 
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

print(f"Training on {num_classes} classes: {le.classes_}")
model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_test, y_test), callbacks=[early_stop])

def predict_sentiment(text):
    
    seq = tokenizer.texts_to_sequences([text.lower()])
    padded = pad_sequences(seq, maxlen=max_len)
    
    prediction = model.predict(padded, verbose=0)
   
    class_idx = np.argmax(prediction)
    
    sentiment_label = le.inverse_transform([class_idx])[0]
    confidence = np.max(prediction)
    
    return f"{sentiment_label} ({confidence:.1%})"

samples = ["I love this game!", "This is the worst update ever.", "It's an okay product.", "Bananas are yellow."]
for s in samples:
    print(f"'{s}' -> {predict_sentiment(s)}")

  if not hasattr(np, "object"):


Training on 4 classes: ['Irrelevant' 'Negative' 'Neutral' 'Positive']
Epoch 1/10
[1m922/922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m276s[0m 271ms/step - accuracy: 0.6052 - loss: 0.9615 - val_accuracy: 0.7708 - val_loss: 0.6183
Epoch 2/10
[1m922/922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 217ms/step - accuracy: 0.8343 - loss: 0.4708 - val_accuracy: 0.8442 - val_loss: 0.4288
Epoch 3/10
[1m922/922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 218ms/step - accuracy: 0.8892 - loss: 0.3174 - val_accuracy: 0.8610 - val_loss: 0.4029
Epoch 4/10
[1m922/922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 216ms/step - accuracy: 0.9107 - loss: 0.2528 - val_accuracy: 0.8704 - val_loss: 0.3778
Epoch 5/10
[1m922/922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 213ms/step - accuracy: 0.9239 - loss: 0.2147 - val_accuracy: 0.8734 - val_loss: 0.3859
Epoch 6/10
[1m922/922[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m188s[0m 198ms/step - accuracy

In [4]:
samples = ["I love this game!", "This is the worst update ever.", "It's bad product ", "Bananas are yellow."]
for s in samples:
    print(f"'{s}' -> {predict_sentiment(s)}")

'I love this game!' -> Positive (94.4%)
'This is the worst update ever.' -> Negative (98.8%)
'It's bad product ' -> Negative (86.4%)
'Bananas are yellow.' -> Irrelevant (96.6%)
