In [208]:

import pandas as pd
import numpy as np
import re
import string
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SpatialDropout1D, LSTM, Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import nltk
from nltk.corpus import stopwords

nltk.download('stopwords')
stop_words = set(stopwords.words('english'))


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [209]:
df=pd.read_csv('/content/tweet_sentiment_dataset.csv')

In [210]:
df

Unnamed: 0,Candidate,Text,Sentiment
0,Candidate_A,"I really admire the new policies introduced, t...",Positive
1,Candidate_B,This speech was full of empty promises and no ...,Negative
2,Candidate_A,"Great leadership during tough economic times, ...",Positive
3,Candidate_B,I am disappointed with the lack of action on c...,Negative
4,Candidate_A,The healthcare reform plan sounds practical an...,Positive
5,Candidate_B,"Another debate and still no clear answers, fru...",Negative
6,Candidate_A,"Strong performance in the interview, answered ...",Positive
7,Candidate_B,I don’t trust these campaign promises at all.,Negative
8,Candidate_A,Impressive vision for education and youth deve...,Positive
9,Candidate_B,The policies discussed today seem unrealistic ...,Negative


In [211]:
df = df[['Candidate','Sentiment', 'Text']]

df.head()

Unnamed: 0,Candidate,Sentiment,Text
0,Candidate_A,Positive,"I really admire the new policies introduced, t..."
1,Candidate_B,Negative,This speech was full of empty promises and no ...
2,Candidate_A,Positive,"Great leadership during tough economic times, ..."
3,Candidate_B,Negative,I am disappointed with the lack of action on c...
4,Candidate_A,Positive,The healthcare reform plan sounds practical an...


In [212]:

def clean_text(text):
    text = text.lower()
    text = re.sub(r"http\S+|www\S+", "", text)
    text = re.sub(r"@\w+|#\w+", "", text)
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = " ".join([word for word in text.split() if word not in stop_words])
    return text

df['clean_text'] = df['Text'].astype(str).apply(clean_text)
df.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['clean_text'] = df['Text'].astype(str).apply(clean_text)


Unnamed: 0,Candidate,Sentiment,Text,clean_text
0,Candidate_A,Positive,"I really admire the new policies introduced, t...",really admire new policies introduced give hop...
1,Candidate_B,Negative,This speech was full of empty promises and no ...,speech full empty promises real solutions
2,Candidate_A,Positive,"Great leadership during tough economic times, ...",great leadership tough economic times reassuring
3,Candidate_B,Negative,I am disappointed with the lack of action on c...,disappointed lack action climate change
4,Candidate_A,Positive,The healthcare reform plan sounds practical an...,healthcare reform plan sounds practical people...


In [213]:

MAX_WORDS = 5000
MAX_LEN = 50

tokenizer = Tokenizer(num_words=MAX_WORDS, oov_token="<OOV>")
tokenizer.fit_on_texts(df['clean_text'])

sequences = tokenizer.texts_to_sequences(df['clean_text'])
X = pad_sequences(sequences, maxlen=MAX_LEN)

X.shape


(20, 50)

In [214]:

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['Sentiment'])
y = to_categorical(y)

y.shape


(20, 2)

In [215]:

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)


In [216]:
model = Sequential([
    Embedding(MAX_WORDS, 128, input_length=MAX_LEN),
    SpatialDropout1D(0.2),
    LSTM(100, dropout=0.2, recurrent_dropout=0.2),
    Dense(y_train.shape[1], activation='softmax') # Changed to y_train.shape[1] (2 classes)
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])





In [222]:
history = model.fit(
    X_train, y_train,
    epochs=5,
    batch_size=32,
    validation_data=(X_val, y_val),
    verbose=1
)

Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153ms/step - accuracy: 1.0000 - loss: 0.5014 - val_accuracy: 0.2500 - val_loss: 0.7276
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step - accuracy: 1.0000 - loss: 0.4589 - val_accuracy: 0.2500 - val_loss: 0.7098
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 128ms/step - accuracy: 1.0000 - loss: 0.4439 - val_accuracy: 0.2500 - val_loss: 0.6907
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step - accuracy: 1.0000 - loss: 0.4115 - val_accuracy: 0.7500 - val_loss: 0.6743
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step - accuracy: 1.0000 - loss: 0.3849 - val_accuracy: 1.0000 - val_loss: 0.6512


In [223]:
loss, accuracy = model.evaluate(X_val, y_val, verbose=0)
print(f"Validation Accuracy: {accuracy * 100:.4f}%")

Validation Accuracy: 100.0000%
