In [1]:
import pandas as pd
import numpy as np
import re
import string
import nltk
import seaborn as sns
import matplotlib.pyplot as plt

import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

In [2]:
# Download necessary NLTK data
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

In [3]:
df = pd.read_csv('/content/tweets.csv')

In [4]:
df.head()

Unnamed: 0,id,label,tweet
0,1,0,#fingerprint #Pregnancy Test https://goo.gl/h1...
1,2,0,Finally a transparant silicon case ^^ Thanks t...
2,3,0,We love this! Would you go? #talk #makememorie...
3,4,0,I'm wired I know I'm George I was made that wa...
4,5,1,What amazing service! Apple won't even talk to...


In [5]:
df = df[['label','tweet']]

In [6]:
df = df.rename(columns={ 'label': 'sentiment'})
df.head()

Unnamed: 0,sentiment,tweet
0,0,#fingerprint #Pregnancy Test https://goo.gl/h1...
1,0,Finally a transparant silicon case ^^ Thanks t...
2,0,We love this! Would you go? #talk #makememorie...
3,0,I'm wired I know I'm George I was made that wa...
4,1,What amazing service! Apple won't even talk to...


In [7]:
df.isnull().sum()

Unnamed: 0,0
sentiment,0
tweet,0


#Preprocessing

In [8]:
# Initialize tools
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

In [9]:
# Text cleaning and preprocessing function
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'http\S+|www.\S+', '', text)
    text = re.sub(r'@\w+|#\w+|\d+', '', text)
    text = text.translate(str.maketrans('', '', string.punctuation))
    tokens = text.split()
    tokens = [word for word in tokens if word not in stop_words]
    tokens = [stemmer.stem(lemmatizer.lemmatize(word)) for word in tokens]
    return ' '.join(tokens)

df['clean_tweet'] = df['tweet'].apply(preprocess_text)

In [10]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(df['clean_tweet'], df['sentiment'], test_size=0.2, random_state=42)


In [11]:
# Tokenize text
max_words = 10000
max_len = 300
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)

In [12]:
X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)

In [13]:
X_train = sequence.pad_sequences(X_train, maxlen=max_len)
X_test = sequence.pad_sequences(X_test, maxlen=max_len)

In [14]:
X_train[0]

array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,   

#Model

In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Embedding,SimpleRNN

In [16]:
model = Sequential()
model.add(Embedding(input_dim=10000,output_dim=100,input_shape=(300,)))
model.add(SimpleRNN(128,dropout=0.2))
model.add(Dense(1,activation='sigmoid'))

  super().__init__(**kwargs)


In [17]:
model.summary()

#Compile

In [18]:
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [19]:
model.fit(X_train,y_train,batch_size=256,epochs = 10)

Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 544ms/step - accuracy: 0.7189 - loss: 0.5401
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 669ms/step - accuracy: 0.8568 - loss: 0.3533
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 551ms/step - accuracy: 0.9116 - loss: 0.2415
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 544ms/step - accuracy: 0.9469 - loss: 0.1560
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 554ms/step - accuracy: 0.9513 - loss: 0.1379
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 554ms/step - accuracy: 0.6483 - loss: 0.8905
Epoch 7/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 560ms/step - accuracy: 0.9146 - loss: 0.2160
Epoch 8/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 559ms/step - accuracy: 0.9406 - loss: 0.1675
Epoch 9/10
[1m25/25[0m [32m━━

<keras.src.callbacks.history.History at 0x7d279ffa4710>

#Evaluate Model

In [20]:
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)

print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - accuracy: 0.8881 - loss: 0.2868
Test Loss: 0.3078
Test Accuracy: 0.8699


#LSTM- Model 1

In [21]:
from tensorflow.keras.layers import LSTM

In [22]:
model1 = Sequential()
model1.add(Embedding(input_dim = 10000,output_dim=100,input_shape=(300,)))
model1.add(LSTM(50,return_sequences=True))
model1.add(LSTM(128,dropout = 0.2))
model1.add(Dense(1,activation='sigmoid'))

In [23]:
model1.summary()

In [24]:
#Compile
model1.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [25]:
model1.fit(X_train,y_train,batch_size=256,epochs = 10)

Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 2s/step - accuracy: 0.7149 - loss: 0.5646
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 2s/step - accuracy: 0.8120 - loss: 0.4016
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 2s/step - accuracy: 0.8807 - loss: 0.2989
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 2s/step - accuracy: 0.9125 - loss: 0.2255
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 2s/step - accuracy: 0.9334 - loss: 0.1778
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 2s/step - accuracy: 0.9495 - loss: 0.1443
Epoch 7/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 2s/step - accuracy: 0.9611 - loss: 0.1237
Epoch 8/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 2s/step - accuracy: 0.9698 - loss: 0.1058
Epoch 9/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x7d278c0ce490>

In [26]:
#Evaluate
loss, accuracy = model1.evaluate(X_test, y_test, verbose=1)

print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 165ms/step - accuracy: 0.8532 - loss: 0.5362
Test Loss: 0.5798
Test Accuracy: 0.8453


#GRU - Model2

In [27]:
from tensorflow.keras.layers import GRU

In [29]:
model2  =Sequential()
model2.add(Embedding(input_dim = 10000,output_dim = 100,input_shape=(300,)))
model2.add(GRU(128,dropout=0.2))
model2.add(Dense(1,activation='sigmoid'))

In [30]:
model2.summary()

In [31]:
#Compile
model2.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [32]:
model2.fit(X_train,y_train,batch_size=256,epochs = 10)

Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 2s/step - accuracy: 0.7294 - loss: 0.5669
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 2s/step - accuracy: 0.8581 - loss: 0.3642
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 2s/step - accuracy: 0.8995 - loss: 0.2731
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 2s/step - accuracy: 0.9207 - loss: 0.2056
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 2s/step - accuracy: 0.9461 - loss: 0.1539
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 2s/step - accuracy: 0.9587 - loss: 0.1260
Epoch 7/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 2s/step - accuracy: 0.9646 - loss: 0.1074
Epoch 8/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 2s/step - accuracy: 0.9727 - loss: 0.0855
Epoch 9/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x7d2792f4a350>

In [None]:
#Evaluate
loss, accuracy = model2.evaluate(X_test, y_test, verbose=1)

print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

#Conv 1D - Model 3

In [33]:
from tensorflow.keras.layers import Conv1D,Dropout,GlobalMaxPooling1D

In [35]:
model3 = Sequential()
model3.add(Embedding(input_dim=10000,output_dim=100,input_shape=(300,)))
model3.add(Conv1D(64,kernel_size=3,activation ='relu'))
model3.add(Dropout(0.2))

model3.add(Conv1D(62,kernel_size=3,activation='relu'))
model3.add(Dropout(0.2))

model3.add(GlobalMaxPooling1D())

model3.add(Dense(128,activation='relu'))
model3.add(Dense(1,activation='relu'))

In [37]:
model3.summary()

In [38]:
#Compile
model3.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [39]:
model3.fit(X_train,y_train,batch_size=256,epochs = 10)

Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 473ms/step - accuracy: 0.7493 - loss: 0.6117
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 447ms/step - accuracy: 0.8292 - loss: 0.4038
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 468ms/step - accuracy: 0.8864 - loss: 0.2839
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 471ms/step - accuracy: 0.9121 - loss: 0.2398
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 437ms/step - accuracy: 0.9334 - loss: 0.2116
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 470ms/step - accuracy: 0.9434 - loss: 0.1675
Epoch 7/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 475ms/step - accuracy: 0.9548 - loss: 0.1476
Epoch 8/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 472ms/step - accuracy: 0.9758 - loss: 0.1144
Epoch 9/10
[1m25/25[0m [32m━━

<keras.src.callbacks.history.History at 0x7d278d094290>

In [40]:
#Evaluate
loss, accuracy = model3.evaluate(X_test, y_test, verbose=1)

print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.8248 - loss: 0.7593
Test Loss: 0.7340
Test Accuracy: 0.8251
