In [28]:
import pandas as pd
import re
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

**Loading Dataset**

In [6]:
train_df = pd.read_csv('twitter_training.csv')

In [3]:
val_df = pd.read_csv('twitter_validation.csv')

In [13]:
val_df.columns = ['id', 'source', 'review', 'text']
train_df.columns = ['id', 'source', 'review', 'text']

In [12]:
val_df.head()

Unnamed: 0,id,source,review,text
0,352,Amazon,Neutral,BBC News - Amazon boss Jeff Bezos rejects clai...
1,8312,Microsoft,Negative,@Microsoft Why do I pay for WORD when it funct...
2,4371,CS-GO,Negative,"CSGO matchmaking is so full of closet hacking,..."
3,4433,Google,Neutral,Now the President is slapping Americans in the...
4,6273,FIFA,Negative,Hi @EAHelp I’ve had Madeleine McCann in my cel...


In [14]:
train_df.head()

Unnamed: 0,id,source,review,text
0,2401,Borderlands,Positive,I am coming to the borders and I will kill you...
1,2401,Borderlands,Positive,im getting on borderlands and i will kill you ...
2,2401,Borderlands,Positive,im coming on borderlands and i will murder you...
3,2401,Borderlands,Positive,im getting on borderlands 2 and i will murder ...
4,2401,Borderlands,Positive,im getting into borderlands and i can murder y...


**Pre-processing Dataset**

In [16]:
X_train = train_df.text
Y_train = train_df.review.map({'Negative':0, 'Neutral':1, 'Positive':2, 'Irrelevant':3})

In [18]:
X_test = val_df.text
Y_test = val_df.review.map({'Negative':0, 'Neutral':1, 'Positive':2, 'Irrelevant':3})

In [32]:
def clean_text(text):
  text = str(text)
  text = text.lower()
  text = re.sub(r'[^\w\s]', '', text)
  text = re.sub(r'\s+', ' ', text).strip()
  return text

In [35]:
X_train = X_train.apply(clean_text)
X_test = X_test.apply(clean_text)

In [36]:
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(['text'])

In [37]:
X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)

In [38]:
X_train = pad_sequences(X_train, maxlen = 50, padding = 'post')
X_test = pad_sequences(X_test, maxlen = 50, padding = 'post')

**Creating Model**

In [40]:
model = Sequential()
model.add(Embedding(1000,128, input_length = 50))
model.add(LSTM(64, return_sequences= False))
#model.add(LSTM(32, return_sequences= False))
#model.add(LSTM(16, return_sequences= False))
model.add(Dense(4, activation = 'softmax'))

In [47]:
model.compile(optimizer="adam", loss = "sparse_categorical_crossentropy", metrics=["accuracy"])

In [48]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 50, 128)           128000    
                                                                 
 lstm (LSTM)                 (None, 64)                49408     
                                                                 
 dense (Dense)               (None, 4)                 260       
                                                                 
Total params: 177668 (694.02 KB)
Trainable params: 177668 (694.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
X_train = np.array(X_train)
Y_train = np.array(Y_train)
X_test = np.array(X_test)
Y_test = np.array(Y_test)

**Training**

In [58]:
model.fit(X_train, Y_train, epochs = 5, batch_size = 32, validation_data=(X_test, Y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7e6b4447c7f0>

**Evaluation**

In [59]:
test_loss, test_accuracy = model.evaluate(X_test,Y_test)



In [60]:
train_loss, train_accuracy = model.evaluate(X_train,Y_train)



In [61]:
print(f'tarin_accuracy:{train_accuracy}  test_accuracy:{test_accuracy}')

tarin_accuracy:0.3018438518047333  test_accuracy:0.26626625657081604
