In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from tensorflow.keras.utils import to_categorical

# Load and preprocess data
data = pd.read_csv('twitter_training.csv', header=None)
data.columns = ['Sr No', 'Ecommerce Website', 'Sentiment', 'Review']
data = data[['Sentiment', 'Review']]

# Text preprocessing
data['Review'] = data['Review'].astype(str)  # Ensure all values are strings
data['Review'] = data['Review'].str.lower()
data['Review'] = data['Review'].str.replace(r'\brt\b', ' ', regex=True)
data['Review'] = data['Review'].apply(lambda x: ''.join([char if char.isalnum() or char.isspace() else '' for char in x]))

# Sentiment encoding
sentiment_map = {'Positive': 0, 'Negative': 1, 'Neutral': 2, 'Irrelevant': 3}
data['Sentiment'] = data['Sentiment'].map(sentiment_map)

# Tokenization and padding
max_features = 2000
tokenizer = Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(data['Review'].values)
X = tokenizer.texts_to_sequences(data['Review'].values)
X = pad_sequences(X)

# Convert Sentiment to categorical
Y = to_categorical(data['Sentiment'].values, num_classes=4)

# Build model
embed_dim = 128
model = Sequential()
model.add(Embedding(max_features, embed_dim, input_length=X.shape[1]))
model.add(SpatialDropout1D(0.4))
model.add(LSTM(196, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(4, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X, Y, epochs=10, batch_size=32, verbose=2)

print("\nSentiment Prediction using LSTM")
maxlen = X.shape[1]
sentiment_classes = ['Positive', 'Negative', 'Neutral', 'Irrelevant']

while True:
    
    print("\nMenu:")
    print("1. Enter a review")
    print("2. Exit")
    ch = input("Enter your choice (1/2): ")

    if ch == '1':
        ip = input("\nEnter a review: ")
        ip = ip.lower()
        ip = ''.join([char if char.isalnum() or char.isspace() else '' for char in ip])
        
        review_seq = tokenizer.texts_to_sequences([ip])
        review_seq = pad_sequences(review_seq, maxlen=maxlen, dtype='int32', value=0)
        
        sentiment = model.predict(review_seq, batch_size=1, verbose=2)[0]
        print(f"Predicted Sentiment: {sentiment_classes[np.argmax(sentiment)]}")
   
    elif ch == '2':
        break
    else:
        print("Invalid choice")


Epoch 1/10




2334/2334 - 513s - 220ms/step - accuracy: 0.5561 - loss: 1.0521
Epoch 2/10
2334/2334 - 482s - 207ms/step - accuracy: 0.6410 - loss: 0.8809
Epoch 3/10
2334/2334 - 463s - 198ms/step - accuracy: 0.6789 - loss: 0.7978
Epoch 4/10
2334/2334 - 448s - 192ms/step - accuracy: 0.7063 - loss: 0.7361
Epoch 5/10
2334/2334 - 487s - 209ms/step - accuracy: 0.7286 - loss: 0.6791
Epoch 6/10
2334/2334 - 570s - 244ms/step - accuracy: 0.7512 - loss: 0.6261
Epoch 7/10
2334/2334 - 600s - 257ms/step - accuracy: 0.7702 - loss: 0.5805
Epoch 8/10
2334/2334 - 579s - 248ms/step - accuracy: 0.7850 - loss: 0.5427
Epoch 9/10
2334/2334 - 588s - 252ms/step - accuracy: 0.7992 - loss: 0.5094
Epoch 10/10
2334/2334 - 595s - 255ms/step - accuracy: 0.8122 - loss: 0.4771

Sentiment Prediction using LSTM

Menu:
1. Enter a review
2. Exit
Enter your choice (1/2): 1

Enter a review: Check out this epic streamer!.
1/1 - 1s - 1s/step
Predicted Sentiment: Neutral

Menu:
1. Enter a review
2. Exit
Enter your choice (1/2): 2


In [1]:
# numpy: Library for numerical operations.
# pandas: Used to handle and manipulate data in a tabular format (CSV files).
# tensorflow.keras.preprocessing.text.Tokenizer: Tokenizer to convert text data into sequences (integer representation of words).
# tensorflow.keras.preprocessing.sequence.pad_sequences: To ensure the sequences of text are of equal length by padding them.
# tensorflow.keras.models.Sequential: Defines a linear stack of layers in the neural network.
# tensorflow.keras.layers.Dense, Embedding, LSTM, SpatialDropout1D: Layers used in the LSTM-based model.
# tensorflow.keras.utils.to_categorical: Converts labels to categorical format (one-hot encoded).

    
# Loading and Preprocessing the Data:
# Loads the dataset from a CSV file (twitter_training.csv), where columns represent Sr No, Ecommerce Website, Sentiment, and Review.
# The dataset is filtered to only include Sentiment and Review columns.


# Text Preprocessing:
# Converts the Review column to string format.
# Converts all text to lowercase for uniformity.
# Removes Twitter-specific artifacts like "RT" (retweets).
# Removes all non-alphanumeric characters except spaces using a lambda function.

# Sentiment Encoding:
# Maps sentiment labels (Positive, Negative, Neutral, Irrelevant) to numeric values (0, 1, 2, 3).

# Tokenization and Padding:
# Tokenizer: Converts the text reviews into sequences of integers, where each word is mapped to a unique integer.
# num_words=max_features: Limits the vocabulary to the top 2000 most frequent words.
# pad_sequences: Pads sequences to ensure uniform length, which is crucial for feeding the data into an LSTM model.
 

# Convert Sentiment to Categorical:    
# Converts the sentiment labels (0, 1, 2, 3) into categorical (one-hot) format with 4 classes.


# Building the LSTM Model:
# Embedding Layer: Turns integer sequences into dense vector representations of fixed size (embed_dim=128).
# SpatialDropout1D: Applies dropout to the input sequence, which helps in preventing overfitting.
# LSTM Layer: Long Short-Term Memory (LSTM) network processes sequential data. It contains 196 units, with dropout for both input and recurrent connections (dropout=0.2, recurrent_dropout=0.2).
# Dense Layer: A fully connected layer that outputs the prediction. The softmax activation function is used because this is a multi-class classification problem.
# Model Compilation: Uses categorical_crossentropy as the loss function for multi-class classification and the adam optimizer.
    
    
# Training the Model:
# The model is trained on the input data (X) and target labels (Y) for 10 epochs with a batch size of 32.


# Sentiment Prediction:
# The user enters a review.
# The review is preprocessed similarly to the training data (lowercased, non-alphanumeric characters removed).
# The review is converted to a sequence of integers and padded to match the input length (maxlen).
# The model predicts the sentiment, and the sentiment class with the highest probability is selected and displayed.



# LSTM (Long Short-Term Memory) is a type of Recurrent Neural Network (RNN) specifically designed to address the issue of learning long-term dependencies. 
# RNNs have trouble retaining information over long sequences due to the vanishing gradient problem. 
# LSTMs solve this by introducing gates that control the flow of information, allowing them to "remember" important information over long sequences and "forget" irrelevant details.

# Key Components of LSTM:
# Cell State: The cell state is the "memory" of the network, which carries information 
# across time steps. The information in the cell state is updated by the gates at each time step.

# Forget Gate: Decides what information from the previous cell state should be discarded.

# Input Gate: Determines what new information should be stored in the cell state.

# Output Gate: Decides what part of the cell state should be output as the hidden state to the next time step.

# LSTM for Sentiment Analysis:
# LSTM is ideal for sentiment analysis because reviews, tweets, or any text data are sequential, meaning the order of words matters.
# LSTM networks capture the temporal dependencies in text by processing the sequence word by word, 
# making them effective for tasks like sentiment analysis, where the sentiment can depend on the context of words in a sentence.



# Question: What is the role of the Embedding layer in this model?
# Answer: The Embedding layer is used to convert the integer representation of words (obtained via tokenization) into dense vector representations (embeddings) of fixed size. 
# This helps capture semantic relationships between words, where similar words will have similar vector representations.

# 2. Question: Why do we use SpatialDropout1D in this model?
# Answer: SpatialDropout1D is used to drop entire 1D feature maps (rows of the input data) instead of individual features. This helps prevent overfitting by forcing the model to learn more robust features, improving generalization.

# 3. Question: What does pad_sequences do in the code?
# Answer: pad_sequences ensures that all input sequences (reviews) are of the same length by adding padding (usually zeros) to the sequences that are shorter than the defined length, and truncating those that are longer.

# 4. Question: What is the purpose of using to_categorical for the sentiment labels?
# Answer: to_categorical converts the sentiment labels into one-hot encoded vectors, which are required for multi-class classification. 
# Each label is converted into a vector of length 4, where the corresponding index for the sentiment class is set to 1, and the rest are 0.

# 5. Question: Why is softmax used in the output layer?
# Answer: Softmax is used in the output layer because it is a multi-class classification problem. 
# Softmax converts the output into probabilities, where the class with the highest probability is considered the predicted sentiment.

# 6. Question: How does LSTM handle long-term dependencies in text?
# Answer: LSTM networks use a memory cell and gates (forget, input, and output gates) to store, update, and output information over long sequences, allowing them to retain important information and discard irrelevant parts, thus handling long-term dependencies effectively.

SyntaxError: invalid syntax (2357671739.py, line 1)