In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras import utils
from tensorflow.keras.layers import LSTM, Dense, Embedding, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

---

# Data Preprocessing

In [None]:
def loading_and_processing(path):
    """Parse a ``__label__<n> <text>`` review file into a DataFrame.

    Every line is split once on the first space. Lines that contain no
    space (for example blank lines) are skipped.

    Parameters
    ----------
    path : str or os.PathLike
        Location of the text file to read. It is decoded as UTF-8.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``label`` (the integer following ``__label__`` minus
        one, so labels 1/2 become 0/1) and ``text`` (the rest of the line
        with surrounding whitespace stripped).

    Raises
    ------
    ValueError
        If the first token of a line is not ``__label__`` followed by an
        integer.
    """
    labels = []
    texts = []

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding, which can garble or reject non-ASCII text.
    with open(path, 'r', encoding='utf-8') as file:
        # Iterate the file lazily instead of materialising every line with
        # readlines() before parsing.
        for line in file:
            parts = line.split(' ', 1)
            if len(parts) > 1:
                # Subtract 1 to convert the labels from {1, 2} to {0, 1}.
                label = int(parts[0].replace('__label__', '')) - 1
                labels.append(label)
                texts.append(parts[1].strip())

    return pd.DataFrame({'label': labels, 'text': texts})

In [None]:
# Locations of the pre-split training and test review files.
# NOTE(review): the analysis is meant to use 50,000 training and 10,000 test
# reviews, but nothing in this cell sub-samples the data, so the files are
# presumably already cut down to that size - TODO confirm.
# The folder is stored in DATA_DIR (not `dir`) so that the built-in dir() is
# not shadowed for the rest of the kernel session.
DATA_DIR = '../Datasets/amazon sentiment analysis'
train_dir = os.path.join(DATA_DIR, 'train.txt')
test_dir = os.path.join(DATA_DIR, 'test.txt')

In [None]:
# Parse the training file into a DataFrame and show it as the cell output.
train_data = loading_and_processing(path=train_dir)

train_data

In [None]:
# Parse the test file into a DataFrame and show it as the cell output.
test_data = loading_and_processing(path=test_dir)

test_data

---

# Model Building

In [None]:
# Every review is padded or truncated to this many token ids.
max_len = 200

# The vocabulary is learned from the training reviews only; the test
# reviews are encoded with that same vocabulary.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_data['text'])

# There is no need for train_test_split since we have predefined train and test files.
X_train = pad_sequences(tokenizer.texts_to_sequences(train_data['text']), maxlen=max_len)
X_test = pad_sequences(tokenizer.texts_to_sequences(test_data['text']), maxlen=max_len)

# Targets as plain NumPy arrays.
y_train, y_test = train_data['label'].values, test_data['label'].values

In [None]:
# Visual check that features and labels have the same number of rows per split.
print(
    f"X_train size: {len(X_train)}, y_train size: {len(y_train)}",
    f"X_test size: {len(X_test)}, y_test size: {len(y_test)}",
    sep="\n",
)

In [None]:
# Number of rows the embedding table needs: one per word in the tokenizer's
# index, plus one because word ids start at 1 and id 0 is left for padding.
vocab_size = 1 + len(tokenizer.word_index)

vocab_size

In [None]:
# Seed Python, NumPy and TensorFlow before any weights are created so that a
# Restart & Run All starts from the same initialisation and shuffling order.
# NOTE(review): some GPU kernels may still be non-deterministic - confirm
# whether tf.config.experimental.enable_op_determinism() is also wanted.
SEED = 42
utils.set_random_seed(SEED)

# Stacked-LSTM binary classifier over padded sequences of token ids.
model = Sequential()
model.add(Input(shape=(max_len,)))           # max_len token ids per review
model.add(Embedding(vocab_size, 100))        # 100-dimensional vector per token id
model.add(LSTM(128, return_sequences=True))  # full sequence is passed on to the next LSTM
model.add(Dropout(0.2))
model.add(LSTM(64))                          # only the last output is kept
model.add(Dense(1, activation='sigmoid'))    # single probability for the 0/1 label
model.summary()

In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for two passes over the training set; `history` holds the per-epoch metrics.
history = model.fit(x=X_train, y=y_train, epochs=2)

---

# Model Evaluation

In [None]:
# Raw model scores for every test review.
preds = model.predict(X_test)

# Collapse to one dimension, then mark scores above 0.5 as class 1 and the rest as class 0.
PREDS = (preds.flatten() > 0.5).astype(int)

PREDS

In [None]:
# Tally how many test reviews were assigned to each class.
unique_values, counts = np.unique(PREDS, return_counts=True)
value_counts = dict(zip(unique_values, counts))

# Use .get(..., 0) so that a model which predicts the same class for every
# review reports a count of 0 for the other class instead of raising KeyError.
print(f'0 Counts: {value_counts.get(0, 0)}.')
print(f'1 Counts: {value_counts.get(1, 0)}.')

In [None]:
# Loss and accuracy on the test file as computed by Keras.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Threshold the predicted scores at 0.5 and flatten to 1-D so the predictions
# have the same shape as y_test.
y_pred = (model.predict(X_test) > 0.5).astype(int).flatten()

# zero_division=0 reports 0 for any metric whose denominator is empty.
print(classification_report(y_test, y_pred, zero_division=0))

# labels=[0, 1] keeps the matrix 2x2 even if one class never shows up, so the
# tick labels below always line up with the rows and columns.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
class_names = ['Negative (0)', 'Positive (1)']

fig, ax = plt.subplots()
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion matrix on the test set')
plt.show()

---
# Result Analysis

In analyzing the final model's performance, I focused on its classification accuracy across sentiment classes. The model achieved an overall accuracy of 90.17% during training and 89.76% on the test set. I also examined precision, recall, and F1 score metrics, which revealed that the model performs consistently well, with an F1 score of 0.90 for each sentiment class (and therefore a macro-averaged F1 of 0.90).

The detailed metrics indicated that the model has strong performance for both sentiment classes. Specifically, the precision for class 0 (negative sentiment) was 0.88 and for class 1 (positive sentiment) was 0.91, while the recall was 0.91 and 0.88, respectively. This shows balanced performance, with slight variation in precision and recall between the classes.

Based on these insights, I can refine the model further by focusing on any minor imbalances or enhancing its ability to distinguish between sentiments more precisely.