# Sentiment Analysis of Amazon Electronics Reviews Using BERT

1. Data Preparation and Sampling 

In [1]:
import pandas as pd

# Read the line-delimited JSON review file and keep only the review text and
# the star rating, discarding every row where either of the two is missing.
file_path = "Electronics_5.json"
data = (
    pd.read_json(file_path, lines=True)
    .loc[:, ['reviewText', 'overall']]
    .dropna()
)

# Map star ratings to sentiment labels
def map_sentiment(star):
    """Translate a star rating into a sentiment label.

    A rating of 2 or lower is "negative", a rating of exactly 3 is
    "neutral", and every other value is "positive".
    """
    if star == 3:
        return "neutral"
    return "negative" if star <= 2 else "positive"

# Derive a sentiment label for every review from its star rating.
data['sentiment'] = data['overall'].map(map_sentiment)

# Report how many reviews fall into each sentiment class.
class_counts = data['sentiment'].value_counts()
print("Class Distribution Before Sampling:")
print(class_counts)


# Draw an equally sized random sample from every sentiment class.
# The groups are sampled one by one and concatenated instead of going through
# `groupby(...).apply(...)`: pandas has deprecated `apply` operating on the
# grouping column, and the result here needs to keep the 'sentiment' column.
# Each group is still sampled with the same seed, and groups are visited in
# sorted key order, so the selected rows and their order are unchanged.
sample_size_per_class = 100000  # Adjust this size as needed
small_data = pd.concat(
    [
        # Cap at the class size so a class with fewer rows than requested
        # contributes all of its rows instead of making `sample` raise.
        group.sample(min(sample_size_per_class, len(group)), random_state=42)
        for _, group in data.groupby('sentiment')
    ]
).reset_index(drop=True)

# Use entire dataset
# small_data = data

# Check class distribution in the smaller dataset
print("\nClass Distribution After Sampling:")
print(small_data['sentiment'].value_counts())

# Split the smaller dataset into training and testing sets
from sklearn.model_selection import train_test_split

# Hold out 20% of the sampled reviews for testing; stratifying on the label
# keeps the class proportions the same in both partitions.
train_data, test_data = train_test_split(
    small_data,
    test_size=0.2,
    random_state=42,
    stratify=small_data['sentiment'],
)

# Write both partitions to disk (without the index column) for the later steps.
for frame, csv_path in (
    (train_data, "electronics_small_train.csv"),
    (test_data, "electronics_small_test.csv"),
):
    frame.to_csv(csv_path, index=False)

print("\nSmaller dataset created and saved as electronics_small_train.csv and electronics_small_test.csv!")


Class Distribution Before Sampling:
sentiment
positive    5459749
negative     773776
neutral      504712
Name: count, dtype: int64


  .apply(lambda x: x.sample(sample_size_per_class, random_state=42))



Class Distribution After Sampling:
sentiment
negative    100000
neutral     100000
positive    100000
Name: count, dtype: int64

Smaller dataset created and saved as electronics_small_train.csv and electronics_small_test.csv!


2. Tokenization

In [2]:
import pandas as pd
from transformers import AutoTokenizer

# Load the tokenizer for a pre-trained model
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)


def _load_reviews(csv_path):
    """Read a review CSV and keep only the rows that have usable review text.

    An empty review string is written to CSV as an empty field and is read
    back by `read_csv` as NaN. Missing values therefore have to be filtered
    out *before* the column is cast to str: `astype(str)` would otherwise
    turn them into the literal text "nan", which passes the blank-text check
    and would be tokenized as if it were a real review.

    Returns a new DataFrame whose 'reviewText' column holds non-blank strings.
    """
    reviews = pd.read_csv(csv_path)
    text = reviews['reviewText']
    # Test for missing values on the raw column, before any cast to str.
    has_text = text.notna() & (text.astype(str).str.strip() != "")
    reviews = reviews.loc[has_text].copy()
    reviews['reviewText'] = reviews['reviewText'].astype(str)
    return reviews


# Load the smaller training and test datasets
train_data = _load_reviews("electronics_small_train.csv")
test_data = _load_reviews("electronics_small_test.csv")

# Tokenize the training and testing data
def tokenize_data(data, tokenizer, max_length=128):
    """Tokenize the 'reviewText' column of `data` with `tokenizer`.

    The texts are handed over as a plain list. Truncation and padding are
    switched on, `max_length` is forwarded, and PyTorch tensors are requested
    via return_tensors="pt". Returns whatever the tokenizer call returns.
    """
    review_texts = data['reviewText'].tolist()
    tokenizer_options = {
        "truncation": True,
        "padding": True,
        "max_length": max_length,
        "return_tensors": "pt",
    }
    return tokenizer(review_texts, **tokenizer_options)

# Encode both splits with the same tokenizer and settings.
print("Tokenizing training data...")
train_encodings = tokenize_data(train_data, tokenizer)

print("Tokenizing test data...")
test_encodings = tokenize_data(test_data, tokenizer)

# Turn the sentiment names into integer class ids, using one mapping for both splits.
label_to_id = {"negative": 0, "neutral": 1, "positive": 2}
train_labels = train_data['sentiment'].map(label_to_id).values
test_labels = test_data['sentiment'].map(label_to_id).values

print("\nTokenization complete!")


Tokenizing training data...
Tokenizing test data...

Tokenization complete!


3. Fine-tuning

In [None]:
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments
import torch

# Load the pre-trained model for sequence classification.
# The class names are stored in the model config so that the saved model, and
# any pipeline built on top of it, reports "negative"/"neutral"/"positive"
# instead of the generic "LABEL_0"/"LABEL_1"/"LABEL_2". The ids follow the
# label encoding used for train_labels/test_labels (negative=0, neutral=1,
# positive=2).
id2label = {0: "negative", 1: "neutral", 2: "positive"}
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(id2label),
    id2label=id2label,
    label2id={name: idx for idx, name in id2label.items()},
)

# Prepare datasets for Hugging Face Trainer
class ReviewDataset(torch.utils.data.Dataset):
    """Dataset that pairs tokenizer encodings with their labels.

    `encodings` is a mapping from field name to an indexable collection and
    `labels` is an indexable collection of the same length. Indexing yields a
    dict holding the idx-th entry of every encoding field plus a 'labels'
    tensor built from the idx-th label.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The number of labels defines the dataset size.
        return len(self.labels)

    def __getitem__(self, idx):
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = values[idx]
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Wrap the tokenized splits so the Trainer can index them.
train_dataset = ReviewDataset(train_encodings, train_labels)
test_dataset = ReviewDataset(test_encodings, test_labels)

# Training arguments
training_args = TrainingArguments(
    output_dir="./results",          # output directory
    num_train_epochs=3,              # number of training epochs
    per_device_train_batch_size=16,  # batch size for training
    per_device_eval_batch_size=64,   # batch size for evaluation
    eval_strategy="epoch",           # evaluate each epoch
    save_strategy="epoch",           # save model each epoch
    logging_dir="./logs",            # logging directory
    logging_steps=10,
    load_best_model_at_end=True      # reload the best checkpoint after training
)

# Define the trainer.
# `processing_class` replaces the deprecated `tokenizer` argument of Trainer
# (on transformers releases that predate it, pass `tokenizer=tokenizer`).
# NOTE(review): eval_dataset is the test split, and load_best_model_at_end
# picks the checkpoint using it, so metrics later reported on the same split
# are optimistic. A separate validation split would avoid this.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    processing_class=tokenizer
)

# Start fine-tuning
print("Starting fine-tuning...")
trainer.train()

# Save the fine-tuned model together with its tokenizer so the directory can
# be loaded on its own later.
model.save_pretrained("./fine_tuned_model")
tokenizer.save_pretrained("./fine_tuned_model")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Starting fine-tuning...


  0%|          | 0/45000 [00:00<?, ?it/s]

{'loss': 1.1183, 'grad_norm': 5.032028675079346, 'learning_rate': 4.998888888888889e-05, 'epoch': 0.0}
{'loss': 1.0672, 'grad_norm': 4.460709571838379, 'learning_rate': 4.997777777777778e-05, 'epoch': 0.0}
{'loss': 1.0839, 'grad_norm': 3.456252098083496, 'learning_rate': 4.996666666666667e-05, 'epoch': 0.0}
{'loss': 0.9457, 'grad_norm': 5.423242092132568, 'learning_rate': 4.995555555555556e-05, 'epoch': 0.0}
{'loss': 0.822, 'grad_norm': 9.602535247802734, 'learning_rate': 4.994444444444445e-05, 'epoch': 0.0}
{'loss': 0.8247, 'grad_norm': 5.927615165710449, 'learning_rate': 4.993333333333334e-05, 'epoch': 0.0}
{'loss': 0.6922, 'grad_norm': 12.152961730957031, 'learning_rate': 4.9922222222222226e-05, 'epoch': 0.0}
{'loss': 0.829, 'grad_norm': 8.805343627929688, 'learning_rate': 4.991111111111111e-05, 'epoch': 0.01}
{'loss': 0.7683, 'grad_norm': 8.257232666015625, 'learning_rate': 4.99e-05, 'epoch': 0.01}
{'loss': 0.7872, 'grad_norm': 6.353187084197998, 'learning_rate': 4.9888888888888894

  0%|          | 0/938 [00:00<?, ?it/s]

{'eval_loss': 0.5333630442619324, 'eval_runtime': 39.7094, 'eval_samples_per_second': 1510.976, 'eval_steps_per_second': 23.622, 'epoch': 1.0}
{'loss': 0.5497, 'grad_norm': 4.0691657066345215, 'learning_rate': 3.332222222222222e-05, 'epoch': 1.0}
{'loss': 0.4256, 'grad_norm': 6.955343246459961, 'learning_rate': 3.3311111111111116e-05, 'epoch': 1.0}
{'loss': 0.395, 'grad_norm': 3.287321090698242, 'learning_rate': 3.33e-05, 'epoch': 1.0}
{'loss': 0.4382, 'grad_norm': 6.85208797454834, 'learning_rate': 3.328888888888889e-05, 'epoch': 1.0}
{'loss': 0.4338, 'grad_norm': 2.8766653537750244, 'learning_rate': 3.327777777777778e-05, 'epoch': 1.0}
{'loss': 0.4329, 'grad_norm': 2.4362070560455322, 'learning_rate': 3.326666666666667e-05, 'epoch': 1.0}
{'loss': 0.5185, 'grad_norm': 6.052011966705322, 'learning_rate': 3.325555555555556e-05, 'epoch': 1.0}
{'loss': 0.4825, 'grad_norm': 4.2942023277282715, 'learning_rate': 3.3244444444444445e-05, 'epoch': 1.01}
{'loss': 0.429, 'grad_norm': 4.5312790870

  0%|          | 0/938 [00:00<?, ?it/s]

{'eval_loss': 0.5311068892478943, 'eval_runtime': 39.357, 'eval_samples_per_second': 1524.505, 'eval_steps_per_second': 23.833, 'epoch': 2.0}
{'loss': 0.3076, 'grad_norm': 4.849832057952881, 'learning_rate': 1.6655555555555558e-05, 'epoch': 2.0}
{'loss': 0.3557, 'grad_norm': 4.909651279449463, 'learning_rate': 1.6644444444444445e-05, 'epoch': 2.0}
{'loss': 0.3538, 'grad_norm': 4.386287689208984, 'learning_rate': 1.6633333333333336e-05, 'epoch': 2.0}
{'loss': 0.2583, 'grad_norm': 6.32199764251709, 'learning_rate': 1.6622222222222223e-05, 'epoch': 2.0}
{'loss': 0.3112, 'grad_norm': 4.893469333648682, 'learning_rate': 1.661111111111111e-05, 'epoch': 2.0}
{'loss': 0.4112, 'grad_norm': 6.16323184967041, 'learning_rate': 1.66e-05, 'epoch': 2.0}
{'loss': 0.3805, 'grad_norm': 5.304479598999023, 'learning_rate': 1.658888888888889e-05, 'epoch': 2.0}
{'loss': 0.3095, 'grad_norm': 6.834551811218262, 'learning_rate': 1.6577777777777778e-05, 'epoch': 2.01}
{'loss': 0.3107, 'grad_norm': 5.83831882476

  0%|          | 0/938 [00:00<?, ?it/s]

{'eval_loss': 0.5958405137062073, 'eval_runtime': 37.8508, 'eval_samples_per_second': 1585.173, 'eval_steps_per_second': 24.782, 'epoch': 3.0}
{'train_runtime': 2350.4257, 'train_samples_per_second': 306.327, 'train_steps_per_second': 19.145, 'train_loss': 0.469470033454895, 'epoch': 3.0}


TrainOutput(global_step=45000, training_loss=0.469470033454895, metrics={'train_runtime': 2350.4257, 'train_samples_per_second': 306.327, 'train_steps_per_second': 19.145, 'total_flos': 4.736041519104e+16, 'train_loss': 0.469470033454895, 'epoch': 3.0})

4. Evaluation

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Compute the Trainer's own evaluation metrics on its evaluation dataset.
print("Evaluating the model...")
metrics = trainer.evaluate()
print(metrics)

# Run the model over the test set and pick the highest-scoring class per review.
predictions = trainer.predict(test_dataset)
logits = predictions.predictions
predicted_labels = np.argmax(logits, axis=1)
true_labels = test_labels

# Class names listed in label-id order (0, 1, 2).
class_names = ["negative", "neutral", "positive"]

# Per-class precision / recall / F1.
print("\nClassification Report:")
report = classification_report(true_labels, predicted_labels, target_names=class_names)
print(report)

# Counts of true class (rows) versus predicted class (columns).
print("\nConfusion Matrix:")
matrix = confusion_matrix(true_labels, predicted_labels)
print(matrix)

Evaluating the model...


  0%|          | 0/938 [00:00<?, ?it/s]

{'eval_loss': 0.5311068892478943, 'eval_runtime': 40.5319, 'eval_samples_per_second': 1480.315, 'eval_steps_per_second': 23.142, 'epoch': 3.0}


  0%|          | 0/938 [00:00<?, ?it/s]


Classification Report:
              precision    recall  f1-score   support

    negative       0.80      0.78      0.79     20000
     neutral       0.69      0.69      0.69     20000
    positive       0.86      0.87      0.86     20000

    accuracy                           0.78     60000
   macro avg       0.78      0.78      0.78     60000
weighted avg       0.78      0.78      0.78     60000


Confusion Matrix:
[[15548  4026   426]
 [ 3714 13817  2469]
 [  279  2318 17403]]


5. Gradio Demo

In [6]:
import gradio as gr
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer

import torch

# Load the fine-tuned model and tokenizer
model_path = "./fine_tuned_model"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Create a pipeline for sentiment analysis.
# Use the first GPU when CUDA is available and fall back to the CPU (-1)
# otherwise, so the demo also starts on machines without a GPU.
device = 0 if torch.cuda.is_available() else -1
sentiment_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer, device=device)

# Define the prediction function
# Generic names a pipeline emits when the model config carries no class names,
# mapped to the label encoding used during training (negative=0, neutral=1,
# positive=2).
_GENERIC_LABEL_NAMES = {
    "LABEL_0": "negative",
    "LABEL_1": "neutral",
    "LABEL_2": "positive",
}


def predict_sentiment(review_text):
    """Classify one review and format the result for display.

    Args:
        review_text: The review entered by the user.

    Returns:
        "Sentiment: <label>, Confidence: <score>" with the score rounded to
        two decimals, or a short prompt when the input is missing or blank.
    """
    # Nothing to classify: answer directly instead of scoring an empty string.
    if review_text is None or not review_text.strip():
        return "Please enter a review to analyze."

    # Truncate to the 128-token limit used when tokenizing the training data;
    # without truncation, inputs longer than the model's maximum sequence
    # length make the pipeline call fail.
    result = sentiment_pipeline(review_text, truncation=True, max_length=128)
    raw_label = result[0]['label']
    # Show a readable class name; labels that are already names pass through.
    label = _GENERIC_LABEL_NAMES.get(raw_label, raw_label)
    confidence = result[0]['score']
    return f"Sentiment: {label}, Confidence: {confidence:.2f}"

# Build the Gradio UI: one text input, one text output showing the prediction.
interface = gr.Interface(
    title="Sentiment Analysis Demo",
    description="Input a product review to see its predicted sentiment.",
    fn=predict_sentiment,
    inputs="text",
    outputs="text",
)

# Start the app; share=True additionally requests a public link.
interface.launch(share=True)


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://e7675153a9d668b668.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


