<a href="https://colab.research.google.com/github/kr5red/automated-customer-reviews/blob/main/main_ki.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# Product Review Sentiment Notebook (Classical Setup with Transformers)

This notebook:
1. Installs **transformers** and **pandas** dependencies.
2. Loads your CSV of reviews.
3. Merges `reviews.title` and `reviews.text` into `text_merged`.
4. Light-cleans text, drops missing/duplicates.
5. Runs **cardiffnlp/twitter-roberta-base-sentiment** to get **negative/neutral/positive**.
6. Saves results to `reviews_with_sentiment.csv` and shows quick summaries.

> Tip: If your CSV is zipped, see the *Unzip (optional)* cell.


In [1]:
device = -1  # force CPU

## 0) Install dependencies (run once)

In [2]:

# If running locally, uncomment the next line to install once per environment.
# You may need to restart the kernel after first install.
# %%capture
!pip install -q --upgrade pip
!pip install -q pandas numpy transformers torch tqdm


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.4/1.8 MB[0m [31m11.5 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.8/1.8 MB[0m [31m24.0 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.8/1.8 MB[0m [31m24.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[?25h

## 1) Imports

In [3]:

import os
import re
import math
import pandas as pd
import numpy as np
from tqdm import tqdm

from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
import torch


## 2) Load the Dataset

In [4]:

#Set up the csv_path
import pandas as pd
from pathlib import Path
import kagglehub
pd.set_option('display.max_columns', None)

# Download all files
path = kagglehub.dataset_download("datafiniti/consumer-reviews-of-amazon-products")
csv_paths = list(Path(path).glob("*.csv"))


dfs = [pd.read_csv(p) for p in csv_paths]
df = pd.concat(dfs, ignore_index=True)

df = df.drop_duplicates(
    subset=['id', 'dateAdded', 'dateUpdated', 'reviews.text', 'reviews.title', 'reviews.username'],
    keep='first'
)

print(df.shape)
df.head()


Using Colab cache for faster access to the 'consumer-reviews-of-amazon-products' dataset.


  dfs = [pd.read_csv(p) for p in csv_paths]


(67896, 27)


Unnamed: 0,id,dateAdded,dateUpdated,name,asins,brand,categories,primaryCategories,imageURLs,keys,manufacturer,manufacturerNumber,reviews.date,reviews.dateAdded,reviews.dateSeen,reviews.doRecommend,reviews.id,reviews.numHelpful,reviews.rating,reviews.sourceURLs,reviews.text,reviews.title,reviews.username,sourceURLs,reviews.didPurchase,reviews.userCity,reviews.userProvince
0,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2017-09-03T00:00:00.000Z,,"2018-05-27T00:00:00Z,2017-09-18T00:00:00Z,2017...",False,,0.0,3.0,http://reviews.bestbuy.com/3545/5442403/review...,I thought it would be as big as small paper bu...,Too small,llyyue,https://www.newegg.com/Product/Product.aspx%25...,,,
1,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2017-06-06T00:00:00.000Z,,"2018-05-27T00:00:00Z,2017-07-07T00:00:00Z,2017...",True,,0.0,5.0,http://reviews.bestbuy.com/3545/5442403/review...,This kindle is light and easy to use especiall...,Great light reader. Easy to use at the beach,Charmi,https://www.newegg.com/Product/Product.aspx%25...,,,
2,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2018-04-20T00:00:00.000Z,,2018-05-27T00:00:00Z,True,,0.0,4.0,https://reviews.bestbuy.com/3545/5442403/revie...,Didnt know how much i'd use a kindle so went f...,Great for the price,johnnyjojojo,https://www.newegg.com/Product/Product.aspx%25...,,,
3,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2017-11-02T17:33:31.000Z,,2018-10-09T00:00:00Z,True,177283626.0,3.0,5.0,https://redsky.target.com/groot-domain-api/v1/...,I am 100 happy with my purchase. I caught it o...,A Great Buy,Kdperry,https://www.newegg.com/Product/Product.aspx%25...,,,
4,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2018-04-24T00:00:00.000Z,,2018-05-27T00:00:00Z,True,,0.0,5.0,https://reviews.bestbuy.com/3545/5442403/revie...,Solid entry level Kindle. Great for kids. Gift...,Solid entry-level Kindle. Great for kids,Johnnyblack,https://www.newegg.com/Product/Product.aspx%25...,,,


## 3.1) Merge title + text

In [6]:
#merge title and text
def merge_title_text(row):
    t1 = str(row.get("reviews.title") if pd.notna(row.get("reviews.title")) else "").strip()
    t2 = str(row.get("reviews.text") if pd.notna(row.get("reviews.text")) else "").strip()
    if t1 and t2:
        return f"{t1}. {t2}"
    return t1 or t2

df["review_text"] = df.apply(merge_title_text, axis=1)
df.head()

Unnamed: 0,id,dateAdded,dateUpdated,name,asins,brand,categories,primaryCategories,imageURLs,keys,manufacturer,manufacturerNumber,reviews.date,reviews.dateAdded,reviews.dateSeen,reviews.doRecommend,reviews.id,reviews.numHelpful,reviews.rating,reviews.sourceURLs,reviews.text,reviews.title,reviews.username,sourceURLs,reviews.didPurchase,reviews.userCity,reviews.userProvince,review_text
0,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2017-09-03T00:00:00.000Z,,"2018-05-27T00:00:00Z,2017-09-18T00:00:00Z,2017...",False,,0.0,3.0,http://reviews.bestbuy.com/3545/5442403/review...,I thought it would be as big as small paper bu...,Too small,llyyue,https://www.newegg.com/Product/Product.aspx%25...,,,,Too small. I thought it would be as big as sma...
1,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2017-06-06T00:00:00.000Z,,"2018-05-27T00:00:00Z,2017-07-07T00:00:00Z,2017...",True,,0.0,5.0,http://reviews.bestbuy.com/3545/5442403/review...,This kindle is light and easy to use especiall...,Great light reader. Easy to use at the beach,Charmi,https://www.newegg.com/Product/Product.aspx%25...,,,,Great light reader. Easy to use at the beach. ...
2,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2018-04-20T00:00:00.000Z,,2018-05-27T00:00:00Z,True,,0.0,4.0,https://reviews.bestbuy.com/3545/5442403/revie...,Didnt know how much i'd use a kindle so went f...,Great for the price,johnnyjojojo,https://www.newegg.com/Product/Product.aspx%25...,,,,Great for the price. Didnt know how much i'd u...
3,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2017-11-02T17:33:31.000Z,,2018-10-09T00:00:00Z,True,177283626.0,3.0,5.0,https://redsky.target.com/groot-domain-api/v1/...,I am 100 happy with my purchase. I caught it o...,A Great Buy,Kdperry,https://www.newegg.com/Product/Product.aspx%25...,,,,A Great Buy. I am 100 happy with my purchase. ...
4,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2018-04-24T00:00:00.000Z,,2018-05-27T00:00:00Z,True,,0.0,5.0,https://reviews.bestbuy.com/3545/5442403/revie...,Solid entry level Kindle. Great for kids. Gift...,Solid entry-level Kindle. Great for kids,Johnnyblack,https://www.newegg.com/Product/Product.aspx%25...,,,,Solid entry-level Kindle. Great for kids. Soli...


## 3.2) Cleaning

In [7]:
# Lowercase, strip URLs/emails, collapse whitespace
def clean_text(s: str) -> str:
    s = str(s)
    s = s.lower()
    s = re.sub(r"[^\w\s'.,!?-]", " ", s)  # keep common punctuation
    s = re.sub(r"\s+", " ", s).strip()
    return s

df["review_text"].fillna("").apply(clean_text)

# Drop missing/empty
df = df[df["review_text"].str.len() > 0].copy()

# Drop exact duplicates of the clean text
before = len(df)
df = df.drop_duplicates(subset=["review_text"]).reset_index(drop=True)
after = len(df)
print(f"Dropped {before - after} duplicate rows. Remaining: {after}")
df.head()


Dropped 20576 duplicate rows. Remaining: 47320


Unnamed: 0,id,dateAdded,dateUpdated,name,asins,brand,categories,primaryCategories,imageURLs,keys,manufacturer,manufacturerNumber,reviews.date,reviews.dateAdded,reviews.dateSeen,reviews.doRecommend,reviews.id,reviews.numHelpful,reviews.rating,reviews.sourceURLs,reviews.text,reviews.title,reviews.username,sourceURLs,reviews.didPurchase,reviews.userCity,reviews.userProvince,review_text
0,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2017-09-03T00:00:00.000Z,,"2018-05-27T00:00:00Z,2017-09-18T00:00:00Z,2017...",False,,0.0,3.0,http://reviews.bestbuy.com/3545/5442403/review...,I thought it would be as big as small paper bu...,Too small,llyyue,https://www.newegg.com/Product/Product.aspx%25...,,,,Too small. I thought it would be as big as sma...
1,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2017-06-06T00:00:00.000Z,,"2018-05-27T00:00:00Z,2017-07-07T00:00:00Z,2017...",True,,0.0,5.0,http://reviews.bestbuy.com/3545/5442403/review...,This kindle is light and easy to use especiall...,Great light reader. Easy to use at the beach,Charmi,https://www.newegg.com/Product/Product.aspx%25...,,,,Great light reader. Easy to use at the beach. ...
2,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2018-04-20T00:00:00.000Z,,2018-05-27T00:00:00Z,True,,0.0,4.0,https://reviews.bestbuy.com/3545/5442403/revie...,Didnt know how much i'd use a kindle so went f...,Great for the price,johnnyjojojo,https://www.newegg.com/Product/Product.aspx%25...,,,,Great for the price. Didnt know how much i'd u...
3,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2017-11-02T17:33:31.000Z,,2018-10-09T00:00:00Z,True,177283626.0,3.0,5.0,https://redsky.target.com/groot-domain-api/v1/...,I am 100 happy with my purchase. I caught it o...,A Great Buy,Kdperry,https://www.newegg.com/Product/Product.aspx%25...,,,,A Great Buy. I am 100 happy with my purchase. ...
4,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2018-04-24T00:00:00.000Z,,2018-05-27T00:00:00Z,True,,0.0,5.0,https://reviews.bestbuy.com/3545/5442403/revie...,Solid entry level Kindle. Great for kids. Gift...,Solid entry-level Kindle. Great for kids,Johnnyblack,https://www.newegg.com/Product/Product.aspx%25...,,,,Solid entry-level Kindle. Great for kids. Soli...


## 4) Load sentiment model

In [8]:

# Load chosen sentiment model: cardiffnlp/twitter-roberta-base-sentiment
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import torch

model_name = "cardiffnlp/twitter-roberta-base-sentiment"

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Max length safety: many RoBERTa-family models use 512 tokens (special tokens make the 514 you saw)
MAX_LEN = min(getattr(tokenizer, "model_max_length", 512), 512)

# Build id2label mapping robustly
id2label = getattr(model.config, "id2label", None)
if not id2label or not isinstance(id2label, dict) or len(id2label) < 3:
    id2label = {0: "negative", 1: "neutral", 2: "positive"}

# Use HF pipeline; we will pass truncation/padding at call time
pipe = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,
    return_all_scores=True
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/747 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Device set to use cpu


## 5) Run inference in batches and collect predictions

In [14]:
from tqdm import tqdm

texts = df["review_text"].tolist()
batch_size = 16  # reduce if RAM is tight

pred_labels, pred_scores = [], []

for i in tqdm(range(0, len(texts), batch_size)):
    batch = texts[i:i+batch_size]
    # 🔑 KEY: enforce truncation/padding and a safe max_length
    outputs = pipe(batch, truncation=True, padding=True, max_length=MAX_LEN)

    for scores in outputs:
        # pick top class
        best = max(scores, key=lambda d: d["score"])
        label = best["label"].lower()
        # Normalize labels if they come as LABEL_0/1/2
        if label.startswith("label_"):
            idx = int(label.split("_")[-1])
            label = id2label.get(idx, str(idx)).lower()
        if label not in {"negative", "neutral", "positive"}:
            # fallback via rank order if labels are unexpected
            idx = scores.index(best)
            label = id2label.get(idx, "neutral").lower()

        pred_labels.append(label)
        pred_scores.append(float(best["score"]))

df["sentiment"] = pred_labels
df["sentiment_confidence"] = pred_scores

  0%|          | 3/2958 [00:11<3:07:32,  3.81s/it]


KeyboardInterrupt: 

In [15]:
df.head()

Unnamed: 0,id,dateAdded,dateUpdated,name,asins,brand,categories,primaryCategories,imageURLs,keys,manufacturer,manufacturerNumber,reviews.date,reviews.dateAdded,reviews.dateSeen,reviews.doRecommend,reviews.id,reviews.numHelpful,reviews.rating,reviews.sourceURLs,reviews.text,reviews.title,reviews.username,sourceURLs,reviews.didPurchase,reviews.userCity,reviews.userProvince,review_text,sentiment,sentiment_confidence,true_sentiment
0,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2017-09-03T00:00:00.000Z,,"2018-05-27T00:00:00Z,2017-09-18T00:00:00Z,2017...",False,,0.0,3.0,http://reviews.bestbuy.com/3545/5442403/review...,I thought it would be as big as small paper bu...,Too small,llyyue,https://www.newegg.com/Product/Product.aspx%25...,,,,Too small. I thought it would be as big as sma...,label_0,0.690747,neutral
1,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2017-06-06T00:00:00.000Z,,"2018-05-27T00:00:00Z,2017-07-07T00:00:00Z,2017...",True,,0.0,5.0,http://reviews.bestbuy.com/3545/5442403/review...,This kindle is light and easy to use especiall...,Great light reader. Easy to use at the beach,Charmi,https://www.newegg.com/Product/Product.aspx%25...,,,,Great light reader. Easy to use at the beach. ...,label_2,0.984449,positive
2,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2018-04-20T00:00:00.000Z,,2018-05-27T00:00:00Z,True,,0.0,4.0,https://reviews.bestbuy.com/3545/5442403/revie...,Didnt know how much i'd use a kindle so went f...,Great for the price,johnnyjojojo,https://www.newegg.com/Product/Product.aspx%25...,,,,Great for the price. Didnt know how much i'd u...,label_2,0.94964,positive
3,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2017-11-02T17:33:31.000Z,,2018-10-09T00:00:00Z,True,177283626.0,3.0,5.0,https://redsky.target.com/groot-domain-api/v1/...,I am 100 happy with my purchase. I caught it o...,A Great Buy,Kdperry,https://www.newegg.com/Product/Product.aspx%25...,,,,A Great Buy. I am 100 happy with my purchase. ...,label_2,0.934534,positive
4,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,Amazon,B00ZV9PXP2,2018-04-24T00:00:00.000Z,,2018-05-27T00:00:00Z,True,,0.0,5.0,https://reviews.bestbuy.com/3545/5442403/revie...,Solid entry level Kindle. Great for kids. Gift...,Solid entry-level Kindle. Great for kids,Johnnyblack,https://www.newegg.com/Product/Product.aspx%25...,,,,Solid entry-level Kindle. Great for kids. Soli...,label_2,0.983082,positive


## 6) Save results

In [11]:

out_path = "reviews_with_sentiment.csv"
df.to_csv(out_path, index=False)
out_path, df.shape


('reviews_with_sentiment.csv', (47320, 31))

## 7) Quick summary & sanity checks

In [16]:

print(df["sentiment"].value_counts(dropna=False))
df.sample(5, random_state=42)[["reviews.title", "reviews.text", "review_text", "sentiment", "sentiment_confidence", "true_sentiment"]]


sentiment
label_2    42955
label_0     2485
label_1     1880
Name: count, dtype: int64


Unnamed: 0,reviews.title,reviews.text,review_text,sentiment,sentiment_confidence,true_sentiment
8790,Great for Readers,I bought this for my daughter. She loves it! S...,Great for Readers. I bought this for my daught...,label_2,0.988677,positive
16840,Good starter tablet,I bought this product as a Christmas gift and ...,Good starter tablet. I bought this product as ...,label_2,0.986684,positive
29906,Not quite there,Alexa is OK for basic weather and turning on/o...,Not quite there. Alexa is OK for basic weather...,label_2,0.588906,neutral
25565,"Great device, kids love it!","Device is great, easy setup (less than 10 minu...","Great device, kids love it!. Device is great, ...",label_2,0.980153,positive
42618,So fare working great.,Got these for some LED motion lights. So fare ...,So fare working great.. Got these for some LED...,label_2,0.970427,positive


In [17]:
from sklearn.metrics import accuracy_score, classification_report

# Map ratings (1–5) to sentiment
def rating_to_sentiment(r):
    if r <= 2:
        return "negative"
    elif r == 3:
        return "neutral"
    else:
        return "positive"

df["true_sentiment"] = df["reviews.rating"].apply(rating_to_sentiment)

# Compare with model predictions
y_true = df["true_sentiment"]
y_pred = df["sentiment"]

print("Accuracy:", accuracy_score(y_true, y_pred))
print(classification_report(y_true, y_pred))


Accuracy: 0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

     label_0       0.00      0.00      0.00       0.0
     label_1       0.00      0.00      0.00       0.0
     label_2       0.00      0.00      0.00       0.0
    negative       0.00      0.00      0.00    1843.0
     neutral       0.00      0.00      0.00    2073.0
    positive       0.00      0.00      0.00   43404.0

    accuracy                           0.00   47320.0
   macro avg       0.00      0.00      0.00   47320.0
weighted avg       0.00      0.00      0.00   47320.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
