# Install and import dependencies

In [None]:
# Install PyTorch with CUDA 11.8 support
# This installs PyTorch deep learning library with GPU acceleration capabilities

!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

Looking in indexes: https://download.pytorch.org/whl/cu118



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
# Import necessary libraries
# transformers: Hugging Face library for pre-trained models
# torch: PyTorch deep learning framework
# requests: For making HTTP requests
# re: For regular expressions
# pandas: For data manipulation and analysis
# numpy: For numerical operations

import os
import re

import numpy as np
import pandas as pd
import requests
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

  from .autonotebook import tqdm as notebook_tqdm


# Instantiate model

In [None]:
# Name of the pre-trained checkpoint, defined once so that the tokenizer and
# the model are always loaded from the same checkpoint

MODEL_NAME = 'nlptown/bert-base-multilingual-uncased-sentiment'

# Load the pre-trained tokenizer for multilingual sentiment analysis
# This tokenizer converts text into tokens that the model can understand
# The model is trained to handle sentiment analysis in multiple languages

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load the pre-trained sentiment analysis model
# This model is fine-tuned to classify text into 5 sentiment levels (1-5 stars)

model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


# Encode and calculate sentiment

In [None]:
# Try the model on a sample sentence
# The tokenizer converts the text to tokens the model can process;
# return_tensors='pt' asks for the result as PyTorch tensors

sample_text = 'It was good but couldve been better. Great'
tokens = tokenizer.encode(sample_text, return_tensors='pt')

In [None]:
# Pass the tokens through the model to get sentiment predictions
# The returned object exposes the raw class scores as `result.logits`
# NOTE(review): this call runs with autograd enabled (the logits carry a
# grad_fn); for pure inference it could be wrapped in torch.no_grad() -
# confirm gradients are not needed before changing it

result = model(tokens)

In [None]:
# Display the raw logits (prediction scores before softmax)
# These are the unnormalized scores for each sentiment class;
# the position of the largest value is the predicted class

result.logits

tensor([[-2.7768, -1.2353,  1.4419,  1.9804,  0.4584]],
       grad_fn=<AddmmBackward0>)

In [None]:
# Turn the logits into a star rating
# argmax gives the position of the highest logit (0-4) and .item() unwraps
# it into a plain Python int
# Adding 1 maps that position onto the 1-5 star scale

result.logits.argmax().item() + 1

4

# Load the dataset

In [None]:
# Load the Amazon reviews dataset from a CSV file
# This dataset contains reviews that we'll analyze for sentiment
# The file location can be overridden with the REVIEWS_CSV environment
# variable so the notebook can run on other machines; when the variable is
# not set, the original local path is used

REVIEWS_CSV = os.environ.get('REVIEWS_CSV', r'C:\Users\Prashant\Documents\Nitin\Projects\7817_1.csv')
amazon_reviews_df = pd.read_csv(REVIEWS_CSV)

In [None]:
# Display the column names to understand the dataset structure
# The review text used for sentiment analysis below is in 'reviews.text'

amazon_reviews_df.columns

Index(['id', 'asins', 'brand', 'categories', 'colors', 'dateAdded',
       'dateUpdated', 'dimension', 'ean', 'keys', 'manufacturer',
       'manufacturerNumber', 'name', 'prices', 'reviews.date',
       'reviews.doRecommend', 'reviews.numHelpful', 'reviews.rating',
       'reviews.sourceURLs', 'reviews.text', 'reviews.title',
       'reviews.userCity', 'reviews.userProvince', 'reviews.username', 'sizes',
       'upc', 'weight'],
      dtype='object')

In [None]:
# Create a reference to the loaded DataFrame
# This binds a second name to the same object; no copy is made here

reviews = amazon_reviews_df

In [None]:
# Keep only the review text column for sentiment analysis
# Selecting with a list keeps the result a one-column DataFrame (not a Series)

reviews = reviews.loc[:, ['reviews.text']]

In [None]:
# Build the working DataFrame from the review texts
# The values are copied out as a NumPy array and wrapped in a new DataFrame,
# which gives a clean frame with a single column named 'review'

df = pd.DataFrame(reviews.to_numpy(copy=True), columns=['review'])

In [None]:
# Display the first few rows to verify the data
# head() shows the first five rows by default

df.head()

Unnamed: 0,review
0,I initially had trouble deciding between the p...
1,Allow me to preface this with a little history...
2,I am enjoying it so far. Great for reading. Ha...
3,I bought one of the first Paperwhites and have...
4,I have to say upfront - I don't like coroporat...


# Calculate the score

In [None]:
# Examine the first review in the DataFrame
# iloc selects by position, so this returns the full text stored in row 0

df['review'].iloc[0]

"I initially had trouble deciding between the paperwhite and the voyage because reviews more or less said the same thing: the paperwhite is great, but if you have spending money, go for the voyage.Fortunately, I had friends who owned each, so I ended up buying the paperwhite on this basis: both models now have 300 ppi, so the 80 dollar jump turns out pricey the voyage's page press isn't always sensitive, and if you are fine with a specific setting, you don't need auto light adjustment).It's been a week and I am loving my paperwhite, no regrets! The touch screen is receptive and easy to use, and I keep the light at a specific setting regardless of the time of day. (In any case, it's not hard to change the setting either, as you'll only be changing the light level at a certain time of day, not every now and then while reading).Also glad that I went for the international shipping option with Amazon. Extra expense, but delivery was on time, with tracking, and I didnt need to worry about cu

In [None]:
# Define a function to calculate sentiment scores for any review text

def sentiment_score(review):
    """Return the predicted star rating (1-5) for a piece of review text.

    Uses the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        review: The review text to score.

    Returns:
        int: Position of the highest-scoring class plus one, i.e. a rating
        on the 1-5 star scale.
    """
    # Truncate at the token level so that inputs longer than the model's
    # 512-token limit are shortened by the tokenizer instead of failing
    # inside the model
    tokens = tokenizer.encode(review, return_tensors='pt', truncation=True, max_length=512)
    # Inference only: no gradients are needed, so skip building the autograd graph
    with torch.no_grad():
        result = model(tokens)
    return int(torch.argmax(result.logits)) + 1

In [None]:
# Test the sentiment_score function on the second review
# The full, unsliced review text is passed here

sentiment_score(df['review'].iloc[1])

5

In [None]:
# Apply the sentiment_score function to all reviews in the DataFrame
# The [:512] slice keeps only the first 512 characters of each review
# (string slicing counts characters, not tokens); it is a rough guard for
# BERT's maximum input length rather than an exact token limit
# This creates a new column 'sentiment' with scores 1-5 for each review

df['sentiment'] = df['review'].apply(lambda x: sentiment_score(x[:512]))

In [None]:
# Display the DataFrame with both reviews and their sentiment scores
# pandas abbreviates long frames in the rendered output (first and last rows only)

df

Unnamed: 0,review,sentiment
0,I initially had trouble deciding between the p...,4
1,Allow me to preface this with a little history...,4
2,I am enjoying it so far. Great for reading. Ha...,4
3,I bought one of the first Paperwhites and have...,4
4,I have to say upfront - I don't like coroporat...,2
...,...,...
1592,This is not the same remote that I got for my ...,2
1593,I have had to change the batteries in this rem...,1
1594,"Remote did not activate, nor did it connect to...",1
1595,It does the job but is super over priced. I fe...,3


In [None]:
# Display the fourth review text to examine it
# (iloc is zero-based, so position 3 is the fourth row)

df['review'].iloc[3]

'I bought one of the first Paperwhites and have been very pleased with it its been a constant companion and I suppose Ive read, on average, a book every three days for the past however many years on it. I wouldnt give it up youd have to pry it from my cold dead fingers.For sundry logistical reasons, Ive also made good use of Amazons Kindle app on my iPhone. No Paperwhite screen, naturally, and all the cool usability that delivers, but it works well and has its own attractions as a companion to the Kindle.Of course, there are aspects of the Paperwhite which I would like to critique. Ah you knew that was coming somewhere, didnt you.As a member of BookBub, I get a daily list of alerts and book deals in my chosen genres. I take on many of them, however, Ive found that, even with the best will in the world, I cant keep up. Some days it seems that for every book I read, Ive bought two. Theres just so much good stuff out there! The accumulative effect of this is that the number of books actua