# 1. Install and Import Dependencies

In [4]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

Looking in indexes: https://download.pytorch.org/whl/cu121


In [3]:
!pip install transformers pandas numpy



In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import time

In [2]:
# Check for CUDA
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cpu')

# 2. Instantiate Model

In [3]:
tokenizer = AutoTokenizer.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')

model = AutoModelForSequenceClassification.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')

model = model.to(device)

  torch.utils._pytree._register_pytree_node(


# 3. Encode and Calculate Sentiment

In [4]:
tokens = tokenizer.encode('Too bad', return_tensors='pt')
tokens = tokens.to(device)

In [5]:
result = model(tokens)

In [6]:
result.logits

tensor([[ 2.8941,  1.9527,  0.2461, -2.0751, -2.4096]],
       grad_fn=<AddmmBackward0>)

In [7]:
int(torch.argmax(result.logits))+1

1

# 4. Load Reviews into DataFrame and Score

In [8]:
import numpy as np
import pandas as pd

In [9]:
df = pd.read_csv('selected_1000_reviews.csv')

In [10]:
df

Unnamed: 0,review_text,human_rating
0,"I bought both boxed sets, books 1-5. Really a...",5
1,I enjoyed this short book. But it was way way ...,3
2,I really enjoyed this adventure and look forwa...,4
3,It was a decent read.. typical story line. Not...,3
4,"This is the First book in the Trilogy, and I'm...",5
...,...,...
995,"Exciting, riveting and well plotted. With each...",5
996,Found this book in a give-away area of my loca...,2
997,I love this series and i can't wait for the la...,5
998,this is a beautiful coming to age story. I lik...,3


In [11]:
def sentiment_score(review):
    tokens = tokenizer.encode(review, return_tensors='pt')
    tokens = tokens.to(device)
    result = model(tokens)
    return int(torch.argmax(result.logits))+1

In [12]:
sentiment_score(df['review_text'].iloc[0])

5

In [13]:
# Find Time Taken to Process
start_time = time.time()
df['Sentiment'] = df['review_text'].apply(lambda x: sentiment_score(x[:512]))
end_time = time.time()
elapsed_time = end_time - start_time

print(f"Elapsed time: {elapsed_time} seconds")

Elapsed time: 107.43370962142944 seconds


In [14]:
df

Unnamed: 0,review_text,human_rating,Sentiment
0,"I bought both boxed sets, books 1-5. Really a...",5,5
1,I enjoyed this short book. But it was way way ...,3,3
2,I really enjoyed this adventure and look forwa...,4,5
3,It was a decent read.. typical story line. Not...,3,3
4,"This is the First book in the Trilogy, and I'm...",5,4
...,...,...,...
995,"Exciting, riveting and well plotted. With each...",5,5
996,Found this book in a give-away area of my loca...,2,3
997,I love this series and i can't wait for the la...,5,5
998,this is a beautiful coming to age story. I lik...,3,4


In [15]:
df['review_text'].iloc[30]

'Good advice!'

# 5. Find Average and Varience

In [16]:
# Calculate the average of the 'human_rating' column
average_human_rating = df['human_rating'].mean()

# Calculate the average of the 'Sentiment' column
average_sentiment = df['Sentiment'].mean()

print("Average human_rating:", average_human_rating)
print("Average Sentiment:", average_sentiment)

Average human_rating: 4.386
Average Sentiment: 4.16


In [17]:
# save the DataFrame to a CSV file
df.to_csv('BERT_reviews.csv', index=False)