# 1. Install and Import Dependencies

In [1]:
!pip install torch torchvision torchaudio



In [2]:
!pip install transformers requests beautifulsoup4 pandas numpy



In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import re

# 2. Instantiate BERT Model

In [4]:
# Hugging Face checkpoint shared by the tokenizer and the classifier. Keeping the
# identifier in a single constant guarantees both are loaded from the same model.
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"

# Tokenizer that turns raw review text into the token ids the model consumes
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Sequence-classification model whose logits are used to derive the sentiment score
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# 3. Collect Goodreads Reviews

In [55]:
# Fetch the Goodreads page for "The Midnight Library".
# NOTE: a single request is made, so only the reviews embedded in this one page
# are collected.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of silently parsing an error
# page (which would produce an empty review list further down).
# `gr_url` holds the HTTP response object, not a URL string.
gr_url = requests.get(
    "https://www.goodreads.com/book/show/52578297-the-midnight-library",
    timeout=30,
)
gr_url.raise_for_status()

# Parse the HTML and select every <div> whose class matches "reviewText stacked"
soup = BeautifulSoup(gr_url.text, "html.parser")
regex = re.compile(".*reviewText stacked.*")
results = soup.find_all("div", {"class": regex})

# Pull the text out of each review block, then trim surrounding whitespace
reviews = [result.text for result in results]
clean_reviews = [review.strip() for review in reviews]


In [56]:
# Preview only the first couple of matched <div> elements; echoing the whole
# result set floods the notebook with raw HTML.
results[:2]

[<div class="reviewText stacked">
 <span class="readable" id="reviewTextContainer3731565592">
 <span id="freeTextContainer370743448725700184">I liked this book until it suddenly decided to moonlight as a self-help manual, replete with messages that would look great and profound on an Instagram post next to a well-posed cup of coffee with those foam pictures on top. Or embroidered on a pillow — pick your poison.<br><blockquote>
 <i> “She realised that she hadn’t tried to end her life because she was miserable, but because she had managed to convince herself that there was no way out of her misery.”</i>
 </blockquote><br/><b>And these messages have the subtlety of a sledgehammer</b>.</br></span>
 <span id="freeText370743448725700184" style="display:none">I liked this book until it suddenly decided to moonlight as a self-help manual, replete with messages that would look great and profound on an Instagram post next to a well-posed cup of coffee with those foam pictures on top. Or embroide

# 4. Load Reviews into a DataFrame and Score Sentiment

In [57]:
import numpy as np
import pandas as pd

In [58]:
# Build a one-column table of the cleaned review texts. The list is handed to
# pandas directly: routing it through np.array first would create a fixed-width
# unicode array padded to the longest review, only for pandas to convert it again.
df = pd.DataFrame({"Midnight Library Review": clean_reviews})

In [59]:
# Preview the first rows of the review table
df.head()

Unnamed: 0,Midnight Library Review
0,I liked this book until it suddenly decided to...
1,"“The only way to learn is to live”Fuck, this b..."
2,Everybody probably knows the premise of this b...
3,Okay! No more words! This is one of the best s...
4,It is no secret that Matt Haig has mental heal...


In [60]:
def sentiment_score(review):
    """Score a single review with the BERT sentiment model.

    Args:
        review: Review text to classify.

    Returns:
        int: Index of the highest logit plus one, i.e. a 1-based class label.
        For the nlptown sentiment checkpoint this is presumably a 1-5 star
        rating -- confirm against the model card.
    """
    # Encode the text as a PyTorch tensor of token ids. Truncation is applied at
    # the token level because slicing the string by characters does not bound the
    # number of tokens, and BERT-base accepts at most 512 of them.
    tokens = tokenizer.encode(
        review,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    # Inference only: skip autograd bookkeeping to save memory and time
    with torch.no_grad():
        result = model(tokens)
    # Highest-scoring class index, shifted from 0-based to 1-based
    return int(torch.argmax(result.logits)) + 1

In [61]:
# Score every review. Only the first 512 characters of each text are passed to
# sentiment_score (presumably to keep the input within the model's length limit).
df["Sentiment"] = df["Midnight Library Review"].apply(
    lambda review_text: sentiment_score(review_text[:512])
)

In [62]:
# Report the mean of the per-review sentiment scores, shown with two decimals
average_sentiment = df["Sentiment"].mean()
format_average_sentiment = f"{average_sentiment:.2f}"
print(f"Average sentiment analysis score: {format_average_sentiment}")

Average sentiment analysis score: 3.53
