1. Install and Import Dependencies

In [None]:
!pip install transformers requests beautifulsoup4 pandas numpy

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import re

2. Instantiate Model

In [None]:
# Checkpoint identifier shared by the tokenizer and the classifier, kept in
# one place so the two are always loaded from the same source.
MODEL_NAME = 'nlptown/bert-base-multilingual-uncased-sentiment'

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

3. Encode and Calculate Sentiment

In [None]:
# Sample sentence used to sanity-check the tokenizer/model pipeline.
sample_text = 'It was good but couldve been better. Great'
# Encode the sentence to token ids, returned as a PyTorch tensor ('pt').
tokens = tokenizer.encode(sample_text, return_tensors='pt')

In [None]:
# Run the encoded sample through the classifier; the returned object exposes
# its raw scores as `.logits`, which the following cells inspect.
result = model(tokens)

In [None]:
# Display the raw output scores the model produced for the sample sentence.
result.logits

In [None]:
# Position of the highest logit, shifted by one so the result starts at 1
# instead of 0.
int(result.logits.argmax()) + 1

4. Collect Reviews

In [None]:
# Download the business page. The timeout keeps the cell from hanging on a
# stalled connection, and raise_for_status() surfaces a blocked or failed
# request immediately instead of silently producing an empty review list.
r = requests.get('https://www.yelp.com/biz/social-brew-cafe-pyrmont', timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')

# Review text is taken from every <p> tag whose class attribute matches
# "comment". NOTE(review): this depends on the site's current markup --
# confirm the selector still matches before relying on the results.
regex = re.compile('.*comment.*')
results = soup.find_all('p', {'class': regex})

# The loop variable is named `paragraph` so it is not mistaken for the model
# output called `result` in the earlier cells.
reviews = [paragraph.text for paragraph in results]

In [None]:
# Display the list of review texts extracted from the page.
reviews

5. Load Reviews into DataFrame and Score

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Create a DataFrame 'df' with a single column named 'review', built directly
# from the list of review texts. Going through np.array() first would create a
# fixed-width unicode array (every entry padded to the longest review) and
# would give the column a float dtype when the list is empty.
df = pd.DataFrame({'review': reviews})

In [None]:
# Peek at the first review (position 0) to confirm the frame holds the
# scraped text; raises IndexError if no reviews were collected.
df['review'].iloc[0]

In [None]:
# Function to calculate sentiment score of a given review
def sentiment_score(review, max_length=512):
    """Return the model's predicted score for ``review`` as a 1-based integer.

    Args:
        review: Text to score.
        max_length: Maximum number of tokens passed to the model; longer
            input is truncated by the tokenizer. Defaults to 512.
            NOTE(review): 512 is assumed to be the checkpoint's maximum
            sequence length -- confirm against the model config.

    Returns:
        int: Position of the highest logit, shifted so the lowest value is 1.
    """
    # Truncate at the token level so an over-long review cannot exceed the
    # model's input size (slicing the string by characters does not guarantee
    # this, and discards text unnecessarily).
    tokens = tokenizer.encode(
        review,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )

    # Inference only: skip gradient tracking to save memory and time.
    with torch.no_grad():
        result = model(tokens)

    # argmax is 0-based; shift to a 1-based score.
    return int(torch.argmax(result.logits)) + 1


In [None]:
# Spot-check the scorer on one row: the review at position 1 of 'df'.
sentiment_score(df['review'].iat[1])


In [None]:
def _score_truncated(text):
    """Score only the first 512 characters of ``text``.

    NOTE(review): the slice is presumably a guard for the model's input
    limit; it counts characters, not tokens -- confirm the intent.
    """
    return sentiment_score(text[:512])


# Score every review and store the results in a new 'sentiment' column.
df['sentiment'] = df['review'].apply(_score_truncated)
df

In [None]:
# Show the review at position 3. The closing quote on the column name was
# missing, which made this cell a SyntaxError.
df['review'].iloc[3]

# end of project