# 1. Install and Import Dependencies

In [17]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import re

import numpy as np
import pandas as pd

# 2. Instantiate Model

In [6]:
tokenizer = AutoTokenizer.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')
model = AutoModelForSequenceClassification.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')

Downloading (…)okenizer_config.json:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading (…)lve/main/config.json:   0%|          | 0.00/953 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/872k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/669M [00:00<?, ?B/s]

# 3. Encode and Calculate Sentiment

In [7]:
token = tokenizer.encode('This restaurant has good ambience but the food is average.',return_tensors='pt')
token

tensor([[  101, 10372, 21688, 10438, 12050, 10627, 11754, 10421, 10502, 10103,
         15225, 10127, 11237,   119,   102]])

In [8]:
results = model(token)
results

SequenceClassifierOutput(loss=None, logits=tensor([[-1.3876,  0.4345,  1.9524,  0.8416, -1.5450]],
       grad_fn=<AddmmBackward>), hidden_states=None, attentions=None)

In [9]:
results.logits

tensor([[-1.3876,  0.4345,  1.9524,  0.8416, -1.5450]],
       grad_fn=<AddmmBackward>)

In [10]:
int(torch.argmax(results.logits))+1

3

# 4. Collect Reviews

In [12]:
r = requests.get('https://www.yelp.com/biz/social-brew-cafe-pyrmont')
soup = BeautifulSoup(r.text, 'html.parser')
regex = re.compile('.*comment.*')
results = soup.find_all('p', {'class':regex})
reviews = [result.text for result in results]

In [65]:
reviews = []
for i in range(10,141,10):
    link = f'https://www.yelp.com/biz/social-brew-cafe-pyrmont?start={i}'
    print(f'Extracting from page {int(i/10)}')
    r = requests.get(link)
    soup = BeautifulSoup(r.text, 'html.parser')
    regex = re.compile('.*comment.*')
    results = soup.find_all('p', {'class':regex})
    review = [result.text for result in results]
    reviews.extend(review)
    print(f'Page {int(i/10)} extraction complete')

Extracting from page 1
Page 1 extraction complete
Extracting from page 2
Page 2 extraction complete
Extracting from page 3
Page 3 extraction complete
Extracting from page 4
Page 4 extraction complete
Extracting from page 5
Page 5 extraction complete
Extracting from page 6
Page 6 extraction complete
Extracting from page 7
Page 7 extraction complete
Extracting from page 8
Page 8 extraction complete
Extracting from page 9
Page 9 extraction complete
Extracting from page 10
Page 10 extraction complete
Extracting from page 11
Page 11 extraction complete
Extracting from page 12
Page 12 extraction complete
Extracting from page 13
Page 13 extraction complete
Extracting from page 14
Page 14 extraction complete


In [69]:
len(reviews)

129

# 5. Load Reviews into DataFrame and Score

In [71]:
df = pd.DataFrame(np.array(reviews),columns=['review'])
df.head()

Unnamed: 0,review
0,Delicious. The waitress was hot. The burger wa...
1,Good coffee and toasts. Straight up and down -...
2,5 stars all around for the staff and delicious...
3,This is one of my absolute favorite places to ...
4,Found Social Brew Cafe on my last day in Sydne...


In [72]:
df.shape,df['review'][0]

((129, 1),
 "Delicious. The waitress was hot. The burger was juicy but messy that was the only thing I didn't like but food was delicious service was great. Went for breakfast on a Monday.")

In [73]:
def sentiment_score(review):
    tokens = tokenizer.encode(review, truncation=True, return_tensors='pt')
    result = model(tokens,)
    
    return int(torch.argmax(result.logits))+1

In [74]:
# sentiment_score(df['review'].iloc[17])
sentiment_score(df['review'][10])

4

In [76]:
# applying sentiment score next to reviews in df
df['sentiment score'] = df['review'].apply(lambda x: sentiment_score(x))
df.head()

Unnamed: 0,review,sentiment score
0,Delicious. The waitress was hot. The burger wa...,4
1,Good coffee and toasts. Straight up and down -...,5
2,5 stars all around for the staff and delicious...,5
3,This is one of my absolute favorite places to ...,5
4,Found Social Brew Cafe on my last day in Sydne...,5


In [78]:
#overall rating
round(df['sentiment score'].mean(),1)

4.5