# Install & Import Dependencies

In [1]:
!pip install transformers 

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 8.7 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 56.6 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 51.6 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.25.1


In [21]:
import re
import requests
from bs4 import BeautifulSoup
import torch
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Instantiate the [Model](https://huggingface.co/nlptown/bert-base-multilingual-uncased-sentiment?text=meh%2C+It%27s++good)

In [3]:
# Single source of truth for the checkpoint so the tokenizer and the model
# cannot drift apart.
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

Downloading:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/953 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/872k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/669M [00:00<?, ?B/s]

# Encode & Calculate Sentiment


In [4]:
# Encode one sample sentence into a PyTorch tensor of token ids.
tokens = tokenizer.encode(
    "It was really good, BRAVO!",
    return_tensors='pt',
)
tokens

tensor([[  101, 10197, 10140, 25165, 12050,   117, 33727,   106,   102]])

In [6]:
# Inference only: disable autograd so no graph is built and the logits
# carry no grad_fn.
with torch.no_grad():
    result = model(tokens)
result.logits

tensor([[-2.3015, -2.2911, -0.2969,  1.7386,  2.5406]],
       grad_fn=<AddmmBackward0>)

In [7]:
# Rating: argmax gives a 0-based class index, so add one for a 1-based score.
int(result.logits.argmax()) + 1

5

# Collect Reviews

In [8]:
# Fetch the Yelp page of one example restaurant and parse its HTML.
# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# fails loudly on an HTTP error instead of silently parsing an error page.
r = requests.get("https://www.yelp.com/biz/pauls-linz", timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')

In [14]:
# Collect every <span> that has a class value containing the substring 'raw'.
spans = soup.find_all(
    'span',
    attrs={'class': lambda css_class: css_class and 'raw' in css_class},
)

In [20]:
# Drop the first five matched spans and keep the text of the remaining ones.
reviews = [tag.text for tag in spans[5:]]
reviews

['Best place for dinner in Linz. Centrally located in old town, and a great Waygu burger on the menu. They have lots of different steak options, which I have never heard complaints about. Tried the seasonal fish here and that was a big mistake. Stick to the burger, trust me!!This place is always busy so I highly recommend a reservation. This place is good for a nice dinner and also good enough to take out of town guests and colleagues.',
 'Good food and beer selection. Nice atmosphere and view of the Dom. Very relaxing. I will return next time i am in Linz.',
 'Very nice restaurant and bar. The location is conveniently located right by the Cathedral and you can park underground for easy access to both locations. The service was great and the waiting time was minimal. The atmosphere of the restaurant has the old style feel of living in the past. The food was great and some dishes were very unique on taste and visual appearance which was very yummy. If you are used to an American style t

# Load Reviews into DataFrame and Score

In [23]:
# One row per scraped review, held in a single 'review' column.
df = pd.DataFrame(reviews, columns=['review'])
df

Unnamed: 0,review
0,Best place for dinner in Linz. Centrally locat...
1,Good food and beer selection. Nice atmosphere ...
2,Very nice restaurant and bar. The location is ...
3,"During my travels, stopped in to have a burge..."
4,"awesome food (the burgers are amazing, so are ..."
5,this place is amazing. huge selection ales and...


In [25]:
# Full text of the first review (the table view above truncates it).
df['review'].iloc[0]

'Best place for dinner in Linz. Centrally located in old town, and a great Waygu burger on the menu. They have lots of different steak options, which I have never heard complaints about. Tried the seasonal fish here and that was a big mistake. Stick to the burger, trust me!!This place is always busy so I highly recommend a reservation. This place is good for a nice dinner and also good enough to take out of town guests and colleagues.'

In [26]:
def sentiment_score(review, max_length=512):
    """Return the model's predicted rating for one review as a 1-based int.

    Args:
        review: Review text to classify.
        max_length: Maximum number of tokens kept after encoding. Longer
            reviews are truncated so they cannot exceed the model's input
            limit and raise during the forward pass.

    Returns:
        int: Index of the largest logit plus one.
    """
    tokens = tokenizer.encode(
        review,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        result = model(tokens)
    return int(torch.argmax(result.logits)) + 1

# Check the scorer on the first review.
sentiment_score(df['review'].iloc[0])

5

In [28]:
# Score every review and store the predicted rating next to its text.
df['sentiment'] = df['review'].apply(sentiment_score)
df

Unnamed: 0,review,sentiment
0,Best place for dinner in Linz. Centrally locat...,5
1,Good food and beer selection. Nice atmosphere ...,4
2,Very nice restaurant and bar. The location is ...,4
3,"During my travels, stopped in to have a burge...",5
4,"awesome food (the burgers are amazing, so are ...",5
5,this place is amazing. huge selection ales and...,5


# Using Pipeline

In [29]:
from transformers import pipeline

# Build a ready-made sentiment-analysis pipeline around the named checkpoint.
classifier = pipeline(
    task='sentiment-analysis',
    model='nlptown/bert-base-multilingual-uncased-sentiment',
)

In [30]:
# Classify the first two reviews in one call. truncation=True is forwarded to
# the tokenizer so an over-long review is clipped instead of raising.
classifier(
    [df.review.iloc[0], df.review.iloc[1]],
    truncation=True,
)

[{'label': '5 stars', 'score': 0.869023323059082},
 {'label': '4 stars', 'score': 0.5727699398994446}]