# Install and Import Dependencies

In [1]:
!pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio===0.8.1 -f https://download.pytorch.org/whl/torch_stable.html

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://download.pytorch.org/whl/torch_stable.html
Collecting torch==1.8.1+cu111
  Downloading https://download.pytorch.org/whl/cu111/torch-1.8.1%2Bcu111-cp38-cp38-linux_x86_64.whl (1982.2 MB)
[K     |█████████████▌                  | 834.1 MB 1.3 MB/s eta 0:15:09tcmalloc: large alloc 1147494400 bytes == 0x3a044000 @  0x7f466cc08615 0x5d6f4c 0x51edd1 0x51ef5b 0x4f750a 0x4997a2 0x4fd8b5 0x4997c7 0x4fd8b5 0x49abe4 0x4f5fe9 0x55e146 0x4f5fe9 0x55e146 0x4f5fe9 0x55e146 0x5d8868 0x5da092 0x587116 0x5d8d8c 0x55dc1e 0x55cd91 0x5d8941 0x49abe4 0x55cd91 0x5d8941 0x4990ca 0x5d8868 0x4997a2 0x4fd8b5 0x49abe4
[K     |█████████████████               | 1055.7 MB 1.2 MB/s eta 0:12:56tcmalloc: large alloc 1434370048 bytes == 0x7e69a000 @  0x7f466cc08615 0x5d6f4c 0x51edd1 0x51ef5b 0x4f750a 0x4997a2 0x4fd8b5 0x4997c7 0x4fd8b5 0x49abe4 0x4f5fe9 0x55e146 0x4f5fe9 0x55e146 0x4f5fe9 0x55e14

In [2]:
!pip install transformers requests beautifulsoup4 pandas numpy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 17.0 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 90.4 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 67.6 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.25.1


In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import re

# Instantiate Model
This is a bert-base-multilingual-uncased model fine-tuned for sentiment analysis on product reviews in six languages: English, Dutch, German, French, Spanish, and Italian. It predicts the sentiment of the review as a number of stars (between 1 and 5).

This model is intended for direct use as a sentiment analysis model for product reviews in any of the six languages above, or for further finetuning on related sentiment analysis tasks.

In [4]:
# Checkpoint identifier, defined once so the tokenizer and the model are
# always loaded from the same pretrained repository.
MODEL_NAME = 'nlptown/bert-base-multilingual-uncased-sentiment'

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

Downloading:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/953 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/872k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/669M [00:00<?, ?B/s]

# Encode to tokens and Calculate Sentiment

In [5]:
# Encode a sample review into token ids, returned as a PyTorch tensor ('pt').
tokens = tokenizer.encode('It was good but couldve been better. Great', return_tensors='pt')

In [6]:
# Run the classifier on the encoded sample; the raw per-class scores are
# available on the returned object as `.logits`.
result = model(tokens)

In [7]:
result.logits

tensor([[-2.7768, -1.2353,  1.4419,  1.9804,  0.4584]],
       grad_fn=<AddmmBackward>)

In [8]:
# argmax gives the 0-based position of the highest logit; adding 1 turns it
# into the 1-5 star rating the model is described as predicting.
int(torch.argmax(result.logits))+1

4

# Data - Collect Reviews from Yelp

In [13]:
# Yelp business page to scrape. To analyse another business, replace the slug
# after /biz/, e.g. 'https://www.yelp.com/biz/dunkin-south-san-francisco-2'.
url = 'https://www.yelp.com/biz/peets-coffee-san-francisco-12'

# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# stops here on an HTTP error instead of silently parsing an error page.
r = requests.get(url, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')

# Review bodies are taken from <p> tags whose class attribute contains "comment".
# NOTE(review): this relies on Yelp's page markup - confirm the pattern still matches.
regex = re.compile('.*comment.*')
results = soup.find_all('p', {'class':regex})
reviews = [paragraph.text for paragraph in results]

In [14]:
reviews

["Service has gotten a lot better here. When I went before, it was mainly one guy who was absolutely terrible. I think he is the same one others have been complaining g about. But it seems that he has chilled out now. The other employees are awesome. They are very sweet, friendly and helpful. It's a shame that the whole store got bad reviews for the actions of one or two people. But that is all it takes sometimes. I haven't had anymore bad experiences here. The guy who threw the straw at my drink started to run to the back of the store whenever I came in \xa0but he stopped. Hopefully figured out no one is out to get him. Just don't be a terrible person to your costumers.",
 "Corporate customer service number for complaints: 1-800-999-2132**press option 2I have been coming here for three months now and the barista's have been rude for no reason. Today was literally the last straw when a barista threw a straw at my drink and walked away. I will ask them to do simple things that should've

# Load Reviews into DataFrame and Score

In [21]:
import numpy as np
import pandas as pd

# One row per scraped review. The list is handed to pandas directly; routing it
# through np.array() first only adds an extra copy as a fixed-width string array.
df = pd.DataFrame({'review': reviews})
# Show the first review as a sanity check of what was loaded.
df['review'].iloc[0]

"Service has gotten a lot better here. When I went before, it was mainly one guy who was absolutely terrible. I think he is the same one others have been complaining g about. But it seems that he has chilled out now. The other employees are awesome. They are very sweet, friendly and helpful. It's a shame that the whole store got bad reviews for the actions of one or two people. But that is all it takes sometimes. I haven't had anymore bad experiences here. The guy who threw the straw at my drink started to run to the back of the store whenever I came in \xa0but he stopped. Hopefully figured out no one is out to get him. Just don't be a terrible person to your costumers."

In [18]:
def sentiment_score(review):
    """Return the predicted star rating (1-5) for a single review text.

    Args:
        review: The review text to classify.

    Returns:
        int: 1-based position of the highest logit, i.e. the star rating.
    """
    # Truncate at the token level so inputs longer than the 512-token limit
    # are shortened here instead of relying on callers to slice characters.
    tokens = tokenizer.encode(
        review,
        return_tensors='pt',
        truncation=True,
        max_length=512,
    )
    # Pure inference: skip building the autograd graph.
    with torch.no_grad():
        result = model(tokens)
    # argmax is 0-based, star ratings start at 1.
    return int(torch.argmax(result.logits)) + 1

In [22]:
sentiment_score(df['review'].iloc[0])

5

In [23]:
# Score every review. Each text is cut to its first 512 characters (a character
# slice, not a token count) before it is handed to sentiment_score.
df['sentiment'] = [sentiment_score(text[:512]) for text in df['review']]

In [24]:
df

Unnamed: 0,review,sentiment
0,Service has gotten a lot better here. When I w...,4
1,Corporate customer service number for complain...,1
2,Interesting place. A decent size coffee shop w...,3
3,I keep going to different coffee shops to try ...,1
4,I usually have a good experience with this pla...,4
5,Worst Peet's experience I ever had. Waited 30 ...,1
6,The two team members on duty here today get br...,5
7,Trash service. Horrible. That's why you work a...,1
8,I get my iced matcha in any Peet's and always ...,2
9,"I always like Peet's coffee, and today is the ...",2


In [25]:
df['review'].iloc[3]

"I keep going to different coffee shops to try coffee. \xa0And because I want to find the best coffee shop. \xa0What can I say about this cafe. \xa0The service in this cafe is disgusting, the coffee is not tasty. \xa0There is a Starbucks nearby, so go there. \xa0someone teach this manager how to deal with people. \xa0the girl who was at the checkout is what a manager should be there. \xa0kind smiling. \xa0easily finds a common language with everyone. \xa0even if you have problems with English. \xa0This is America, there are a lot of tourists here. \xa0Therefore, the manager must be an adequate person. Read reviews. Don't make my mistakes. There are just units here. And this will continue as long as this manager works there."