# install transformers using pip

In [1]:
!pip install -q transformers


# import the required libraries

In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import re
import torch
import requests

# Define tokenizer and model

In [3]:
# Single source of truth for the checkpoint id, so the tokenizer and the
# model are always loaded from the same pretrained weights.
MODEL_NAME = 'nlptown/bert-base-multilingual-uncased-sentiment'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# test the tokenizer and model on a sample

In [4]:
# Sample sentence used to sanity-check the tokenizer and the model.
text = 'the plan is working for us now'

In [5]:
# Encode the sample sentence; return_tensors='pt' asks for a PyTorch tensor.
token = tokenizer.encode(text, return_tensors='pt')

In [6]:
# Forward pass of the model on the encoded sample.
# NOTE(review): this runs with autograd enabled (the recorded output carries a
# grad_fn); for inference-only use, wrapping the call in torch.no_grad() would
# avoid building the graph - confirm before changing.
result = model(token)

In [7]:
# Display the raw output object returned by the model.
result

SequenceClassifierOutput([('logits',
                           tensor([[-2.3277, -1.5535,  0.5637,  1.4955,  1.3144]],
                                  grad_fn=<AddmmBackward>))])

# use torch argmax to acquire the sentiment of the text

In [8]:
# Index of the highest logit, shifted by one so labels start at 1 instead of 0.
int(result.logits.argmax()) + 1

4

# extract reviews from yelp using beautifulsoup

In [9]:
# Download the business page. The timeout keeps the cell from hanging forever
# on a stalled connection, and raise_for_status() surfaces HTTP errors
# (blocked request, missing page) instead of silently parsing an error page.
r = requests.get(
    'https://www.yelp.com/biz/siam-orchid-traditional-thai-massage-san-francisco-2',
    timeout=30,
)
r.raise_for_status()
# Parse the returned HTML with Python's built-in parser.
soup = BeautifulSoup(r.text, 'html.parser')

In [10]:
# Pattern for any string that contains "comment"; used below as a class filter.
regex = re.compile(pattern=r'.*comment.*')

In [11]:
# Collect every <p> element whose class attribute matches the regex.
result = soup.find_all('p', class_=regex)

In [12]:
# Keep only the text content of each matched paragraph.
texts = [paragraph.text for paragraph in result]

# convert the collected reviews to a pandas dataframe

In [13]:
# One row per scraped review. Building the frame straight from the list avoids
# the detour through a fixed-width NumPy string array, which pads every entry
# to the length of the longest review.
df = pd.DataFrame({'reviews': texts})

In [14]:
# Preview the first rows of the scraped reviews.
df.head()

Unnamed: 0,reviews
0,My experience this past weekend was transforma...
1,I had an amazing time getting a massage from J...
2,This place is amazing! Very clean and Covid sa...
3,"Now, this is massage! Two hrs of Thai deep tis..."
4,Definitely seems like I'm in the minority here...


# define a function for labeling

In [15]:
def annotate(text):
  """Predict a sentiment label for ``text`` with the loaded classifier.

  Args:
    text: The string to classify.

  Returns:
    int: 1-based index of the highest-scoring class (1-5 for the five-logit
    model loaded in this notebook).
  """
  # Truncate at the token level: slicing the string by characters does not
  # bound the token count, and over-long sequences would make the model fail.
  # 512 tokens is BERT's maximum sequence length.
  token = tokenizer.encode(text, return_tensors='pt', truncation=True, max_length=512)
  # Inference only: skip autograd bookkeeping to save time and memory.
  with torch.no_grad():
    result = model(token)
  return int(torch.argmax(result.logits)) + 1

# apply the function to every review

In [16]:
# Label every review. Each review is first cut to its first 512 characters
# (a character limit, not a token limit) before being passed to annotate().
df['label'] = df['reviews'].apply(lambda review: annotate(review[:512]))

In [17]:
# Preview the reviews together with their predicted labels.
df.head()

Unnamed: 0,reviews,label
0,My experience this past weekend was transforma...,5
1,I had an amazing time getting a massage from J...,5
2,This place is amazing! Very clean and Covid sa...,5
3,"Now, this is massage! Two hrs of Thai deep tis...",5
4,Definitely seems like I'm in the minority here...,3


# convert the numeric label to positive, negative and neutral

In [18]:
def rate(label):
  """Translate a numeric label into a coarse sentiment name.

  Args:
    label: Numeric rating, as produced by ``annotate``.

  Returns:
    str: 'negative' when the label is below 3, 'neutral' when it equals 3,
    and 'positive' for everything else.
  """
  # Guard clause for the low end of the scale.
  if label < 3:
    return 'negative'
  # Remaining values are either the midpoint or anything else.
  return 'neutral' if label == 3 else 'positive'

In [19]:
# Derive the sentiment name for each row from its numeric label.
df['sentiment'] = df['label'].apply(rate)
df.head()

Unnamed: 0,reviews,label,sentiment
0,My experience this past weekend was transforma...,5,positive
1,I had an amazing time getting a massage from J...,5,positive
2,This place is amazing! Very clean and Covid sa...,5,positive
3,"Now, this is massage! Two hrs of Thai deep tis...",5,positive
4,Definitely seems like I'm in the minority here...,3,neutral


# convert the sentiment to a binary sentiment

In [20]:
# Collapse the three sentiment names into a 0/1 flag.
# Note that 'neutral' is counted as 1, together with 'positive'.
df['binary_sentiment'] = df.sentiment.map({
    'negative': 0,
    'neutral': 1,
    'positive': 1,
})
df.head()

Unnamed: 0,reviews,label,sentiment,binary_sentiment
0,My experience this past weekend was transforma...,5,positive,1
1,I had an amazing time getting a massage from J...,5,positive,1
2,This place is amazing! Very clean and Covid sa...,5,positive,1
3,"Now, this is massage! Two hrs of Thai deep tis...",5,positive,1
4,Definitely seems like I'm in the minority here...,3,neutral,1


# References
 
 [HuggingFace](https://huggingface.co/nlptown/bert-base-multilingual-uncased-sentiment)


 [YELP](https://www.yelp.com/biz/siam-orchid-traditional-thai-massage-san-francisco-2)