# Getting the data from the website

In [2]:
import requests
from bs4 import BeautifulSoup

In [3]:
r = requests.get('https://www.yelp.com/biz/tesla-san-francisco?osq=Tesla+Dealership')

In [4]:
#check request status
r.status_code

200

In [5]:
r.text



In [18]:
#getting the review block from the website
soup = BeautifulSoup(r.text,'html.parser')
#soup.findAll(class_="comment__09f24__gu0rG css-1sufhje")
divs = soup.findAll(class_="comment__09f24__gu0rG css-1sufhje")


In [23]:
#adding the review text to review array
reviews = []
for div in divs:
    reviews.append(div.find('span').text)
    #print(div.find('span').text,'\n')


# Analysing the data

In [47]:
import pandas as pd
import numpy as np
from nltk.corpus import stopwords
import nltk

In [28]:
df = pd.DataFrame(np.array(reviews),columns=['review'])
len(df['review'])

10

In [32]:
#df['review'].apply(lambda x: len(x.split()))
df['word_count']= df['review'].apply(lambda x: len(x.split()))
df['char_count']= df['review'].apply(lambda x: len(x))

In [33]:
def average_words(x):
    words = x.split()
    return sum(len(word) for word in words)/len(words)

In [34]:
df['avg_word_length']= df['review'].apply(lambda x: average_words(x))

In [48]:
nltk.download('stopwords')
stop_words = stopwords.words('english')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/venkat/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [50]:
df['stopword_coun'] = df['review'].apply(lambda x: len([x for x in x.split() if x in stop_words]))

# cleaning the data

In [51]:
# Lower case all words
df['review_lower'] = df['review'].apply(lambda x: " ".join(x.lower() for x in x.split()))
# Remove Punctuation
df['review_nopunc'] = df['review_lower'].str.replace('[^\w\s]', '')

  df['review_nopunc'] = df['review_lower'].str.replace('[^\w\s]', '')


In [52]:
# Remove Stopwords
df['review_nopunc_nostop'] = df['review_nopunc'].apply(lambda x: " ".join(x for x in x.split() if x not in stop_words))

In [57]:
pd.Series(" ".join(df['review_nopunc_nostop']).split()).value_counts()

service          16
car              13
tesla            11
called            9
appointment       8
                 ..
kids              1
helpful           1
understanding     1
incredibly        1
avoided           1
Length: 442, dtype: int64

In [58]:
other_stopwords = ['get', 'us', 'see', 'use', 'said', 'asked', 'day', 'go' 
  'even', 'ive', 'right', 'left', 'always', 'would', 'told', 
  'get', 'us', 'would', 'get', 'one', 'ive', 'go', 'even', 
  'also', 'ever', 'x', 'take', 'let' ]

In [59]:
df['review_nopunc_nostop_nocommon'] = df['review_nopunc_nostop'].apply(lambda x: "".join(" ".join(x for x in x.split() if x not in other_stopwords)))

In [60]:
pd.Series(" ".join(df['review_nopunc_nostop_nocommon']).split()).value_counts()

service          16
car              13
tesla            11
called            9
appointment       8
                 ..
understanding     1
incredibly        1
shop              1
long              1
avoided           1
Length: 425, dtype: int64

# Lemmatization

In [62]:
# Import textblob
from textblob import Word

In [67]:
#nltk.download('wordnet')
#nltk.download('omw-1.4')
# Lemmatize final review format
df['cleaned_review'] = df['review_nopunc_nostop_nocommon'].apply(lambda x: " ".join([Word(word).lemmatize() for word in x.split()]))

[nltk_data] Downloading package wordnet to /Users/venkat/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /Users/venkat/nltk_data...
[nltk_data]   Unzipping corpora/omw-1.4.zip.


# Sentiment analysis

In [68]:
from textblob import TextBlob

In [69]:
# Calculate polarity
df['polarity'] = df['cleaned_review'].apply(lambda x: TextBlob(x).sentiment[0])
# Calculate subjectivity
df['subjectivity'] = df['cleaned_review'].apply(lambda x: TextBlob(x).sentiment[1])

In [70]:
df.head()

Unnamed: 0,review,word_count,char_count,avg_word_length,stopword_coun,review_lower,review_nopunc,review_nopunc_nostop,review_nopunc_nostop_nocommon,cleaned_review,polarity,subjectivity
0,"Consider the Tesla Model 3. Mine is fun, which...",41,230,4.609756,14,"consider the tesla model 3. mine is fun, which...",consider the tesla model 3 mine is fun which b...,consider tesla model 3 mine fun brings joy con...,consider tesla model 3 mine fun brings joy con...,consider tesla model 3 mine fun brings joy con...,0.4,0.375
1,My experience with Tesla has been nothing shor...,397,2066,4.20403,155,my experience with tesla has been nothing shor...,my experience with tesla has been nothing shor...,experience tesla nothing short complete disast...,experience tesla nothing short complete disast...,experience tesla nothing short complete disast...,0.093529,0.521601
2,This is for our amazing experience with the se...,110,587,4.3,49,this is for our amazing experience with the se...,this is for our amazing experience with the se...,amazing experience service center went beyond ...,amazing experience service center went beyond ...,amazing experience service center went beyond ...,0.213542,0.458333
3,Do you love having your car held hostage for t...,59,309,4.254237,24,do you love having your car held hostage for t...,do you love having your car held hostage for t...,love car held hostage things arent fault locat...,love car held hostage things arent fault locat...,love car held hostage thing arent fault locati...,-0.066667,0.566667
4,I am appalled by the poor service at this Tesl...,134,696,4.171642,58,i am appalled by the poor service at this tesl...,i am appalled by the poor service at this tesl...,appalled poor service tesla location first can...,appalled poor service tesla location first can...,appalled poor service tesla location first can...,-0.039773,0.462085


In [71]:
df.drop(['review_lower','review_nopunc','review_nopunc_nostop','review_nopunc_nostop_nocommon'],axis=1,inplace=True)

In [74]:
df.sort_values(by='polarity')
df.head()


Unnamed: 0,review,word_count,char_count,avg_word_length,stopword_coun,cleaned_review,polarity,subjectivity
0,"Consider the Tesla Model 3. Mine is fun, which...",41,230,4.609756,14,consider tesla model 3 mine fun brings joy con...,0.4,0.375
1,My experience with Tesla has been nothing shor...,397,2066,4.20403,155,experience tesla nothing short complete disast...,0.093529,0.521601
2,This is for our amazing experience with the se...,110,587,4.3,49,amazing experience service center went beyond ...,0.213542,0.458333
3,Do you love having your car held hostage for t...,59,309,4.254237,24,love car held hostage thing arent fault locati...,-0.066667,0.566667
4,I am appalled by the poor service at this Tesl...,134,696,4.171642,58,appalled poor service tesla location first can...,-0.039773,0.462085
