# Scrapping using BeautifulSoup

In [83]:
import requests

In [84]:
from bs4 import BeautifulSoup

In [85]:
r=requests.get('https://www.yelp.com/biz/tesla-san-francisco?osq=Tesla+Dealership')

In [86]:
r.status_code

200

In [87]:
r.text



In [88]:
soup= BeautifulSoup(r.text,'html.parser')

In [89]:
divs=soup.findAll(class_="y-css-cluvhg")

In [90]:
soup = BeautifulSoup(r.text, 'html.parser')
results = soup.findAll(class_='raw__09f24__T4Ezm', attrs={'lang':'en'})
reviews = [result.text for result in results]

In [91]:
reviews[0]

"Staff is very professional and courteous.  Once you get inside, it's beautiful and spacious.  The waiting room has free drinks, coffee and snacks.  Plenty of tables and couches to relax or get on your iPad:  I would give them 5 stars, but I'm. It crazy about the location on Van Ness."

# Analyzing the data

In [92]:
import pandas as pd
import numpy as np


In [93]:
df=pd.DataFrame(np.array(reviews), columns=['review'])

In [94]:
df

Unnamed: 0,review
0,Staff is very professional and courteous. Onc...
1,"Had my car all day for a simple tire change, t..."
2,I have an old Tesla and it's getting a little ...
3,Kevin who provided the car for a demo drive ch...
4,DO NOT go there. I had my car serviced (they ...
5,Came here to get warranty work done as my driv...
6,"I was told I would get an IRS tax rebate, even..."
7,Helena KElon Musk!Is climbing the highest moun...
8,I am giving one star for the young man Kenny. ...
9,I took my car to Tesla for a recall. First- th...


In [95]:
len(df['review'])

11

In [96]:
df['word_count']=df['review'].apply(lambda x: len(x.split()))

In [97]:
df['char_count']=df['review'].apply(lambda x: len(x))

In [98]:
df

Unnamed: 0,review,word_count,char_count
0,Staff is very professional and courteous. Onc...,51,284
1,"Had my car all day for a simple tire change, t...",68,355
2,I have an old Tesla and it's getting a little ...,135,769
3,Kevin who provided the car for a demo drive ch...,201,1220
4,DO NOT go there. I had my car serviced (they ...,47,257
5,Came here to get warranty work done as my driv...,84,445
6,"I was told I would get an IRS tax rebate, even...",92,484
7,Helena KElon Musk!Is climbing the highest moun...,125,745
8,I am giving one star for the young man Kenny. ...,59,303
9,I took my car to Tesla for a recall. First- th...,96,454


In [99]:
def avg_word(review):
  words = review.split()
  return (sum(len(word) for word in words) / len(words))

# Calculate average words
df['avg_word'] = df['review'].apply(lambda x: avg_word(x))

In [100]:
df

Unnamed: 0,review,word_count,char_count,avg_word
0,Staff is very professional and courteous. Onc...,51,284,4.509804
1,"Had my car all day for a simple tire change, t...",68,355,4.235294
2,I have an old Tesla and it's getting a little ...,135,769,4.703704
3,Kevin who provided the car for a demo drive ch...,201,1220,5.074627
4,DO NOT go there. I had my car serviced (they ...,47,257,4.446809
5,Came here to get warranty work done as my driv...,84,445,4.261905
6,"I was told I would get an IRS tax rebate, even...",92,484,4.25
7,Helena KElon Musk!Is climbing the highest moun...,125,745,4.96
8,I am giving one star for the young man Kenny. ...,59,303,4.152542
9,I took my car to Tesla for a recall. First- th...,96,454,3.739583


In [101]:
from nltk.corpus import stopwords

In [102]:
import nltk

In [103]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [104]:
stop_words=stopwords.words('english')

In [105]:
stop_words

['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

In [106]:
df['stopword_count'] = df['review'].apply(lambda x: len([x for x in x.split() if x in stop_words]))


In [107]:
df['stopword_rate']=df['stopword_count']/df['word_count']

In [108]:
df

Unnamed: 0,review,word_count,char_count,avg_word,stopword_count,stopword_rate
0,Staff is very professional and courteous. Onc...,51,284,4.509804,19,0.372549
1,"Had my car all day for a simple tire change, t...",68,355,4.235294,33,0.485294
2,I have an old Tesla and it's getting a little ...,135,769,4.703704,57,0.422222
3,Kevin who provided the car for a demo drive ch...,201,1220,5.074627,67,0.333333
4,DO NOT go there. I had my car serviced (they ...,47,257,4.446809,19,0.404255
5,Came here to get warranty work done as my driv...,84,445,4.261905,30,0.357143
6,"I was told I would get an IRS tax rebate, even...",92,484,4.25,39,0.423913
7,Helena KElon Musk!Is climbing the highest moun...,125,745,4.96,43,0.344
8,I am giving one star for the young man Kenny. ...,59,303,4.152542,25,0.423729
9,I took my car to Tesla for a recall. First- th...,96,454,3.739583,41,0.427083


In [109]:
df.sort_values(by='stopword_rate')

Unnamed: 0,review,word_count,char_count,avg_word,stopword_count,stopword_rate
3,Kevin who provided the car for a demo drive ch...,201,1220,5.074627,67,0.333333
7,Helena KElon Musk!Is climbing the highest moun...,125,745,4.96,43,0.344
5,Came here to get warranty work done as my driv...,84,445,4.261905,30,0.357143
0,Staff is very professional and courteous. Onc...,51,284,4.509804,19,0.372549
4,DO NOT go there. I had my car serviced (they ...,47,257,4.446809,19,0.404255
2,I have an old Tesla and it's getting a little ...,135,769,4.703704,57,0.422222
8,I am giving one star for the young man Kenny. ...,59,303,4.152542,25,0.423729
6,"I was told I would get an IRS tax rebate, even...",92,484,4.25,39,0.423913
9,I took my car to Tesla for a recall. First- th...,96,454,3.739583,41,0.427083
10,Wow! The best tesla service center I have ever...,109,531,3.880734,47,0.431193


# Data Cleaning

In [110]:
df['lower_case']=df['review'].str.lower()

In [111]:

df

Unnamed: 0,review,word_count,char_count,avg_word,stopword_count,stopword_rate,lower_case
0,Staff is very professional and courteous. Onc...,51,284,4.509804,19,0.372549,staff is very professional and courteous. onc...
1,"Had my car all day for a simple tire change, t...",68,355,4.235294,33,0.485294,"had my car all day for a simple tire change, t..."
2,I have an old Tesla and it's getting a little ...,135,769,4.703704,57,0.422222,i have an old tesla and it's getting a little ...
3,Kevin who provided the car for a demo drive ch...,201,1220,5.074627,67,0.333333,kevin who provided the car for a demo drive ch...
4,DO NOT go there. I had my car serviced (they ...,47,257,4.446809,19,0.404255,do not go there. i had my car serviced (they ...
5,Came here to get warranty work done as my driv...,84,445,4.261905,30,0.357143,came here to get warranty work done as my driv...
6,"I was told I would get an IRS tax rebate, even...",92,484,4.25,39,0.423913,"i was told i would get an irs tax rebate, even..."
7,Helena KElon Musk!Is climbing the highest moun...,125,745,4.96,43,0.344,helena kelon musk!is climbing the highest moun...
8,I am giving one star for the young man Kenny. ...,59,303,4.152542,25,0.423729,i am giving one star for the young man kenny. ...
9,I took my car to Tesla for a recall. First- th...,96,454,3.739583,41,0.427083,i took my car to tesla for a recall. first- th...


In [117]:
pip install neattext

Collecting neattext
  Downloading neattext-0.1.3-py3-none-any.whl.metadata (12 kB)
Downloading neattext-0.1.3-py3-none-any.whl (114 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/114.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.7/114.7 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: neattext
Successfully installed neattext-0.1.3


In [118]:
import neattext.functions as ntx

In [120]:
df['punctuation']=df['lower_case'].apply(ntx.remove_puncts)
#df['clean_tweet'].head()

In [121]:
df

Unnamed: 0,review,word_count,char_count,avg_word,stopword_count,stopword_rate,lower_case,punctuation
0,Staff is very professional and courteous. Onc...,51,284,4.509804,19,0.372549,staff is very professional and courteous. onc...,staff is very professional and courteous once...
1,"Had my car all day for a simple tire change, t...",68,355,4.235294,33,0.485294,"had my car all day for a simple tire change, t...",had my car all day for a simple tire change te...
2,I have an old Tesla and it's getting a little ...,135,769,4.703704,57,0.422222,i have an old tesla and it's getting a little ...,i have an old tesla and its getting a little l...
3,Kevin who provided the car for a demo drive ch...,201,1220,5.074627,67,0.333333,kevin who provided the car for a demo drive ch...,kevin who provided the car for a demo drive ch...
4,DO NOT go there. I had my car serviced (they ...,47,257,4.446809,19,0.404255,do not go there. i had my car serviced (they ...,do not go there i had my car serviced (they w...
5,Came here to get warranty work done as my driv...,84,445,4.261905,30,0.357143,came here to get warranty work done as my driv...,came here to get warranty work done as my driv...
6,"I was told I would get an IRS tax rebate, even...",92,484,4.25,39,0.423913,"i was told i would get an irs tax rebate, even...",i was told i would get an irs tax rebate even ...
7,Helena KElon Musk!Is climbing the highest moun...,125,745,4.96,43,0.344,helena kelon musk!is climbing the highest moun...,helena kelon muskis climbing the highest mount...
8,I am giving one star for the young man Kenny. ...,59,303,4.152542,25,0.423729,i am giving one star for the young man kenny. ...,i am giving one star for the young man kenny h...
9,I took my car to Tesla for a recall. First- th...,96,454,3.739583,41,0.427083,i took my car to tesla for a recall. first- th...,i took my car to tesla for a recall first the ...


In [123]:
df['stopwords']=df['punctuation'].apply(ntx.remove_stopwords)

In [124]:
df

Unnamed: 0,review,word_count,char_count,avg_word,stopword_count,stopword_rate,lower_case,punctuation,stopwords
0,Staff is very professional and courteous. Onc...,51,284,4.509804,19,0.372549,staff is very professional and courteous. onc...,staff is very professional and courteous once...,staff professional courteous inside beautiful ...
1,"Had my car all day for a simple tire change, t...",68,355,4.235294,33,0.485294,"had my car all day for a simple tire change, t...",had my car all day for a simple tire change te...,car day simple tire change texted end day wasn...
2,I have an old Tesla and it's getting a little ...,135,769,4.703704,57,0.422222,i have an old tesla and it's getting a little ...,i have an old tesla and its getting a little l...,old tesla getting little long tooth recently n...
3,Kevin who provided the car for a demo drive ch...,201,1220,5.074627,67,0.333333,kevin who provided the car for a demo drive ch...,kevin who provided the car for a demo drive ch...,kevin provided car demo drive cheerfully answe...
4,DO NOT go there. I had my car serviced (they ...,47,257,4.446809,19,0.404255,do not go there. i had my car serviced (they ...,do not go there i had my car serviced (they w...,car serviced (they worked suspension) forgot n...
5,Came here to get warranty work done as my driv...,84,445,4.261905,30,0.357143,came here to get warranty work done as my driv...,came here to get warranty work done as my driv...,came warranty work driver headlight went bad c...
6,"I was told I would get an IRS tax rebate, even...",92,484,4.25,39,0.423913,"i was told i would get an irs tax rebate, even...",i was told i would get an irs tax rebate even ...,told irs tax rebate thought told salesmen inco...
7,Helena KElon Musk!Is climbing the highest moun...,125,745,4.96,43,0.344,helena kelon musk!is climbing the highest moun...,helena kelon muskis climbing the highest mount...,helena kelon muskis climbing highest mount wor...
8,I am giving one star for the young man Kenny. ...,59,303,4.152542,25,0.423729,i am giving one star for the young man kenny. ...,i am giving one star for the young man kenny h...,giving star young man kenny great personality ...
9,I took my car to Tesla for a recall. First- th...,96,454,3.739583,41,0.427083,i took my car to tesla for a recall. first- th...,i took my car to tesla for a recall first the ...,took car tesla recall person check rude sent a...


# Lemmatization

In [125]:
from textblob import Word

In [127]:
nltk.download('wordnet')


[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [130]:
df['lemmatize']=df['stopwords'].apply(lambda x: " ".join(Word(word).lemmatize() for word in x.split()))

In [131]:
df

Unnamed: 0,review,word_count,char_count,avg_word,stopword_count,stopword_rate,lower_case,punctuation,stopwords,lemmatize
0,Staff is very professional and courteous. Onc...,51,284,4.509804,19,0.372549,staff is very professional and courteous. onc...,staff is very professional and courteous once...,staff professional courteous inside beautiful ...,staff professional courteous inside beautiful ...
1,"Had my car all day for a simple tire change, t...",68,355,4.235294,33,0.485294,"had my car all day for a simple tire change, t...",had my car all day for a simple tire change te...,car day simple tire change texted end day wasn...,car day simple tire change texted end day wasn...
2,I have an old Tesla and it's getting a little ...,135,769,4.703704,57,0.422222,i have an old tesla and it's getting a little ...,i have an old tesla and its getting a little l...,old tesla getting little long tooth recently n...,old tesla getting little long tooth recently n...
3,Kevin who provided the car for a demo drive ch...,201,1220,5.074627,67,0.333333,kevin who provided the car for a demo drive ch...,kevin who provided the car for a demo drive ch...,kevin provided car demo drive cheerfully answe...,kevin provided car demo drive cheerfully answe...
4,DO NOT go there. I had my car serviced (they ...,47,257,4.446809,19,0.404255,do not go there. i had my car serviced (they ...,do not go there i had my car serviced (they w...,car serviced (they worked suspension) forgot n...,car serviced (they worked suspension) forgot n...
5,Came here to get warranty work done as my driv...,84,445,4.261905,30,0.357143,came here to get warranty work done as my driv...,came here to get warranty work done as my driv...,came warranty work driver headlight went bad c...,came warranty work driver headlight went bad c...
6,"I was told I would get an IRS tax rebate, even...",92,484,4.25,39,0.423913,"i was told i would get an irs tax rebate, even...",i was told i would get an irs tax rebate even ...,told irs tax rebate thought told salesmen inco...,told irs tax rebate thought told salesman inco...
7,Helena KElon Musk!Is climbing the highest moun...,125,745,4.96,43,0.344,helena kelon musk!is climbing the highest moun...,helena kelon muskis climbing the highest mount...,helena kelon muskis climbing highest mount wor...,helena kelon muskis climbing highest mount wor...
8,I am giving one star for the young man Kenny. ...,59,303,4.152542,25,0.423729,i am giving one star for the young man kenny. ...,i am giving one star for the young man kenny h...,giving star young man kenny great personality ...,giving star young man kenny great personality ...
9,I took my car to Tesla for a recall. First- th...,96,454,3.739583,41,0.427083,i took my car to tesla for a recall. first- th...,i took my car to tesla for a recall first the ...,took car tesla recall person check rude sent a...,took car tesla recall person check rude sent a...


# Sentiment Analysis

In [132]:
from textblob import TextBlob

In [137]:
df['polarity']=df['lemmatize'].apply(lambda x: TextBlob(x).sentiment[0])

In [138]:
df['subjectivity']=df['lemmatize'].apply(lambda x: TextBlob(x).sentiment[1])

In [139]:
df

Unnamed: 0,review,word_count,char_count,avg_word,stopword_count,stopword_rate,lower_case,punctuation,stopwords,lemmatize,polarity,subjectivity
0,Staff is very professional and courteous. Onc...,51,284,4.509804,19,0.372549,staff is very professional and courteous. onc...,staff is very professional and courteous once...,staff professional courteous inside beautiful ...,staff professional courteous inside beautiful ...,0.27,0.76
1,"Had my car all day for a simple tire change, t...",68,355,4.235294,33,0.485294,"had my car all day for a simple tire change, t...",had my car all day for a simple tire change te...,car day simple tire change texted end day wasn...,car day simple tire change texted end day wasn...,0.15,0.439286
2,I have an old Tesla and it's getting a little ...,135,769,4.703704,57,0.422222,i have an old tesla and it's getting a little ...,i have an old tesla and its getting a little l...,old tesla getting little long tooth recently n...,old tesla getting little long tooth recently n...,0.130147,0.566176
3,Kevin who provided the car for a demo drive ch...,201,1220,5.074627,67,0.333333,kevin who provided the car for a demo drive ch...,kevin who provided the car for a demo drive ch...,kevin provided car demo drive cheerfully answe...,kevin provided car demo drive cheerfully answe...,0.183642,0.614198
4,DO NOT go there. I had my car serviced (they ...,47,257,4.446809,19,0.404255,do not go there. i had my car serviced (they ...,do not go there i had my car serviced (they w...,car serviced (they worked suspension) forgot n...,car serviced (they worked suspension) forgot n...,0.333333,0.833333
5,Came here to get warranty work done as my driv...,84,445,4.261905,30,0.357143,came here to get warranty work done as my driv...,came here to get warranty work done as my driv...,came warranty work driver headlight went bad c...,came warranty work driver headlight went bad c...,0.038988,0.501786
6,"I was told I would get an IRS tax rebate, even...",92,484,4.25,39,0.423913,"i was told i would get an irs tax rebate, even...",i was told i would get an irs tax rebate even ...,told irs tax rebate thought told salesmen inco...,told irs tax rebate thought told salesman inco...,0.1,0.475
7,Helena KElon Musk!Is climbing the highest moun...,125,745,4.96,43,0.344,helena kelon musk!is climbing the highest moun...,helena kelon muskis climbing the highest mount...,helena kelon muskis climbing highest mount wor...,helena kelon muskis climbing highest mount wor...,0.316667,0.674074
8,I am giving one star for the young man Kenny. ...,59,303,4.152542,25,0.423729,i am giving one star for the young man kenny. ...,i am giving one star for the young man kenny h...,giving star young man kenny great personality ...,giving star young man kenny great personality ...,0.5,0.4125
9,I took my car to Tesla for a recall. First- th...,96,454,3.739583,41,0.427083,i took my car to tesla for a recall. first- th...,i took my car to tesla for a recall first the ...,took car tesla recall person check rude sent a...,took car tesla recall person check rude sent a...,-0.246875,0.6125


In [140]:
df.sort_values(by='polarity')

Unnamed: 0,review,word_count,char_count,avg_word,stopword_count,stopword_rate,lower_case,punctuation,stopwords,lemmatize,polarity,subjectivity
9,I took my car to Tesla for a recall. First- th...,96,454,3.739583,41,0.427083,i took my car to tesla for a recall. first- th...,i took my car to tesla for a recall first the ...,took car tesla recall person check rude sent a...,took car tesla recall person check rude sent a...,-0.246875,0.6125
5,Came here to get warranty work done as my driv...,84,445,4.261905,30,0.357143,came here to get warranty work done as my driv...,came here to get warranty work done as my driv...,came warranty work driver headlight went bad c...,came warranty work driver headlight went bad c...,0.038988,0.501786
6,"I was told I would get an IRS tax rebate, even...",92,484,4.25,39,0.423913,"i was told i would get an irs tax rebate, even...",i was told i would get an irs tax rebate even ...,told irs tax rebate thought told salesmen inco...,told irs tax rebate thought told salesman inco...,0.1,0.475
2,I have an old Tesla and it's getting a little ...,135,769,4.703704,57,0.422222,i have an old tesla and it's getting a little ...,i have an old tesla and its getting a little l...,old tesla getting little long tooth recently n...,old tesla getting little long tooth recently n...,0.130147,0.566176
1,"Had my car all day for a simple tire change, t...",68,355,4.235294,33,0.485294,"had my car all day for a simple tire change, t...",had my car all day for a simple tire change te...,car day simple tire change texted end day wasn...,car day simple tire change texted end day wasn...,0.15,0.439286
10,Wow! The best tesla service center I have ever...,109,531,3.880734,47,0.431193,wow! the best tesla service center i have ever...,wow the best tesla service center i have ever ...,wow best tesla service center previous experie...,wow best tesla service center previous experie...,0.173333,0.379167
3,Kevin who provided the car for a demo drive ch...,201,1220,5.074627,67,0.333333,kevin who provided the car for a demo drive ch...,kevin who provided the car for a demo drive ch...,kevin provided car demo drive cheerfully answe...,kevin provided car demo drive cheerfully answe...,0.183642,0.614198
0,Staff is very professional and courteous. Onc...,51,284,4.509804,19,0.372549,staff is very professional and courteous. onc...,staff is very professional and courteous once...,staff professional courteous inside beautiful ...,staff professional courteous inside beautiful ...,0.27,0.76
7,Helena KElon Musk!Is climbing the highest moun...,125,745,4.96,43,0.344,helena kelon musk!is climbing the highest moun...,helena kelon muskis climbing the highest mount...,helena kelon muskis climbing highest mount wor...,helena kelon muskis climbing highest mount wor...,0.316667,0.674074
4,DO NOT go there. I had my car serviced (they ...,47,257,4.446809,19,0.404255,do not go there. i had my car serviced (they ...,do not go there i had my car serviced (they w...,car serviced (they worked suspension) forgot n...,car serviced (they worked suspension) forgot n...,0.333333,0.833333
