### Sentiment analysis helps us decipher the mood and emotions of the general public and gather insightful information about the context. It is the process of analyzing data and classifying it according to the needs of the research.

In [None]:
import requests

In [None]:
from bs4 import BeautifulSoup

In [None]:
# you can choose any website
# A timeout is passed explicitly: without one, requests waits indefinitely
# if the server never answers, which would hang the notebook cell.
r = requests.get(
    'https://www.yelp.com/biz/tesla-san-francisco?osq=Tesla+Dealership',
    timeout=10,
)

In [None]:
r.status_code

In [None]:
r.text

In [None]:
soup = BeautifulSoup(r.text, 'html.parser')

In [None]:
# choose the class which is common to the content of interest
# find_all is the current Beautiful Soup 4 method name; findAll is only a
# deprecated backwards-compatibility alias for it.
divs = soup.find_all(class_="comment__09f24__gu0rG css-1sufhje")

In [None]:
# Collect the text content of every matched element, in document order.
reviews = [div.text for div in divs]

In [None]:
reviews

## Analysing the data

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Wrap the scraped review texts in a DataFrame with a single 'review' column.
df = pd.DataFrame(np.array(reviews), columns = ['review'])

In [None]:
df.head()

In [None]:
len(df['review'])

In [None]:
# Number of whitespace-separated tokens in each review.
df['word_count'] = df['review'].map(lambda text: len(text.split()))

In [None]:
df.head()

In [None]:
# Number of characters (including whitespace) in each review.
df['char_count'] = df['review'].apply(len)

In [None]:
df.head()

In [None]:
def average_words(x):
    """Return the mean length of the whitespace-separated words in ``x``.

    Args:
        x: Text to measure.

    Returns:
        The average number of characters per word as a float, or ``0.0``
        when ``x`` contains no words (empty or whitespace-only text).
    """
    words = x.split()
    # Guard against empty reviews, which would otherwise divide by zero.
    if not words:
        return 0.0
    return sum(len(word) for word in words) / len(words)

In [None]:
# Mean word length per review; the function is passed directly, no wrapper needed.
df['avereage_word_length'] = df['review'].apply(average_words)

In [None]:
df.head()

In [None]:
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')

In [None]:
# Stored as a set because the stop words are only used for membership tests,
# performed once per token of every review; a set makes each test O(1).
stop_words = set(stopwords.words('english'))

In [None]:
# Count the tokens of each review whose lowercase form is a stop word.
df['stopword_count'] = df['review'].apply(
    lambda text: sum(1 for token in text.split() if token.lower() in stop_words)
)

In [None]:
df.head()

In [None]:
# Fraction of each review's tokens that are stop words.
df['stopword_rate'] = df['stopword_count'].div(df['word_count'])

In [None]:
df.head()

In [None]:
df.sort_values(by = 'stopword_rate')

In [None]:
df.describe()

## Data Cleaning

In [None]:
# Lowercase every token and re-join with single spaces (collapses whitespace runs).
df['lowercase'] = df['review'].apply(
    lambda text: " ".join(map(str.lower, text.split()))
)

In [None]:
df.head()

In [None]:
# Remove every character that is neither a word character nor whitespace.
# regex=True must be explicit: since pandas 2.0 Series.str.replace treats the
# pattern as a literal string by default, which would leave punctuation intact.
# The raw string avoids the invalid '\w' / '\s' escape sequences.
df['punctuation'] = df['lowercase'].str.replace(r'[^\w\s]', '', regex=True)

In [None]:
df.head()

In [None]:
# Keep only the tokens that are not in the stop-word collection.
df["stopwords"] = df['punctuation'].apply(
    lambda text: " ".join(filter(lambda token: token not in stop_words, text.split()))
)

In [None]:
df.head()

In [None]:
# Frequency table of all remaining tokens across reviews; show the 50 most common.
all_tokens = " ".join(df['stopwords']).split()
pd.Series(all_tokens).value_counts().head(50)

In [None]:
other_stop_words = ['get','also','us','new']

In [None]:
len(other_stop_words)

In [None]:
df.head()

In [None]:
# Drop the additional, manually chosen stop words from each review.
df['cleanreview'] = df['stopwords'].apply(
    lambda text: " ".join(
        filter(lambda token: token not in other_stop_words, text.split())
    )
)

In [None]:
df.head()

##  Lemmatization

In [None]:
from textblob import Word
# Word.lemmatize() looks words up in the WordNet corpus, so fetch it as well;
# omw-1.4 on its own is not sufficient on a fresh environment.
nltk.download('wordnet')
nltk.download('omw-1.4')

In [None]:
# Replace every token of the cleaned review by its lemma.
df['lemmatize'] = df['cleanreview'].apply(
    lambda text: " ".join([Word(token).lemmatize() for token in text.split()])
)

In [None]:
df.head()

##  Sentiment Analysis

### TextBlob returns the polarity and subjectivity of a sentence. Polarity lies in the range [-1, 1], where -1 denotes a negative sentiment and 1 denotes a positive sentiment. Negation words reverse the polarity. TextBlob has semantic labels that help with fine-grained analysis, for example emoticons, exclamation marks, and emojis. Subjectivity lies in the range [0, 1] and quantifies the amount of personal opinion versus factual information contained in the text: higher subjectivity means that the text contains personal opinion rather than factual information.

In [None]:
# Number of tokens left in each review after cleaning.
df['clean_review_word_count'] = df['cleanreview'].map(lambda text: len(text.split()))

In [None]:
from textblob import TextBlob

In [None]:
# Share of the original tokens that survive cleaning.
df['clean_rate'] = df['clean_review_word_count'].div(df['word_count'])

In [None]:
df.head()

In [None]:
# Polarity is the first field of TextBlob's sentiment tuple; read it by name.
df['polarity'] = df['lemmatize'].apply(lambda text: TextBlob(text).sentiment.polarity)

In [None]:
# Subjectivity is the second field of TextBlob's sentiment tuple; read it by name.
df['subjectivity'] = df['lemmatize'].apply(
    lambda text: TextBlob(text).sentiment.subjectivity
)

In [None]:
df.head()

In [None]:
df.sort_values(by='polarity')