In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

In [2]:
r = requests.get('http://disruptrstudio.com/AI/reviews.html')

In [3]:
print(r.status_code)

200


In [4]:
soup = BeautifulSoup(r.text, 'html.parser')

In [5]:
divs = soup.findAll(class_='we-truncate we-truncate--multi-line we-truncate--interactive ember-view we-customer-review__body')

In [6]:
reviews = []
for div in divs:
   reviews.append(div.find('p').text)

In [8]:
df = pd.DataFrame(np.array(reviews), columns=['reviews'])

In [9]:
df.head(15)

Unnamed: 0,reviews
0,Thanks to Dubai it is easy application saves t...
1,This for sure has been the most interactive Mo...
2,The response to my request for police report w...
3,My iOS software is 12.4.4 and i canât downlo...
4,When i open the app it closes unexpectedly in ...
5,Despite the hefty fines usage of mobile phones...
6,It has everything and speed things up!
7,I like the new update a lot specially the part...
8,I have reported a simple accident and I got th...
9,"Quick, Advanced and User friendly application."


In [10]:
len(df['reviews'])

280

In [11]:
df ['word_count'] = df['reviews'].apply(lambda x: len(x.split()))

In [12]:
df['char_count'] = df['reviews'].apply(lambda x: len(x))

In [13]:
df.head()

Unnamed: 0,reviews,word_count,char_count
0,Thanks to Dubai it is easy application saves t...,14,88
1,This for sure has been the most interactive Mo...,30,163
2,The response to my request for police report w...,18,97
3,My iOS software is 12.4.4 and i canât downlo...,25,119
4,When i open the app it closes unexpectedly in ...,25,129


In [14]:
def average_words(x):
    words = x.split()
    return sum(len(word) for word in words) / len(words)

In [15]:
df['average_words_length'] = df['reviews'].apply(lambda x: average_words(x))

In [16]:
df.head()

Unnamed: 0,reviews,word_count,char_count,average_words_length
0,Thanks to Dubai it is easy application saves t...,14,88,5.285714
1,This for sure has been the most interactive Mo...,30,163,4.466667
2,The response to my request for police report w...,18,97,4.444444
3,My iOS software is 12.4.4 and i canât downlo...,25,119,3.8
4,When i open the app it closes unexpectedly in ...,25,129,4.2


In [17]:
#Filtering stop words
from nltk.corpus import stopwords

In [18]:
stop_words = stopwords.words('english')

In [19]:
len(stop_words)

179

In [20]:
df['stopword_count'] = df['reviews'].apply(lambda x: len([word for word in x.split() if word.lower() in stop_words]))

In [21]:
df['stopword_rate'] = df['stopword_count'] / df['word_count']

In [23]:
df.head()

Unnamed: 0,reviews,word_count,char_count,average_words_length,stopword_count,stopword_rate
0,Thanks to Dubai it is easy application saves t...,14,88,5.285714,5,0.357143
1,This for sure has been the most interactive Mo...,30,163,4.466667,15,0.5
2,The response to my request for police report w...,18,97,4.444444,8,0.444444
3,My iOS software is 12.4.4 and i canât downlo...,25,119,3.8,11,0.44
4,When i open the app it closes unexpectedly in ...,25,129,4.2,8,0.32


In [207]:
df.to_csv(r'DPReviews.csv', sep='\t', encoding='utf-8', header='true')

In [28]:
df['lowercase'] = df['reviews'].apply(lambda x: " ".join(word.lower() for word in x.split()))

In [29]:
df.head()

Unnamed: 0,reviews,word_count,char_count,average_words_length,stopword_count,stopword_rate,lowercase
0,Thanks to Dubai it is easy application saves t...,14,88,5.285714,5,0.357143,thanks to dubai it is easy application saves t...
1,This for sure has been the most interactive Mo...,30,163,4.466667,15,0.5,this for sure has been the most interactive mo...
2,The response to my request for police report w...,18,97,4.444444,8,0.444444,the response to my request for police report w...
3,My iOS software is 12.4.4 and i canât downlo...,25,119,3.8,11,0.44,my ios software is 12.4.4 and i canât downlo...
4,When i open the app it closes unexpectedly in ...,25,129,4.2,8,0.32,when i open the app it closes unexpectedly in ...


In [31]:
df['puntuaction'] = df['lowercase'].str.replace('[^\w\s]','')

In [34]:
from nltk.corpus import stopwords
stop_words = stopwords.words('english')
stop_words

['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

In [38]:
df['stopwords'] = df ['puntuaction'].apply(lambda x: " ".join(word for word in x.split() if word not in stop_words))

In [41]:
df.head()

Unnamed: 0,reviews,word_count,char_count,average_words_length,stopword_count,stopword_rate,lowercase,puntuaction,stopwords
0,Thanks to Dubai it is easy application saves t...,14,88,5.285714,5,0.357143,thanks to dubai it is easy application saves t...,thanks to dubai it is easy application saves t...,thanks dubai easy application saves time getti...
1,This for sure has been the most interactive Mo...,30,163,4.466667,15,0.5,this for sure has been the most interactive mo...,this for sure has been the most interactive mo...,sure interactive mobile app iâve ever used wis...
2,The response to my request for police report w...,18,97,4.444444,8,0.444444,the response to my request for police report w...,the response to my request for police report w...,response request police report prompt acted up...
3,My iOS software is 12.4.4 and i canât downlo...,25,119,3.8,11,0.44,my ios software is 12.4.4 and i canât downlo...,my ios software is 1244 and i canât download t...,ios software 1244 canât download app donât kno...
4,When i open the app it closes unexpectedly in ...,25,129,4.2,8,0.32,when i open the app it closes unexpectedly in ...,when i open the app it closes unexpectedly in ...,open app closes unexpectedly like 57 seconds i...


In [46]:
pd.Series(" ".join(df['stopwords']).split()).value_counts()[:30]

app            88
police         74
dubai          71
easy           35
application    32
good           32
best           30
excellent      25
thanks         23
great          22
service        20
friendly       20
user           19
use            18
time           16
useful         14
helpful        14
services       13
thank          13
much           12
like           12
world          11
really         11
always         10
keep           10
uae             9
ð               9
one             9
fine            9
love            9
dtype: int64

In [47]:
other_stop_words = ['much', 'keep', 'ð', 'really', 'one']

In [48]:
len(other_stop_words)

5

In [50]:
df['cleanreviews'] = df['stopwords'].apply(lambda x: " ".join(word for word in x.split() if word not in other_stop_words))

In [53]:
df.head()

Unnamed: 0,reviews,word_count,char_count,average_words_length,stopword_count,stopword_rate,lowercase,puntuaction,stopwords,cleanreviews
0,Thanks to Dubai it is easy application saves t...,14,88,5.285714,5,0.357143,thanks to dubai it is easy application saves t...,thanks to dubai it is easy application saves t...,thanks dubai easy application saves time getti...,thanks dubai easy application saves time getti...
1,This for sure has been the most interactive Mo...,30,163,4.466667,15,0.5,this for sure has been the most interactive mo...,this for sure has been the most interactive mo...,sure interactive mobile app iâve ever used wis...,sure interactive mobile app iâve ever used wis...
2,The response to my request for police report w...,18,97,4.444444,8,0.444444,the response to my request for police report w...,the response to my request for police report w...,response request police report prompt acted up...,response request police report prompt acted up...
3,My iOS software is 12.4.4 and i canât downlo...,25,119,3.8,11,0.44,my ios software is 12.4.4 and i canât downlo...,my ios software is 1244 and i canât download t...,ios software 1244 canât download app donât kno...,ios software 1244 canât download app donât kno...
4,When i open the app it closes unexpectedly in ...,25,129,4.2,8,0.32,when i open the app it closes unexpectedly in ...,when i open the app it closes unexpectedly in ...,open app closes unexpectedly like 57 seconds i...,open app closes unexpectedly like 57 seconds i...


# Lemmatization

In [56]:
from textblob import Word

In [61]:
df['lemmatized'] = df['cleanreviews'].apply(lambda x: " ".join (Word(word).lemmatize() for word in x.split()))

In [62]:
df.head()

Unnamed: 0,reviews,word_count,char_count,average_words_length,stopword_count,stopword_rate,lowercase,puntuaction,stopwords,cleanreviews,lemmatized
0,Thanks to Dubai it is easy application saves t...,14,88,5.285714,5,0.357143,thanks to dubai it is easy application saves t...,thanks to dubai it is easy application saves t...,thanks dubai easy application saves time getti...,thanks dubai easy application saves time getti...,thanks dubai easy application save time gettin...
1,This for sure has been the most interactive Mo...,30,163,4.466667,15,0.5,this for sure has been the most interactive mo...,this for sure has been the most interactive mo...,sure interactive mobile app iâve ever used wis...,sure interactive mobile app iâve ever used wis...,sure interactive mobile app iâve ever used wis...
2,The response to my request for police report w...,18,97,4.444444,8,0.444444,the response to my request for police report w...,the response to my request for police report w...,response request police report prompt acted up...,response request police report prompt acted up...,response request police report prompt acted up...
3,My iOS software is 12.4.4 and i canât downlo...,25,119,3.8,11,0.44,my ios software is 12.4.4 and i canât downlo...,my ios software is 1244 and i canât download t...,ios software 1244 canât download app donât kno...,ios software 1244 canât download app donât kno...,io software 1244 canât download app donât know...
4,When i open the app it closes unexpectedly in ...,25,129,4.2,8,0.32,when i open the app it closes unexpectedly in ...,when i open the app it closes unexpectedly in ...,open app closes unexpectedly like 57 seconds i...,open app closes unexpectedly like 57 seconds i...,open app close unexpectedly like 57 second iph...


# Sentiment Analysis

In [63]:
from textblob import TextBlob

In [65]:
df['polarity'] = df['lemmatized'].apply(lambda x: TextBlob(x).sentiment[0])

In [69]:
df['subjectivity'] = df['lemmatized'].apply(lambda x: TextBlob(x).sentiment[1])

In [74]:
df.drop(['lowercase', 'puntuaction', 'stopwords', 'cleanreviews', 'lemmatized'], axis=1, inplace=True)

In [77]:
df.sort_values(by='polarity')

Unnamed: 0,reviews,word_count,char_count,average_words_length,stopword_count,stopword_rate,polarity,subjectivity
202,App not working with iPhone. Unable to work th...,15,75,4.066667,5,0.333333,-0.500000,0.500000
164,Sometime the app crashes randomly.,5,34,6.000000,1,0.200000,-0.500000,0.500000
131,The robot doesnt understand my questions and i...,9,59,5.666667,4,0.444444,-0.400000,0.800000
116,Very slow and always keep loading and nothing ...,9,53,5.000000,3,0.333333,-0.300000,0.400000
17,We need to pay for the black points. There is ...,12,55,3.666667,7,0.583333,-0.166667,0.433333
170,the app is not working even with a full intern...,22,119,4.454545,11,0.500000,-0.116667,0.738889
105,"Terrible app, no customer service or support, ...",16,107,5.750000,5,0.312500,-0.105556,0.694444
223,"Save alot of time, just in case of anything us...",35,184,4.285714,14,0.400000,-0.093750,0.321429
16,Very slow.. Doing it for the 5 time but not ready,11,49,3.545455,7,0.636364,-0.050000,0.450000
112,"Take ages to load , this app took long time to...",19,90,3.789474,6,0.315789,-0.050000,0.400000


In [78]:
df.describe()

Unnamed: 0,word_count,char_count,average_words_length,stopword_count,stopword_rate,polarity,subjectivity
count,280.0,280.0,280.0,280.0,280.0,280.0,280.0
mean,9.964286,55.664286,4.949465,3.835714,0.327651,0.399189,0.471313
std,7.55663,41.264878,1.303219,3.658565,0.184584,0.350747,0.324801
min,1.0,4.0,2.333333,0.0,0.0,-0.5,0.0
25%,4.75,24.0,4.142857,1.0,0.222222,0.0,0.2
50%,8.0,43.0,4.707143,3.0,0.333333,0.4,0.5
75%,14.0,78.0,5.5,6.0,0.451136,0.7,0.741667
max,35.0,184.0,12.0,18.0,0.8,1.0,1.0
