In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the scraped review data from the CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer='web_scrapping.csv')

In [3]:
# Preview the first five rows of the loaded reviews.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Customer Name,Review Rating,Review Title,Review Content,Product Name
0,0,Aman More,4.0 out of 5 stars,A good daily driver.,"Pros:1) Clean and bloatfree OxygenOS, which ...","OnePlus Nord 5G (Blue Marble, 8GB RAM, 128GB S..."
1,1,Abhishek Agarwal,3.0 out of 5 stars,Bad bad camera,It's not very often I leave a critical revie...,"OnePlus Nord 5G (Blue Marble, 8GB RAM, 128GB S..."
2,2,Kiran KS,4.0 out of 5 stars,The original segment of One Plus,Battery usage update: Drains faster than oth...,"OnePlus Nord 5G (Blue Marble, 8GB RAM, 128GB S..."
3,3,Nikhil,5.0 out of 5 stars,*Read before you buy!!*,"Yea..pre-ordered on 28 July, got it on 4 Aug...","OnePlus Nord 5G (Blue Marble, 8GB RAM, 128GB S..."
4,4,Deblina Roy,2.0 out of 5 stars,Disappointing,Heavily disappointed. So much of hype and th...,"OnePlus Nord 5G (Blue Marble, 8GB RAM, 128GB S..."


### Converting text to lower case

In [4]:
# Lower-case every review and collapse runs of whitespace to single spaces.
# The vectorised .str accessor replaces the per-row lambda (which reused the
# name `x` for both the review and each of its tokens) and passes missing
# values through as NaN instead of raising AttributeError on them.
df['Review Content'] = (
    df['Review Content']
    .str.split()      # split on any whitespace, like str.split()
    .str.join(" ")    # re-join tokens with a single space
    .str.lower()
)
df['Review Content'].head()

0    pros:1) clean and bloatfree oxygenos, which ru...
1    it's not very often i leave a critical review ...
2    battery usage update: drains faster than other...
3    yea..pre-ordered on 28 july, got it on 4 augus...
4    heavily disappointed. so much of hype and the ...
Name: Review Content, dtype: object

### Removing punctuation

In [5]:
# Strip punctuation: delete every character that is neither a word character
# (\w) nor whitespace (\s).
# regex=True is passed explicitly because Series.str.replace treats the pattern
# as a literal string by default in pandas >= 2.0, which would silently leave
# the text unchanged. The raw string avoids invalid-escape warnings for \w, \s.
df['Review Content'] = df['Review Content'].str.replace(r'[^\w\s]', '', regex=True)
df['Review Content'].head()

0    pros1 clean and bloatfree oxygenos which runs ...
1    its not very often i leave a critical review f...
2    battery usage update drains faster than other ...
3    yeapreordered on 28 july got it on 4 august pa...
4    heavily disappointed so much of hype and the c...
Name: Review Content, dtype: object

### Removal of stopwords

In [6]:
from nltk.corpus import stopwords

# NLTK's English stopword list, kept as a list under the original name `stop`.
stop = stopwords.words('english')
# Set copy so each per-token membership test is O(1) instead of a list scan.
stop_set = set(stop)

# Drop stopword tokens from each review and re-join the rest with single spaces.
# NOTE(review): punctuation is stripped in an earlier cell, so apostrophe-less
# contractions (e.g. "dont" in the sampled output) are presumably no longer
# matched by the stopword list — confirm whether that is intended.
df['Review Content'] = df['Review Content'].apply(
    lambda text: " ".join(word for word in text.split() if word not in stop_set)
)
# Fixed seed so the spot-check shows the same rows on every run.
df['Review Content'].sample(10, random_state=0)

57    giving review spending 20 days phone retail us...
38    totally impressed built looks 13k thought gett...
29    writing review using phone 7 days believe real...
87                                   super mobile phone
8     front camera bad low light photo also bad need...
9     prosmooth os lags allamoled screen good qualit...
30      dont buy 10 to15 mins gaming phone lagging much
28    obviously might done extensive research watche...
86    expect price everything perfecta beautiful mob...
68    opinion dont think much phone get attaching ni...
Name: Review Content, dtype: object

In [7]:
# Re-inspect the first five rows after the text-cleaning steps above.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Customer Name,Review Rating,Review Title,Review Content,Product Name
0,0,Aman More,4.0 out of 5 stars,A good daily driver.,pros1 clean bloatfree oxygenos runs smooth com...,"OnePlus Nord 5G (Blue Marble, 8GB RAM, 128GB S..."
1,1,Abhishek Agarwal,3.0 out of 5 stars,Bad bad camera,often leave critical review product excited ne...,"OnePlus Nord 5G (Blue Marble, 8GB RAM, 128GB S..."
2,2,Kiran KS,4.0 out of 5 stars,The original segment of One Plus,battery usage update drains faster one plus mo...,"OnePlus Nord 5G (Blue Marble, 8GB RAM, 128GB S..."
3,3,Nikhil,5.0 out of 5 stars,*Read before you buy!!*,yeapreordered 28 july got 4 august package nic...,"OnePlus Nord 5G (Blue Marble, 8GB RAM, 128GB S..."
4,4,Deblina Roy,2.0 out of 5 stars,Disappointing,heavily disappointed much hype camera even upt...,"OnePlus Nord 5G (Blue Marble, 8GB RAM, 128GB S..."


### Assigning Polarity Scores

In [8]:
from textblob import TextBlob

In [9]:
# Score each review with TextBlob's sentiment polarity.
# Polarity is a float in the range [-1, 1], where 1 means a positive statement
# and -1 means a negative statement.
def pol(x):
    """Return the TextBlob sentiment polarity of the text ``x``."""
    return TextBlob(x).sentiment.polarity


# Cast the review column to str before scoring, then store one score per row.
df['Review Content'] = df['Review Content'].astype(str)
df['polarity'] = df['Review Content'].map(pol)
df

Unnamed: 0.1,Unnamed: 0,Customer Name,Review Rating,Review Title,Review Content,Product Name,polarity
0,0,Aman More,4.0 out of 5 stars,A good daily driver.,pros1 clean bloatfree oxygenos runs smooth com...,"OnePlus Nord 5G (Blue Marble, 8GB RAM, 128GB S...",0.215650
1,1,Abhishek Agarwal,3.0 out of 5 stars,Bad bad camera,often leave critical review product excited ne...,"OnePlus Nord 5G (Blue Marble, 8GB RAM, 128GB S...",0.083182
2,2,Kiran KS,4.0 out of 5 stars,The original segment of One Plus,battery usage update drains faster one plus mo...,"OnePlus Nord 5G (Blue Marble, 8GB RAM, 128GB S...",0.344118
3,3,Nikhil,5.0 out of 5 stars,*Read before you buy!!*,yeapreordered 28 july got 4 august package nic...,"OnePlus Nord 5G (Blue Marble, 8GB RAM, 128GB S...",0.266724
4,4,Deblina Roy,2.0 out of 5 stars,Disappointing,heavily disappointed much hype camera even upt...,"OnePlus Nord 5G (Blue Marble, 8GB RAM, 128GB S...",0.091026
...,...,...,...,...,...,...,...
95,95,Sasa,4.0 out of 5 stars,Oppo A52,chose mobile others later delivery area go loc...,"OPPO A52 (Stream White, 6GB RAM, 128GB Storage...",0.298611
96,96,Krishnakant Nigam,1.0 out of 5 stars,Heating problem,heating problem therevery bad experience,"OPPO A52 (Stream White, 6GB RAM, 128GB Storage...",-0.700000
97,97,abhishek,5.0 out of 5 stars,Camera and processor is good,nice camera,"OPPO A52 (Stream White, 6GB RAM, 128GB Storage...",0.600000
98,98,DARSHA H JIWANI,5.0 out of 5 stars,Nice phone,nice phone,"OPPO A52 (Stream White, 6GB RAM, 128GB Storage...",0.600000


In [10]:
# Save the scored reviews to CSV, leaving out the DataFrame index.
df.to_csv(path_or_buf='Amazon_reviews_scores.csv', index=False)