# Amazon Customer Reviews (Web-Scraping)

## Importing the Libraries

In [1]:
from bs4 import BeautifulSoup
import requests

In [2]:
#base_url Changes according to the review pages
base_url = "https://www.amazon.in/Test-Exclusive-547/product-reviews/B078BNQ318/ref=cm_cr_arp_d_paging_btm_next_2?ie=UTF8&reviewerType=all_reviews&pageNumber={}"

In [3]:
#Checking the page information and trying to retrive them with class names
url = "https://www.amazon.in/Test-Exclusive-547/product-reviews/B078BNQ318/ref=cm_cr_arp_d_paging_btm_next_2?ie=UTF8&reviewerType=all_reviews&pageNumber=2"
page = requests.get(url)
page
soup = BeautifulSoup(page.content,'html.parser')  #making a object of BeautifulSoup


In [4]:
#Retriving the Name of Customer by Inspect Method and Checking the Class
names = soup.select('span.a-profile-name')
len(names)     

#Every review page Contains 10 reviews but here it is 12 beacuse every page contain two extra reviews (i.e, Top Positive review and Top Critical Review)

12

In [5]:
# Droping the Top Positive review and Top Critical Review
names = soup.select('span.a-profile-name')[2:] 
names

[<span class="a-profile-name">Vikas Shah</span>,
 <span class="a-profile-name">Prasad G.</span>,
 <span class="a-profile-name">Vishal Baboo</span>,
 <span class="a-profile-name">Himanshu Chaudhary</span>,
 <span class="a-profile-name">Shivangi Sahni</span>,
 <span class="a-profile-name">K Sudarshan Reddy</span>,
 <span class="a-profile-name">Amazon Customer</span>,
 <span class="a-profile-name">Raghul</span>,
 <span class="a-profile-name">Sanjay kr gupta</span>,
 <span class="a-profile-name">Pranip</span>]

In [6]:
len(names)

10

In [7]:
#Retriving the Title 
titles = soup.select('a.review-title span')
titles

[<span>best and most reliable phone at this price</span>,
 <span>Iconic but Camera could have been even better</span>,
 <span>Not as expected, Think before buy....</span>,
 <span>Camera not upto the mark.</span>,
 <span>Warranty card not received</span>,
 <span>Not a good deal for camera</span>,
 <span>Awful Amoled Screen</span>,
 <span>Two major defects on the phone and now replacing it</span>,
 <span>Not satisfied</span>,
 <span>Don't buy this device..</span>]

In [8]:
#Retriving the Star vote
stars = soup.select('span.a-icon-alt')[3:]
stars

[<span class="a-icon-alt">5.0 out of 5 stars</span>,
 <span class="a-icon-alt">5.0 out of 5 stars</span>,
 <span class="a-icon-alt">2.0 out of 5 stars</span>,
 <span class="a-icon-alt">2.0 out of 5 stars</span>,
 <span class="a-icon-alt">1.0 out of 5 stars</span>,
 <span class="a-icon-alt">1.0 out of 5 stars</span>,
 <span class="a-icon-alt">1.0 out of 5 stars</span>,
 <span class="a-icon-alt">1.0 out of 5 stars</span>,
 <span class="a-icon-alt">1.0 out of 5 stars</span>,
 <span class="a-icon-alt">1.0 out of 5 stars</span>]

In [9]:
#Retriving the Product name
product = soup.select('a.a-link-normal')[0].get_text()
product

'OnePlus 8 (Glacial Green 6GB RAM+128GB Storage)'

In [10]:
#Retriving the dates
dates = soup.select('span.review-date')[2:]
dates

[<span class="a-size-base a-color-secondary review-date" data-hook="review-date">Reviewed in India on 21 June 2020</span>,
 <span class="a-size-base a-color-secondary review-date" data-hook="review-date">Reviewed in India on 17 June 2020</span>,
 <span class="a-size-base a-color-secondary review-date" data-hook="review-date">Reviewed in India on 17 June 2020</span>,
 <span class="a-size-base a-color-secondary review-date" data-hook="review-date">Reviewed in India on 19 June 2020</span>,
 <span class="a-size-base a-color-secondary review-date" data-hook="review-date">Reviewed in India on 18 June 2020</span>,
 <span class="a-size-base a-color-secondary review-date" data-hook="review-date">Reviewed in India on 23 June 2020</span>,
 <span class="a-size-base a-color-secondary review-date" data-hook="review-date">Reviewed in India on 23 June 2020</span>,
 <span class="a-size-base a-color-secondary review-date" data-hook="review-date">Reviewed in India on 25 June 2020</span>,
 <span class="a-

In [13]:
#Making list to contain values for every columns
cust_name = []
ratings = []
rev_title = []
rev_date = []
rev_text = []
product_name = []

for n in range(1,11):    #Loop for changing the pages, here range is number of pages
    scrape_url = base_url.format(n)    #Changing the url for every page
    res = requests.get(scrape_url)
    
    soup = BeautifulSoup(res.text,'lxml')
    product = soup.select('a.a-link-normal')[0]
    names = soup.select('span.a-profile-name')[2:]
    titles = soup.select('a.review-title span')
    stars = soup.select('span.a-icon-alt')[3:]
    dates = soup.select('span.review-date')[2:]
    text = soup.select("span.review-text-content span")
   
    #To append each customer values as each page contains 10 reviews
    for i in range(10):
        product_name.append(product.get_text())
        cust_name.append(names[i].get_text())
        ratings.append(stars[i].get_text())
        rev_title.append(titles[i].get_text())
        rev_date.append(dates[i].get_text().replace("Reviewed in India on ",""))
        rev_text.append(text[i].get_text().strip( '\n' ))

In [14]:
#Forming a Data Frame
import pandas as pd
df = pd.DataFrame()
df['Product'] = product_name
df['Date'] = rev_date
df['Customer Name'] = cust_name
df['Ratings'] = ratings
df['Title'] = rev_title
df['Reviews'] =  rev_text

In [15]:
#Saving the DataFrame to the local machine
df.to_csv('amazon_oneplus8_Reviews.csv')

In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Product        100 non-null    object
 1   Date           100 non-null    object
 2   Customer Name  100 non-null    object
 3   Ratings        100 non-null    object
 4   Title          100 non-null    object
 5   Reviews        100 non-null    object
dtypes: object(6)
memory usage: 4.8+ KB


In [17]:
df.sample(10)

Unnamed: 0,Product,Date,Customer Name,Ratings,Title,Reviews
47,OnePlus 8 (Glacial Green 6GB RAM+128GB Storage),7 August 2020,VRKREDDY,5.0 out of 5 stars,Very good phone,I loved it. Actually this is placed for my b...
26,OnePlus 8 (Glacial Green 6GB RAM+128GB Storage),22 June 2020,Shubham,1.0 out of 5 stars,Not worth the price,Camera is mediocre. Finger print detection i...
31,OnePlus 8 (Glacial Green 6GB RAM+128GB Storage),28 June 2020,Mayank Agrawal,1.0 out of 5 stars,DON'T PURCHASE,Worst phone. Don't purchase from Amazon. App...
81,OnePlus 8 (Glacial Green 6GB RAM+128GB Storage),21 June 2020,Bapi Majumder,5.0 out of 5 stars,Beast Of ONEPLUS Breed...😎,A great power comes with great responsibilit...
4,OnePlus 8 (Glacial Green 6GB RAM+128GB Storage),18 June 2020,Aparna Uniyal,1.0 out of 5 stars,Battery/Heating/Network issue,I have bought one plus 8 just 3 days back an...
23,OnePlus 8 (Glacial Green 6GB RAM+128GB Storage),17 June 2020,Dr. Mohammed Riyaz Sayyed,5.0 out of 5 stars,Just one word for this beauty killer killer ki...,OnePlus ❤️ again u proove that u r the only...
67,OnePlus 8 (Glacial Green 6GB RAM+128GB Storage),17 August 2020,Joe Thomas,4.0 out of 5 stars,Great Phone,I was using one plus 5t since 2018. No issue...
32,OnePlus 8 (Glacial Green 6GB RAM+128GB Storage),14 July 2020,Megha,1.0 out of 5 stars,Dont buy one plus products,There's always an issue with my speaker wher...
38,OnePlus 8 (Glacial Green 6GB RAM+128GB Storage),27 June 2020,Sunil Sharma,1.0 out of 5 stars,Heating problem,Phone got heating issue and network issue.we...
63,OnePlus 8 (Glacial Green 6GB RAM+128GB Storage),16 September 2020,Arul Chendhur,5.0 out of 5 stars,Worth the money 💰💰💰,"Its the overall good phone, and i have used ..."


In [18]:
df['Reviews'][2]

"  Great phone!Gaming performance is so great, device don't get heat up at all even at high settings!I played PUBG Mobile and details it provides were so exciting and make it feel real. Shadows of tree,stones, mud, etc. If you have small hands don't buy it for gaming, don't feel comfortable with small hands.Camera is good, can't say it is awesome!Battery life is great too, charge too fast!UI is as expected stock android, some pre-installed third party apps were also installed, you can uninstall all of them except Netflix.The back cover they provide is loose and cheap. Have to buy a new better one!Over all a great phone!Cheers! :)"

In [20]:
df1 = pd.read_csv('amazon_Customer_Reviews.csv')
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 790 entries, 0 to 789
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Unnamed: 0     790 non-null    int64 
 1   Product        790 non-null    object
 2   Date           790 non-null    object
 3   Customer Name  790 non-null    object
 4   Ratings        790 non-null    object
 5   Title          790 non-null    object
 6   Reviews        790 non-null    object
dtypes: int64(1), object(6)
memory usage: 43.3+ KB


In [21]:
df1.head()

Unnamed: 0.1,Unnamed: 0,Product,Date,Customer Name,Ratings,Title,Reviews
0,0,Apple iPhone XR (128GB) - Black,12 December 2018,Sameer Patil,3.0 out of 5 stars,"Which iPhone you should Purchase ? iPhone 8, X...",NOTE:@ This is detailed comparison between i...
1,1,Apple iPhone XR (128GB) - Black,17 November 2018,Amazon Customer,1.0 out of 5 stars,Don't buy iPhone xr from Amazon.,Very bad experience with this iPhone xr phon...
2,2,Apple iPhone XR (128GB) - Black,27 January 2019,A,5.0 out of 5 stars,Happy with the purchase,Amazing phone with amazing camera coming fro...
3,3,Apple iPhone XR (128GB) - Black,2 May 2019,Shubham Dutta,1.0 out of 5 stars,Amazon is not an apple authorised reseller. Pl...,So I got the iPhone XR just today. The produ...
4,4,Apple iPhone XR (128GB) - Black,24 May 2019,Nepuni Lokho,5.0 out of 5 stars,Excellent Battery life and buttery smooth UI,I've been an android user all my life until ...


In [22]:
df2 = pd.read_csv("amazon_Samsung Galaxy Note 20 Ultra 5G_Reviews.csv")
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Unnamed: 0     100 non-null    int64 
 1   Product        100 non-null    object
 2   Date           100 non-null    object
 3   Customer Name  100 non-null    object
 4   Ratings        100 non-null    object
 5   Title          100 non-null    object
 6   Reviews        100 non-null    object
dtypes: int64(1), object(6)
memory usage: 5.6+ KB


In [23]:
df1 = df1.append(df2, ignore_index=True, sort=False)

In [24]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 890 entries, 0 to 889
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Unnamed: 0     890 non-null    int64 
 1   Product        890 non-null    object
 2   Date           890 non-null    object
 3   Customer Name  890 non-null    object
 4   Ratings        890 non-null    object
 5   Title          890 non-null    object
 6   Reviews        890 non-null    object
dtypes: int64(1), object(6)
memory usage: 48.8+ KB


In [25]:
df1.head()

Unnamed: 0.1,Unnamed: 0,Product,Date,Customer Name,Ratings,Title,Reviews
0,0,Apple iPhone XR (128GB) - Black,12 December 2018,Sameer Patil,3.0 out of 5 stars,"Which iPhone you should Purchase ? iPhone 8, X...",NOTE:@ This is detailed comparison between i...
1,1,Apple iPhone XR (128GB) - Black,17 November 2018,Amazon Customer,1.0 out of 5 stars,Don't buy iPhone xr from Amazon.,Very bad experience with this iPhone xr phon...
2,2,Apple iPhone XR (128GB) - Black,27 January 2019,A,5.0 out of 5 stars,Happy with the purchase,Amazing phone with amazing camera coming fro...
3,3,Apple iPhone XR (128GB) - Black,2 May 2019,Shubham Dutta,1.0 out of 5 stars,Amazon is not an apple authorised reseller. Pl...,So I got the iPhone XR just today. The produ...
4,4,Apple iPhone XR (128GB) - Black,24 May 2019,Nepuni Lokho,5.0 out of 5 stars,Excellent Battery life and buttery smooth UI,I've been an android user all my life until ...


In [26]:
df1.drop('Unnamed: 0',axis=1,inplace=True)

In [27]:
df1.head()

Unnamed: 0,Product,Date,Customer Name,Ratings,Title,Reviews
0,Apple iPhone XR (128GB) - Black,12 December 2018,Sameer Patil,3.0 out of 5 stars,"Which iPhone you should Purchase ? iPhone 8, X...",NOTE:@ This is detailed comparison between i...
1,Apple iPhone XR (128GB) - Black,17 November 2018,Amazon Customer,1.0 out of 5 stars,Don't buy iPhone xr from Amazon.,Very bad experience with this iPhone xr phon...
2,Apple iPhone XR (128GB) - Black,27 January 2019,A,5.0 out of 5 stars,Happy with the purchase,Amazing phone with amazing camera coming fro...
3,Apple iPhone XR (128GB) - Black,2 May 2019,Shubham Dutta,1.0 out of 5 stars,Amazon is not an apple authorised reseller. Pl...,So I got the iPhone XR just today. The produ...
4,Apple iPhone XR (128GB) - Black,24 May 2019,Nepuni Lokho,5.0 out of 5 stars,Excellent Battery life and buttery smooth UI,I've been an android user all my life until ...
