In [26]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [27]:
# Function to extract Product Title
def get_title(parser):
    """Return the product title found in the parsed page.

    Looks up the ``<span id="productTitle">`` element via ``parser.find`` and
    returns its text with surrounding whitespace removed.

    Args:
        parser: Object exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The stripped title text, or ``''`` when the lookup raises
        ``AttributeError`` (for example because ``find`` returned ``None``).
    """
    try:
        return parser.find("span", attrs={"id": "productTitle"}).text.strip()
    except AttributeError:
        # No title element: signal "missing" with an empty string.
        return ''

# Function to extract Product Price
def get_price(parser):
    """Return the whole-number part of the product price as a string.

    Reads the text of the ``<span class="a-price-whole">`` element and removes
    any leading or trailing ``'.'`` characters from it.

    Args:
        parser: Object exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The price text without leading/trailing dots, or ``''`` when the
        lookup raises ``AttributeError`` (for example because ``find``
        returned ``None``).
    """
    try:
        whole_part = parser.find("span", attrs={'class': 'a-price-whole'}).text
    except AttributeError:
        return ''

    # str.strip('.') leaves the text untouched when it has no leading or
    # trailing dot, so no separate "contains a dot" check is required.
    return whole_part.strip('.')

# Function to extract Product Rating
def get_rating(parser):
    """Return the product rating as a float, or ``'No Ratings'``.

    Reads the text of the first ``<span class="a-icon-alt">`` element, whose
    expected form is like ``'4.8 out of 5 stars'``, and converts the first
    whitespace-separated token to a float.

    Args:
        parser: Object exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The rating as a ``float`` when the leading token is numeric;
        otherwise the string ``'No Ratings'``. That covers a missing element
        (``AttributeError`` during lookup), empty text, and non-numeric text
        such as ``'Previous page'``.
    """
    try:
        rating_text = parser.find("span", attrs={'class': 'a-icon-alt'}).text
    except AttributeError:
        # Element not present on the page: there is nothing to convert.
        return 'No Ratings'

    # Take the whole first token rather than a fixed three-character slice so
    # that values like '5 out of 5 stars' or text with leading whitespace
    # are parsed correctly.
    tokens = rating_text.split()
    if not tokens:
        return 'No Ratings'

    try:
        return float(tokens[0])
    except ValueError:
        # The matched element did not hold a rating (e.g. a pagination label).
        return 'No Ratings'
    
# Function to extract Number of User Reviews
def get_review_count(parser):
    """Return the number of user reviews as displayed on the page.

    Reads the text of the ``<span id="acrCustomerReviewText">`` element and
    returns everything before its first space character.

    Args:
        parser: Object exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The leading portion of the review text (up to the first space), or
        ``'No Reviews'`` when the lookup raises ``AttributeError`` or the
        element text is empty.
    """
    try:
        review_text = parser.find("span", attrs={'id': 'acrCustomerReviewText'}).text
    except AttributeError:
        return 'No Reviews'

    if review_text == '':
        return 'No Reviews'

    # Only the part before the first space is kept.
    return review_text.split(' ', 1)[0]

# Function to extract Availability Status
def get_availability(parser):
    """Return the availability message shown for the product.

    Finds the ``<div id="availability">`` element, then the
    ``<span class="a-size-medium a-color-success">`` inside it, and returns
    that span's text with surrounding whitespace removed.

    Args:
        parser: Object exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The stripped availability text, or ``'Not Available'`` when either
        lookup raises ``AttributeError`` or the stripped text is empty.
    """
    try:
        container = parser.find("div", attrs={'id': 'availability'})
        status = container.find(
            "span", attrs={'class': 'a-size-medium a-color-success'}
        ).text.strip()
    except AttributeError:
        return 'Not Available'

    return status if status != '' else 'Not Available'
      

In [29]:
if __name__ == '__main__':
    # Scrape one search-results page, visit every product link found on it,
    # collect title/price/rating/review count/availability for each product,
    # and write the rows that have a title to a CSV file.

    # The webpage URL
    URL = "https://www.amazon.in/s?k=fossil+watches+for+men&crid=F9V2HJV8ICHJ&sprefix=fossil+watches%2Caps%2C214&ref=nb_sb_ss_ts-doa-p_3_14"

    # add your user agent
    HEADERS = ({'User-Agent': '', 'Accept-Language': 'en-US, en;q=0.5'})

    # Seconds to wait for each HTTP response. Without a timeout a stalled
    # connection would block the notebook indefinitely.
    REQUEST_TIMEOUT = 30

    # HTTP Request
    response = requests.get(URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)

    # fetch response content
    content = response.content

    # Soup Object containing all data
    parser = BeautifulSoup(content, 'html.parser')

    # Fetch links as List of Tag Objects
    links = parser.find_all("a", attrs={'class': 'a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal'})

    # Store the links inside a list. Anchors without an href are skipped:
    # Tag.get returns None for them, which cannot be concatenated to the
    # site prefix below.
    list_of_links = [link.get('href') for link in links if link.get('href')]

    # New dictionary that stores required values from the webpage as a list
    new_dict = {"Title":[], "Price in ₹":[], "Rating(Out of 5)":[], "Total reviews":[],"Availability":[]}

    # Loop for extracting product details from each link
    for link in list_of_links:

        actual_link = 'https://amazon.in' + link

        try:
            new_response = requests.get(actual_link, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as error:
            # One unreachable product page should not discard the rows
            # already collected; report it and move on.
            print(f"Skipping {actual_link}: {error}")
            continue

        new_parser = BeautifulSoup(new_response.content, 'html.parser')

        # Function calls to extract all necessary product information
        productTitle = get_title(new_parser)
        productPrice = get_price(new_parser)
        productRating = get_rating(new_parser)
        productReview = get_review_count(new_parser)
        productAvailability = get_availability(new_parser)

        # Adding the result of function calls to appropriate dictionary keys
        new_dict['Title'].append(productTitle)
        new_dict['Price in ₹'].append(productPrice)
        new_dict['Rating(Out of 5)'].append(productRating)
        new_dict['Total reviews'].append(productReview)
        new_dict['Availability'].append(productAvailability)

    # Convert python dictionary to pandas dataframe
    amazon_df = pd.DataFrame.from_dict(new_dict)

    # Replace empty title values with NaN ("missing data" in pandas).
    # Assign the result back instead of calling replace(..., inplace=True) on
    # the column selection: that chained in-place form operates on a
    # temporary object and does not update the dataframe under Copy-on-Write.
    amazon_df['Title'] = amazon_df['Title'].replace('', np.nan)

    # Dropping the data/rows with null title values as they don't make sense.
    amazon_df = amazon_df.dropna(subset=['Title'])

    # Finally convert the dataframe to csv file.
    amazon_df.to_csv("amazon_watch_data.csv", header=True, index=False)
    

In [38]:
# Remove the row cap so pandas renders every row of the dataframe
# instead of a truncated preview.
pd.set_option("display.max_rows", None)

amazon_df

Unnamed: 0,Title,Price in ₹,Rating(Out of 5),Total reviews,Availability
0,Fossil Autocross Analog Black Dial Men's Watch...,8995.0,4.0,2,Only 3 left in stock.
1,Fossil Bannon Analog Green Dial Men's Watch-BQ...,8995.0,4.4,59,In stock.
2,Fossil Bronson Analog Black Dial Men's Watch-F...,10995.0,4.5,2,Only 3 left in stock.
3,Fossil Stay Fashionable Watch,14995.0,4.3,295,In stock.
4,Fossil Analog Black Dial Men's Watch-FS5164,5997.0,4.3,2029,In stock.
5,Fossil Gen 5 Touchscreen Men's Smartwatch with...,9198.0,3.8,14491,Not Available
6,Fossil Analog Blue Dial Men's Watch - FS5237,7145.0,4.3,1328,In stock.
7,Fossil Chronograph White Dial Men's Watch-CH2882,5195.0,4.3,3149,In stock.
8,"Fossil Gen 5E Smartwatch with AMOLED Screen, W...",11995.0,3.9,11756,In stock.
9,Fossil Chronograph Men's Watch (Black Dial Bla...,7145.0,4.1,1335,In stock.
