In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
import numpy as np

In [2]:
# Function to extract product title
def get_title(soup):
    """Return the product title text from a parsed listing page.

    Finds the ``h1`` tag with class ``x-item-title__mainTitle`` and reads the
    text of the bold ``span`` nested inside it.

    Args:
        soup: Parsed page object exposing a BeautifulSoup-style ``find``.

    Returns:
        The title text, or an empty string if the lookup raises
        ``AttributeError`` (for example because a tag was not found).
    """
    try:
        heading = soup.find('h1', attrs={'class': 'x-item-title__mainTitle'})
        # If the heading is missing, ``heading`` is None and this lookup
        # raises AttributeError, which selects the fallback below.
        bold_span = heading.find('span', attrs={'class': 'ux-textspans ux-textspans--BOLD'})
        return bold_span.text
    except AttributeError:
        return ""
    
# Function to extract product price
def get_price(soup):
    """Return the price text from a parsed listing page.

    Reads the full text of the ``div`` with class ``x-price-primary``.

    Args:
        soup: Parsed page object exposing a BeautifulSoup-style ``find``.

    Returns:
        The price text exactly as it appears in the tag, or an empty string
        if the lookup raises ``AttributeError`` (for example because the tag
        was not found).
    """
    try:
        price_box = soup.find('div', attrs={'class': 'x-price-primary'})
        return price_box.text
    except AttributeError:
        return ""
        
# Function to extract the availability / quantity sold text
def get_availability(soup):
    """Return the availability text from a parsed listing page.

    Reads the full text of the ``div`` with class
    ``d-quantity__availability``.

    Args:
        soup: Parsed page object exposing a BeautifulSoup-style ``find``.

    Returns:
        The availability text, or the integer ``0`` if the lookup raises
        ``AttributeError`` (for example because the tag was not found).
        Note that the fallback is an int while the normal result is a string.
    """
    try:
        availability_box = soup.find('div', attrs={'class': 'd-quantity__availability'})
        return availability_box.text
    except AttributeError:
        return 0

# Function to extract seller rating
def get_rating(soup):
    """Return the seller rating text from a parsed listing page.

    Finds the ``div`` with class ``x-sellercard-atf__info`` and reads the text
    of the ``span`` with class ``ux-textspans ux-textspans--PSEUDOLINK``
    nested inside it.

    Args:
        soup: Parsed page object exposing a BeautifulSoup-style ``find``.

    Returns:
        The rating text, or ``"No rating"`` if the lookup raises
        ``AttributeError`` (for example because a tag was not found).
    """
    try:
        seller_info = soup.find('div', attrs={'class': 'x-sellercard-atf__info'})
        # A missing seller card leaves ``seller_info`` as None, so this lookup
        # raises AttributeError and the fallback is returned.
        rating_span = seller_info.find('span', attrs={'class': 'ux-textspans ux-textspans--PSEUDOLINK'})
        return rating_span.text
    except AttributeError:
        return "No rating"

# Function to extract the seller's review count
def get_review(soup):
    """Return the seller's review-count text from a parsed listing page.

    Finds the ``li`` whose ``data-testid`` is
    ``x-sellercard-atf__about-seller`` and reads the text of the ``span`` with
    class ``ux-textspans ux-textspans--SECONDARY`` nested inside it.

    Args:
        soup: Parsed page object exposing a BeautifulSoup-style ``find``.

    Returns:
        The review-count text, or ``"No review"`` if the lookup raises
        ``AttributeError`` (for example because a tag was not found).
    """
    try:
        # Number of reviews for seller
        about_seller = soup.find('li', attrs={'data-testid': 'x-sellercard-atf__about-seller'})
        # ``attrs`` must be a dict mapping attribute name to value. A set
        # literal here would be read as a list of alternative class values and
        # would also match spans whose class is literally "class".
        review_span = about_seller.find('span', attrs={'class': 'ux-textspans ux-textspans--SECONDARY'})
        return review_span.text
    except AttributeError:
        return "No review"


In [3]:
if __name__ == '__main__':

    # Scrape one eBay search-results page, visit each linked product page,
    # collect title / price / availability / seller rating / review count,
    # and write the rows that have a title to 'ebay_shoes.csv'.

    # Headers for request: a desktop-browser User-Agent and an Accept-Language header
    headers = ({'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36', 'Accept-Language': 'en-US, en;q=0.5'})

    # Website URL (search results page)
    URL = 'https://www.ebay.com/sch/i.html?_from=R40&_nkw=shoes&_sacat=0&_odkw=nike&_osacat=0'

    # Seconds to wait on each HTTP request. Without a timeout a stalled
    # connection would block this cell indefinitely.
    REQUEST_TIMEOUT = 30

    # HTTP request
    webpage = requests.get(URL, headers=headers, timeout=REQUEST_TIMEOUT)

    # Parse HTML format
    soup = BeautifulSoup(webpage.content, "html.parser")

    # Find product links
    links = soup.find_all("a", class_="s-item__link")

    # Collect the href of every match after the first one.
    # NOTE(review): the first match is skipped as in the original code;
    # presumably it is not a real product listing -- confirm.
    # Anchors without an href are left out so None is never passed to requests.
    links_list = []
    for link in links[1:]:
        href = link.get('href')
        if href:
            links_list.append(href)

    d = {"title":[], "price":[], "availability":[], "rating":[], "review":[]}

    # Loop for extracting product details from each page
    for link in links_list:
        try:
            product_page = requests.get(link, headers=headers, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # One unreachable product page should not discard everything
            # scraped so far. Skipping it has the same end result as the
            # empty-title rows that are dropped below.
            print(f"Skipping {link}: {exc}")
            continue

        product_soup = BeautifulSoup(product_page.content, "html.parser")

        # Function calls to show all necessary product information
        d['title'].append(get_title(product_soup))
        d['price'].append(get_price(product_soup))
        d['availability'].append(get_availability(product_soup))
        d['rating'].append(get_rating(product_soup))
        d['review'].append(get_review(product_soup))

    # Convert product details dictionary to dataframe
    ebay_df = pd.DataFrame.from_dict(d)

    # Replace empty title strings with null values.
    # Assign the result back instead of calling replace(..., inplace=True) on
    # the selected column: the chained in-place form warns in pandas 2.x and
    # does not update the frame under Copy-on-Write.
    ebay_df['title'] = ebay_df['title'].replace('', np.nan)

    # Drop null value titles
    ebay_df = ebay_df.dropna(subset=['title'])

    # Convert dataframe to csv file
    ebay_df.to_csv('ebay_shoes.csv', header=True, index=False)
    

In [4]:
# Display the listings DataFrame assigned to ebay_df in the scraping cell
ebay_df

Unnamed: 0,title,price,availability,rating,review
0,Reebok Nano X2 Men's Training Shoes,US $59.97,Limited quantity available / 357 sold,97% positive,(132726)
1,Reebok Flexagon Force 4 Men's Training Shoes,US $49.96,Limited quantity available / 586 sold,97% positive,(132726)
2,Reebok Men's Club C 85 Shoes,US $75.00,10 available / 100 sold,97% positive,(132726)
3,Reebok Club C Revenge Vintage Men's Shoes,US $90.00,10 available / 178 sold,97% positive,(132726)
4,adidas men Advantage Shoes,US $42.00,"More than 10 available / 5,867 sold",97.8% positive,(470709)
...,...,...,...,...,...
63,Men's Predator 20.3 Shoes,US $34.99,7 available / 16 sold,99.5% positive,(4494)
64,Reebok Ultra Flash Shoes,US $29.97,10 available / 39 sold,97% positive,(132726)
65,Reebok Court Advance Shoes,US $29.97,10 available / 241 sold,97% positive,(132726)
66,adidas men NMD_V3 Shoes,US $68.00,"Limited quantity available / 4,565 sold",97.8% positive,(470709)
