<a href="https://colab.research.google.com/github/linhvien/Data-Scraping/blob/main/Ebay_Scrape_Searching_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Scraping eBay search results: fetch every result page for a keyword query and save the listings to a CSV file.

In [1]:
import csv
from urllib.parse import quote_plus, urljoin

import requests
from lxml import html

In [2]:
def get_page_items(tree):
    """Return the listing nodes found on one search-results page.

    The first ``<ul>`` whose class attribute contains ``srp-results`` is
    treated as the results container; every descendant ``<li>`` whose class
    attribute contains ``s-item`` is returned. When the page has no such
    container the result is an empty list.
    """
    result_lists = tree.xpath("//ul[contains(@class, 'srp-results')]")
    if not result_lists:
        return []
    # Only the first matching container is searched.
    return result_lists[0].xpath(".//li[contains(@class, 's-item')]")

In [3]:
def create_search_record(item):
    """Build one CSV-ready record from a single search-result node.

    Args:
        item: A node exposing ``xpath(query)``, such as an ``<li>`` returned
            by ``get_page_items``.

    Returns:
        A 6-tuple of strings ``(title, sub_title, rating, item_price,
        trending_price, item_link)``. A field whose XPath matches nothing
        is the empty string.
    """
    def text_of(query):
        # xpath() yields a list of matches; concatenate them into one string.
        return "".join(item.xpath(query))

    title = text_of(".//h3/text()")

    subtitle_main = text_of(".//div[@class='s-item__subtitle']/text()")
    subtitle_secondary = text_of(
        ".//div[@class='s-item__subtitle']//span[@class='SECONDARY_INFO']/text()"
    )
    # Insert the separating space only between two non-empty parts, so a
    # listing without a subtitle yields "" rather than " " and a listing
    # without secondary info carries no trailing space.
    sub_title = " ".join(
        part for part in (subtitle_main, subtitle_secondary) if part
    )

    rating = text_of(".//div[@class='x-star-rating']//span[@class='clipped']/text()")
    item_price = text_of(".//span[@class='s-item__price']/text()")
    trending_price = text_of(
        ".//span[@class='s-item__trending-price']/span[@class='STRIKETHROUGH']/text()"
    )
    item_link = text_of(".//a[@class='s-item__link']/@href")
    return (title, sub_title, rating, item_price, trending_price, item_link)

In [4]:
def get_next_page(tree):
    """Return the URL of the next results page, or "" when there is none.

    Args:
        tree: A parsed page exposing ``xpath(query)``.

    Returns:
        The ``href`` of the first ``<a class="pagination__next">`` element
        as a plain ``str``, or the empty string if no such link exists.
    """
    hrefs = tree.xpath("//a[@class='pagination__next']/@href")
    # Use only the first match: joining several hrefs (for example when a
    # page renders the "next" link more than once) would produce a single
    # malformed URL.
    return str(hrefs[0]) if hrefs else ""

In [5]:
def search_ebay(keywords, max_pages=None, timeout=30):
    """Scrape eBay search results for ``keywords`` across result pages.

    Starting from the first results page, each page is fetched and parsed,
    its listings are converted with ``create_search_record``, and the "next"
    link from ``get_next_page`` is followed. Crawling stops when a page has
    no listings, has no next link, returns a non-200 status, links back to
    an already-visited URL, or when ``max_pages`` pages have been fetched.

    Args:
        keywords: Free-text search query.
        max_pages: Optional upper bound on the number of pages fetched;
            ``None`` (the default) means no limit.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        A list of record tuples as produced by ``create_search_record``;
        empty when nothing could be scraped.

    Exceptions raised by ``requests.get`` (connection errors, timeouts) are
    not caught here and propagate to the caller.
    """
    # quote_plus encodes spaces as "+" and escapes characters such as "&"
    # or "#" that would otherwise corrupt the query string.
    url = "https://www.ebay.com/sch/i.html?&_nkw=" + quote_plus(keywords)

    page_data = []
    visited = set()
    pages_fetched = 0

    # Stop if pagination ever points back at a page we already fetched.
    while url and url not in visited:
        if max_pages is not None and pages_fetched >= max_pages:
            break
        visited.add(url)

        response = requests.get(url, timeout=timeout)
        # Applies to the first page as well as to every later one.
        if response.status_code != 200:
            break
        pages_fetched += 1

        tree = html.fromstring(response.text)
        items = get_page_items(tree)
        if not items:
            break

        page_data.extend(create_search_record(item) for item in items)

        next_page = get_next_page(tree)
        # urljoin leaves absolute links untouched and resolves relative ones
        # against the URL that was actually served.
        url = urljoin(response.url, next_page) if next_page else ""

    return page_data

In [6]:
def save_results(records, filename, save_method='w'):
    """Write scraped records to a CSV file.

    Args:
        records: Iterable of row sequences matching the six header columns.
        filename: Path of the CSV file to write.
        save_method: Mode passed to ``open`` (``'w'`` overwrites, ``'a'``
            appends).

    The header row is written only when the file is empty at the point of
    writing, so appending to an existing file does not insert a second
    header row in the middle of the data.
    """
    with open(filename, save_method, newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        # In append mode the stream starts at the end of the file, so a
        # non-zero position means the file already has content (and
        # therefore a header from an earlier call).
        if f.tell() == 0:
            writer.writerow(['Title', 'SubTitle', 'Rating', 'ItemPrice', 'TrendingPrice', 'ItemLink'])
        writer.writerows(records)

In [11]:
# Run the scraper for a sample query and write the listings to disk.
SEARCH_KEYWORDS = 'mobile'
OUTPUT_CSV = 'result.csv'

results = search_ebay(SEARCH_KEYWORDS)

# Nothing is written when the search produced no records.
if results:
    save_results(results, OUTPUT_CSV)

In [12]:
# Preview the first three scraped records; as the cell's last expression,
# the slice is rendered as the cell output.
results[:3]

[('Samsung Galaxy S10+ Plus G975U T-Mobile ATT Sprint Verizon Unlocked - A+ Stock -',
  'NEW FAST CHARGER - WARRANTY - 100% TESTED+CLEAN ESNSamsung Galaxy S10+128 GBUnlocked Open Box',
  '4.5 out of 5 stars.',
  'NT$ 8,288',
  '',
  'https://www.ebay.com/itm/255025116864?epid=16029464580&_trkparms=ispr%3D1&hash=item3b60ae76c0:g:LkYAAOSw3PFg0PH4&amdata=enc%3AAQAGAAACkPYe5NmHp%252B2JMhMi7yxGiTJkPrKr5t53CooMSQt2orsSg3Ye8yTWgOW7pmE1t838du1%252BdMZUJYLAiZvOkek9z4PUnOOu9q8bLfQcEPQKGEXvwvbU7WZ59gjln8qyg2fS0%252Bh1%252BpNMIjY2Nwo1iiY3xu2HgD6nRP8VF5%252FQJ8wWfbebKGADeQF25Jq87vbDWi1sdhfIVo3LfeDRu41diUuvYdMcP%252FE4%252FZ32wCVcgSZb%252F2BQSAqNd5zLFkZ9iHX7RhS2eqhex5LMvZTAuGCvAXKHiKs8nUq28eWyiLE%252F5o3MB%252F3%252FaXW0s155s4kWvK5fcQueol9SIiKJojN314TMDJud7n%252FvXAReZ7N4dPqdnUsn5Juw6eTElI0TiiB8ZzqSlsk%252BGPJTBW7uBXWkN1ZRcd48rm9DCMPKOUa6xC87%252BT9NdNPWh%252BEw8%252BHv2%252BwboRM2tJSAsfEf1VzXFYNpbR4qh8CCoQd360cJxkJLMNkixrVqBlEBrKKdS0GFGnZeAjq2rt%252FzChFUbYNNRI3TvtzszRiGCfK7EUfkae3ViD6RFbolSIVG345C