In [48]:
# text-wrap for notebook output
from IPython.display import HTML, display

def set_css():
    """Display a <style> element that sets ``white-space: pre-wrap`` on <pre>.

    The rule is emitted through IPython's ``display(HTML(...))`` so that it
    becomes part of the notebook output.
    """
    wrap_rule = '''
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  '''
    display(HTML(wrap_rule))
# Register set_css as a callback for IPython's 'pre_run_cell' event.
get_ipython().events.register('pre_run_cell', set_css)

In [93]:
"""
TODO
1. Make a request to ebay.com and get a page
2. collect data from each detail page
3. collect all links to the detail pages of each product
4. write scraped data to a CSV file
"""

import requests
from bs4 import BeautifulSoup
import re
import csv

def get_page(url, timeout=30):
    """Download *url* and return it parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        A ``BeautifulSoup`` object built with the ``lxml`` parser, or
        ``None`` when the request fails or the response status is not OK.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as err:
        # Treat transport errors like a bad status: report and return None.
        print('Request failed:', err)
        return None

    if not response.ok:
        print('Server responded:', response.status_code)
        return None

    return BeautifulSoup(response.text, 'lxml')  # response and lxml parser

def get_detail_data(soup):
    """Extract title, price, currency and units sold from a detail page.

    Args:
        soup: Parsed product page (any object exposing BeautifulSoup's
            ``find``), or ``None`` when the page could not be fetched.

    Returns:
        dict with the keys ``'title'``, ``'price'``, ``'currency'`` and
        ``'total sold'``. Every value is a string; a field that cannot be
        located on the page is returned as ``""``.
    """
    def text_of(tag, **criteria):
        # Text of the first matching element, or None when nothing matches.
        if soup is None:
            return None
        node = soup.find(tag, **criteria)
        return None if node is None else node.get_text()

    # title: the first 16 characters of the heading text are dropped
    # (presumably a fixed label prefix -- confirm against a live page)
    heading = text_of('h1', id='itemTitle')
    title = heading[16:] if heading is not None else ""

    # currency & price: prefer the regular price element, fall back to the
    # sale-price element when the former is absent
    price_text = text_of('span', id="prcIsum")
    if price_text is None:
        price_text = text_of('span', id="mm-saleDscPrc")

    currency = ""
    price = ""
    if price_text is not None:
        # The two fields are extracted independently so that a missing
        # currency label does not discard a valid price (and vice versa).
        currency_match = re.search(r"[a-zA-Z\s$]+", price_text)
        if currency_match:
            currency = currency_match.group()
        # Digits with optional ','/'.' separated groups: keeps "1,234.56"
        # whole and also accepts prices without a fractional part.
        price_match = re.search(r"\d+(?:[.,]\d+)*", price_text)
        if price_match:
            price = price_match.group()

    # item sold: use the first span when it mentions "sold"; otherwise use
    # the first word of the alternative span
    sold = ""
    badge_text = text_of('span', attrs={'class': 'w2b-sgl'})
    if badge_text is not None:
        words = badge_text.split(' ')
        if "sold" in words:
            sold = words[0]
        else:
            alt_text = text_of('span', attrs={'class': 'vi-txt-underline'})
            if alt_text is not None:
                sold = alt_text.split(' ')[0]

    # data in dictionary
    return {
        'title': title,
        'price': price,
        'currency': currency,
        'total sold': sold,
    }


def get_index_data(url, max_pages=998):
    """Collect product detail links from the paginated search results.

    Each results page is requested by appending ``&_pgn=<page number>`` to
    *url*. Paging stops at the first page that cannot be fetched or that
    contains no product links, or after *max_pages* pages.

    Args:
        url: Search URL; it must already contain a query string because the
            page parameter is appended with ``&``.
        max_pages: Upper bound on the number of result pages to request.

    Returns:
        A flat list of the ``href`` values of all ``a.s-item__link``
        elements found, in page order.
    """
    urls_all = []
    for page_num in range(1, max_pages + 1):
        url_new = "{}&_pgn={}".format(url, page_num)
        # Fetch the numbered page, not the bare search URL.
        soup = get_page(url_new)
        if soup is None:
            break

        anchors = soup.find_all('a', {"class": "s-item__link"})
        urls_page = [a.get('href') for a in anchors if a.get('href')]
        if not urls_page:
            break

        # extend (not append) so callers receive individual URLs
        urls_all.extend(urls_page)
    return urls_all


def write_csv(data, url):
    """Append one scraped product as a row of ``output.csv``.

    Args:
        data: dict with the keys ``'title'``, ``'price'``, ``'currency'``
            and ``'total sold'``.
        url: Address of the product page; written as the last column.
    """
    # newline='' leaves line endings to the csv module (otherwise blank rows
    # appear on Windows); UTF-8 keeps non-ASCII titles writable regardless
    # of the platform's default encoding.
    with open('output.csv', 'a', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        row = [data['title'], data['price'], data['currency'],
               data['total sold'], url]
        writer.writerow(row)


# drive the scraper: gather product links, scrape each one, store the result
def main():
    """Scrape every product found by the search and append it to the CSV.

    The links come from ``get_index_data``; each is downloaded with
    ``get_page``, parsed by ``get_detail_data`` and stored via ``write_csv``.
    """
    search_url = "https://www.ebay.com/sch/i.html?_nkw=book"

    for product_link in get_index_data(search_url):
        detail_soup = get_page(product_link)
        details = get_detail_data(detail_soup)
        write_csv(details, product_link)


# Entry point for the scraper: run main() only when this file is executed
# directly (module name is '__main__'), not when it is imported.
if __name__ == '__main__':
    main()

KeyboardInterrupt: ignored