## eBay Baseball Card Data Scraper

In [44]:
import requests
import numpy as np
import pandas as pd
from scipy import stats
from bs4 import BeautifulSoup as bs

In [47]:
def get_grade(title):

    index = ''
    string = ''

    grading_companies = ['PSA', 'psa', 'BGS', 'bgs', 'HGA', 'hga', 'SGC', 'sgc']
    for company in grading_companies:
        index = title.find(company)
        if (index != -1):
            return title[index:index + 7]
    
    return None

def get_sale_type(item):
    sale = getattr(item.find('span', {'class': 's-item__purchase-options-with-icon'}), 'text', None)
    if sale is not None:
        return sale
    else:
        return 'Auction'
    

In [48]:
class EbayScraper:
    def __init__(self, keywords, pages):
        self.keywords = keywords
        self.pages = pages

    def add_plus(self):
        return self.keywords.replace(' ', '+')
    
    def href_builder(self):
        links = []
        keywords = self.add_plus()

        for i in range(self.pages):
            links.append('https://www.ebay.com/sch/i.html?_nkw=' + keywords + '&_sop=13&LH_Sold=1&LH_Complete=1&_pgn=' + str(i) + '&rt=nc')
        return links
        
    def get_data(self):

        soups = []
        links = self.href_builder()

        for link in links:
            page = requests.get(link)
            soup = bs(page.text, 'html.parser')
            soups.append(soup)

        return soups
        
    def parse(self):

        soups = self.get_data()
        results = []
        product_list = []

        for soup in soups:
            results.extend(soup.find_all('div', {'class': 's-item__info clearfix'}))

        for item in results:
            products = {
                'title': getattr(item.find('h3', {'class': 's-item__title'}), 'text', None),
                'grade': get_grade(getattr(item.find('h3', {'class': 's-item__title'}), 'text', None)),
                'soldprice': float(item.find('span', {'class': 's-item__price'}).text.replace("$", '').replace(',', '').strip()),
                'solddate': getattr(item.find('div', {'class': 's-item__title--tagblock'}), "POSITIVE", None),
                'saletype': get_sale_type(item),
                #'solddate': item.find('div', {'class': 's-item__title--tagblock'}).find('span', {'class': 'POSITIVE'}).text, ---- needs to be fixed
                'bids': getattr(item.find('span', {'class': 's-item__bids s-item__bidCount'}), 'text', None),
                'link': item.find('a', {'class': 's-item__link'})['href'],
                'image': getattr(item.find('img', {'class': 's-item__image-img'}), 'src', None),
            }
            product_list.append(products)
        return product_list

    def output(self):
        products = self.parse()
        df = pd.DataFrame(products)
        df.to_csv('output.csv', index=False)
        return df
    

In [49]:
zion = EbayScraper('2019 panini prizm zion williamson 248 psa 9', 8)
zion_out = zion.output()

In [50]:
lewis = EbayScraper('2020 lewis hamilton #1 psa 10', 7)
lewis_out = lewis.output()

In [51]:
lewis_out

Unnamed: 0,title,grade,soldprice,solddate,saletype,bids,link,image
0,Shop on eBay,,20.00,,Auction,,https://ebay.com/itm/123456?hash=item28caef0a3...,
1,New ListingLEWIS HAMILTON 2020 Topps Now Formu...,SGC 9.5,44.99,,or Best Offer,,https://www.ebay.com/itm/363939658412?hash=ite...,
2,LEWIS HAMILTON 2020 Topps Chrome F1 #197 Gold ...,PSA 8 P,349.99,,or Best Offer,,https://www.ebay.com/itm/155104953413?hash=ite...,
3,Lewis Hamilton 2020 Topps Chrome F1 70th ANNIV...,PSA 10,745.00,,or Best Offer,,https://www.ebay.com/itm/204063678546?hash=ite...,
4,Lewis Hamilton 2020 Topps Chrome F1 SAPPHIRE E...,PSA 9 M,495.00,,or Best Offer,,https://www.ebay.com/itm/203998078847?hash=ite...,
...,...,...,...,...,...,...,...,...
512,LEWIS HAMILTON PSA 8 2020 TOPPS CHROME F1 FORM...,PSA 8 2,1699.99,,or Best Offer,,https://www.ebay.com/itm/185523983505?hash=ite...,
513,2020 Topps Chrome F1 #1 Lewis Hamilton Sapphir...,PSA 9 M,150.00,,Auction,1 bid ·,https://www.ebay.com/itm/363934372101?hash=ite...,
514,2020 Topps Chrome Formula 1 LEWIS HAMILTON Car...,,15.50,,Auction,3 bids ·,https://www.ebay.com/itm/144671371319?hash=ite...,
515,2020 Topps Chrome Formula 1 Lewis Hamilton Gol...,,95.00,,or Best Offer,,https://www.ebay.com/itm/403741676420?hash=ite...,


In [52]:
zion_out

Unnamed: 0,title,grade,soldprice,solddate,saletype,bids,link,image
0,Shop on eBay,,20.00,,Auction,,https://ebay.com/itm/123456?hash=item28caef0a3...,
1,New Listing2019 PANINI PRIZM ZION WILLIAMSON R...,PSA 9 M,70.00,,Best offer accepted,,https://www.ebay.com/itm/155112369350?hash=ite...,
2,2019-20 Panini Prizm #248 Zion Williamson (RC)...,PSA 9,67.99,,Best offer accepted,,https://www.ebay.com/itm/314051874018?epid=240...,
3,2019-20 PANINI PRIZM RUBY WAVE #248 ZION WILLI...,PSA 9,299.00,,Best offer accepted,,https://www.ebay.com/itm/125446364542?hash=ite...,
4,🔥 2019-20 ZION WILLIAMSON PANINI SILVER PRIZM ...,PSA 9 🔥,495.00,,Buy It Now,,https://www.ebay.com/itm/314095452831?hash=ite...,
...,...,...,...,...,...,...,...,...
300,ALEN SMAILAGIC 2019-20 PANINI PRIZM DRAFT PICK...,PSA 9 M,25.00,,Buy It Now,,https://www.ebay.com/itm/125274362868?hash=ite...,
301,2019-20 Prizm Zion Williamson Rookie RC #248 P...,PSA 9 -,69.00,,or Best Offer,,https://www.ebay.com/itm/125251006785?hash=ite...,
302,2019-20 Panini Prizm ZION WILLIAMSON RC Pink P...,PSA 9 M,2999.00,,Best offer accepted,1 bid,https://www.ebay.com/itm/314042100185?hash=ite...,
303,2019 Zion Williamson Prizm #248 Silver Rookie ...,PSA 9,350.00,,Buy It Now,,https://www.ebay.com/itm/403714597011?hash=ite...,
