## Sports Cards Web Scraper

This program fetches details such as the price, card title, vendor, and image link for graded and ungraded sports cards from the website www.sportscardspro.com, and adds them to a table that the user can export as a CSV file. To add a card, the user runs the code blocks below and enters the URL and grade of the particular card when prompted. When they are done adding cards, they can export the CSV file by running the final code block. **NOTE:** The user must have a Google Sheets file containing the product template in their Google Drive in order to run this program. This file can be found in the GitHub repository.

In [None]:
from IPython.display import Image
from decimal import *
import pandas as pd
from bs4 import BeautifulSoup
from urllib.request import urlopen
import numpy as np

In [None]:
# Authenticate the current Colab user so the notebook can reach their Google account.
from google.colab import auth
auth.authenticate_user()

# Build a gspread client from the default Google credentials.
# The second value returned by default() is not needed here and is discarded.
import gspread
from google.auth import default
creds, _ = default()

# `gc` is the gspread client used by later cells to open the product template sheet.
gc = gspread.authorize(creds)


In [None]:
# Load the product template spreadsheet into the `products` DataFrame.
# Access the products file by specifying the name of the file in your Google Drive below.
worksheet = gc.open('product_template').sheet1
rows = worksheet.get_all_values() # all values of the first sheet; the first row is treated as the header below
products = pd.DataFrame.from_records(rows)
new_header = products.iloc[0] # grab the first row for the header
products = products[1:] # take the data less the header row
products.columns = new_header # set the header row as the df header
# Bare expression: the notebook displays the table as the cell output.
products

Unnamed: 0,Handle,Title,Body (HTML),Link,Vendor,Standardized Product Type,Custom Product Type,Tags,Published,Option1 Name,...,Google Shopping / Custom Label 0,Google Shopping / Custom Label 1,Google Shopping / Custom Label 2,Google Shopping / Custom Label 3,Google Shopping / Custom Label 4,Variant Image,Variant Weight Unit,Variant Tax Code,Cost per item,Status


In [None]:
# Where the price for each supported grade is read from on the product page:
#   ('title', text) -> the element whose title attribute equals `text`
#   ('span', k)     -> the text following the k-th (zero-based) '$' found in
#                      the serialized <span> elements of the page
# The title strings are matched verbatim against the page markup.
_PRICE_SOURCES = {
  10: ('title', 'current Manual Only value'),
  9.5: ('title', 'current Box Only value'),
  9: ('title', 'current value in Graded condition'),
  8: ('span', 6),
  7: ('span', 3),
  0: ('span', 0),
}


def _price_source_for(grade):
  """Return the _PRICE_SOURCES entry for `grade`, or raise ValueError if unsupported."""
  try:
    return _PRICE_SOURCES[grade]
  except (KeyError, TypeError):
    supported = ", ".join(str(g) for g in _PRICE_SOURCES)
    raise ValueError(
        "Unsupported grade {!r}; expected one of: {}".format(grade, supported)) from None


def _scrape_price(soup, source):
  """Return the raw price text (for example '$1,234.56') selected by `source`."""
  method, key = source
  if method == 'title':
    cell = soup.find(title=key)
    if cell is None:
      raise ValueError("No element titled {!r} was found on the page".format(key))
    text = str(cell)
    start = text.find('$')
    if start == -1:
      raise ValueError("No price is listed in the element titled {!r}".format(key))
    # The price runs from the '$' up to the second line break of the serialized element.
    return text[start:text.find('\n', text.find('\n') + 1)]

  # 'span' method: locate every '$' in the serialized spans. The whole string
  # is scanned, so prices far into a long page are still found.
  text = str(soup.find_all('span'))
  dollars = [i for i, ch in enumerate(text) if ch == '$']
  if len(dollars) < key + 2:
    raise ValueError(
        "Expected at least {} prices in the page's <span> elements, found {}".format(
            key + 2, len(dollars)))
  segment = text[dollars[key]:dollars[key + 1]]
  return segment[:segment.index('\n')]


def _card_title(soup):
  """Build the card title from the page's <title> element."""
  tag = soup.find('title')
  if tag is None:
    raise ValueError("The page has no <title> element")
  raw = tag.get_text()  # plain text, so '&' is not left as '&amp;'
  # Keep only what precedes the second '|'. With fewer than two '|' the whole
  # text is kept rather than being truncated by a -1 slice index.
  second_bar = raw.find('|', raw.find('|') + 1)
  if second_bar != -1:
    raw = raw[:second_bar]
  return raw.replace('Prices ', '').replace('| ', '').strip()


def _vendor_for(title):
  """Return the first known manufacturer named in `title`, else 'Other'."""
  for name in ('Panini', 'Topps', 'Upper Deck'):
    if name in title:
      return name
  return 'Other'


def _image_link(soup):
  """Return the .jpg URL found in the element with id 'product_details'."""
  details = str(soup.find(id="product_details"))
  start = details.find('src')
  end = details.find('.jpg', start) if start != -1 else -1
  if end == -1:
    raise ValueError("No .jpg image was found in the 'product_details' element")
  # Skip 5 characters from the 'src' match (presumably `src="`) to reach the URL itself.
  return details[start + 5:end] + ".jpg"


def extractFeatures(url, grading_service, grade, sport, showcase, markup_rate=.6):
  """Scrape a card's listing details from its sportscardspro.com page.

  The price shown on the page for the given grade is increased by
  `markup_rate` and formatted as a dollar amount.

  Args:
    url: Address of the card's product page; returned unchanged.
    grading_service: Returned unchanged.
    grade: 10, 9.5, 9, 8, 7 or 0; selects which price on the page is used.
    sport: Returned unchanged and used as the first part of the tags.
    showcase: Returned unchanged and appended to 'Showcase' in the tags.
    markup_rate: Fraction of the scraped price added on top of it.
      The default of .6 gives price + 60%.

  Returns:
    A tuple (handle, title, formatted_markup, vendor, sport, imageLink,
    grading_service, grade, showcase, tags, url). `handle` is the title with
    spaces replaced by hyphens and `formatted_markup` looks like '$1,234.56'.

  Raises:
    ValueError: If the grade is not supported, or the page lacks the title,
      price or image markup this function looks for.

  Errors raised by urlopen while fetching the page are not caught here.
  """
  # Validate the grade first so a typo fails before any network traffic.
  source = _price_source_for(grade)

  with urlopen(url) as page:
    html = page.read().decode("utf-8")
  soup = BeautifulSoup(html, "html.parser")

  price = _scrape_price(soup, source)
  title = _card_title(soup)
  handle = title.replace(' ', '-')
  vendor = _vendor_for(title)
  imageLink = _image_link(soup)

  listed = float(price.replace('$', '').replace(',', ''))
  formatted_markup = "${:,.2f}".format(listed + markup_rate * listed)

  tags = sport + ", " + "Showcase" + showcase

  return handle, title, formatted_markup, vendor, sport, imageLink, grading_service, grade, showcase, tags, url

In [None]:
def extractFeaturesWithSetPrice(url, grading_service, grade, sport, showcase, price):
  """Scrape a card's listing details, using a price supplied by the caller.

  The title, vendor and image link are read from the card's page; the price
  is only normalised to a dollar string and no markup is applied.

  Args:
    url: Address of the card's product page; returned unchanged.
    grading_service: Returned unchanged.
    grade: Returned unchanged; it does not affect the price here.
    sport: Returned unchanged and used as the first part of the tags.
    showcase: Returned unchanged and appended to 'Showcase' in the tags.
    price: Selling price as a number or a string such as '124.99' or
      '$1,249.99'.

  Returns:
    A tuple (handle, title, formatted_markup, vendor, sport, imageLink,
    grading_service, grade, showcase, tags, url). `handle` is the title with
    spaces replaced by hyphens and `formatted_markup` looks like '$124.99'.

  Raises:
    ValueError: If `price` is not a valid amount, or the page lacks the title
      or image markup this function looks for.

  Errors raised by urlopen while fetching the page are not caught here.
  """
  # Parse the price first so a typo fails before any network traffic.
  amount = float(str(price).replace('$', '').replace(',', ''))
  formatted_markup = "${:,.2f}".format(amount)

  with urlopen(url) as page:
    html = page.read().decode("utf-8")
  soup = BeautifulSoup(html, "html.parser")

  # The card title is derived from the page's <title> element.
  title_tag = soup.find('title')
  if title_tag is None:
    raise ValueError("The page has no <title> element")
  title = title_tag.get_text()  # plain text, so '&' is not left as '&amp;'
  # Keep only what precedes the second '|'. With fewer than two '|' the whole
  # text is kept rather than being truncated by a -1 slice index.
  second_bar = title.find('|', title.find('|') + 1)
  if second_bar != -1:
    title = title[:second_bar]
  title = title.replace('Prices ', '').replace('| ', '').strip()

  handle = title.replace(' ', '-')

  # First known manufacturer named in the title wins; otherwise 'Other'.
  vendor = 'Other'
  for known_vendor in ('Panini', 'Topps', 'Upper Deck'):
    if known_vendor in title:
      vendor = known_vendor
      break

  details = str(soup.find(id="product_details"))
  src_at = details.find('src')
  jpg_at = details.find('.jpg', src_at) if src_at != -1 else -1
  if jpg_at == -1:
    raise ValueError("No .jpg image was found in the 'product_details' element")
  # Skip 5 characters from the 'src' match (presumably `src="`) to reach the URL itself.
  imageLink = details[src_at + 5:jpg_at] + ".jpg"

  tags = sport + ", " + "Showcase" + showcase

  return handle, title, formatted_markup, vendor, sport, imageLink, grading_service, grade, showcase, tags, url

## Use the following code block to add a card and have its price set according to sportscardspro.com with a markup

In [40]:
# Use this block to have the price set by sportscardspro
# If the card is ungraded put N/A for grading service and 0 for grade
card_url = input("Enter the url of the card: ").strip()  # pasted URLs often carry stray spaces
card_grading_service = input("Enter the grading service (PSA, SGC, etc.): ")
card_grade = float(input("Enter the grade of your card: "))
card_sport = input("Enter the sport: ")
card_showcase = input("Enter the showcase the card belongs to: ")
handle, title, markup, vendor, sport, imageLink, grading_service, grade, showcase, tags, url = extractFeatures(
    card_url, card_grading_service, card_grade, card_sport, card_showcase)

# One row of the product template; columns not listed here are left blank.
df2 = {
    'Handle': handle,
    'Title': title,
    'Vendor': vendor,
    'Tags': tags,
    'Published': 'TRUE',
    'Option1 Name': 'Grading Service',
    'Option1 Value': grading_service,
    'Option2 Name': 'Grade',
    'Option2 Value': grade,
    'Option3 Name': 'Type',
    'Option3 Value': sport,
    'Option4 Name': 'Showcase',
    'Option4 Value': showcase,
    'Variant Grams': '0',
    'Variant Inventory Tracker': 'shopify',
    'Variant Inventory Qty': '1',
    'Variant Inventory Policy': 'deny',
    'Variant Fulfillment Service': 'manual',
    'Variant Price': markup,
    'Variant Requires Shipping': 'TRUE',
    'Variant Taxable': "TRUE",
    'Image Src': imageLink,
    'Image Position': "1",
    'Gift Card': 'FALSE',
    'Google Shopping / Gender': 'Unisex',
    'Google Shopping / Age Group': 'Adult',
    'Google Shopping / AdWords Grouping': 'Sports Cards',
    'Google Shopping / AdWords Labels': 'sports cards, collectible',
    'Google Shopping / Condition': 'Graded',
    'Google Shopping / Custom Product': 'FALSE',
    'Variant Weight Unit': 'g',
    'Status': 'active',
    'Link': url,
}
# DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
# adds the row the same way and renumbers the index.
products = pd.concat([products, pd.DataFrame([df2])], ignore_index=True)
products = products.fillna("")
products

Enter the url of the card: https://www.sportscardspro.com/game/basketball-cards-2013-panini-prizm/giannis-antetokounmpo-prizm-290
Enter the grading service (PSA, SGC, etc.): PSA
Enter the grade of your card: 10
Enter the sport: NBA
Enter the showcase the card belongs to: B


Unnamed: 0,Handle,Title,Body (HTML),Link,Vendor,Standardized Product Type,Custom Product Type,Tags,Published,Option1 Name,...,Google Shopping / Custom Label 0,Google Shopping / Custom Label 1,Google Shopping / Custom Label 2,Google Shopping / Custom Label 3,Google Shopping / Custom Label 4,Variant Image,Variant Weight Unit,Variant Tax Code,Cost per item,Status
0,JA-Morant-[Silver-Prizm]-#12-[Rookie]-2019-Pan...,JA Morant [Silver Prizm] #12 [Rookie] 2019 Pan...,,https://www.sportscardspro.com/game/basketball...,Panini,,,"NBA, ShowcaseA",True,Grading Service,...,,,,,,,g,,,active
1,JA-Morant-[Silver-Prizm]-#12-[Rookie]-2019-Pan...,JA Morant [Silver Prizm] #12 [Rookie] 2019 Pan...,,https://www.sportscardspro.com/game/basketball...,Panini,,,"NBA, ShowcaseB",True,Grading Service,...,,,,,,,g,,,active
2,Michael-Porter-Jr.-#182-2018-Panini-Donruss-Optic,Michael Porter Jr. #182 2018 Panini Donruss Optic,,https://www.sportscardspro.com/game/basketball...,Panini,,,"NBA, ShowcaseA",True,Grading Service,...,,,,,,,g,,,active
3,Joe-Burrow-[Genesis]-#201-[Rookie]-2020-Panini...,Joe Burrow [Genesis] #201 [Rookie] 2020 Panini...,,https://www.sportscardspro.com/game/football-c...,Panini,,,"NFL, ShowcaseC",True,Grading Service,...,,,,,,,g,,,active
4,Justin-Fields-#80-2021-Panini-Chronicles-Draft...,Justin Fields #80 2021 Panini Chronicles Draft...,,https://www.sportscardspro.com/game/football-c...,Panini,,,"NFL, ShowcaseA",True,Grading Service,...,,,,,,,g,,,active
5,Louis-King-[Green-Ice-Prizm]-#102-2019-Panini-...,Louis King [Green Ice Prizm] #102 2019 Panini ...,,https://www.sportscardspro.com/game/basketball...,Panini,,,"NBA, ShowcaseB",True,Grading Service,...,,,,,,,g,,,active
6,Mike-Trout-[SSP]-#27-2021-Topps,Mike Trout [SSP] #27 2021 Topps,,https://www.sportscardspro.com/game/baseball-c...,Topps,,,"MLB, ShowcaseC",True,Grading Service,...,,,,,,,g,,,active
7,Michael-Jordan-#57-[Rookie]-1986-Fleer,Michael Jordan #57 [Rookie] 1986 Fleer,,https://www.sportscardspro.com/game/basketball...,Other,,,"NBA, ShowcaseA",True,Grading Service,...,,,,,,,g,,,active
8,Joe-Montana-#1-1990-Action-Packed-All-Madden,Joe Montana #1 1990 Action Packed All Madden,,https://www.sportscardspro.com/game/football-c...,Other,,,"NFL, ShowcaseA",True,Grading Service,...,,,,,,,g,,,active
9,Michael-Jordan-#SP1-[Rookie]-1991-Upper-Deck,Michael Jordan #SP1 [Rookie] 1991 Upper Deck,,https://www.sportscardspro.com/game/baseball-c...,Upper Deck,,,"MLB, ShowcaseB",True,Grading Service,...,,,,,,,g,,,active


## Use the following code block to add a card with your own set price

In [None]:
# Use this block to add cards with your own set price
# If the card is ungraded put N/A for grading service and 0 for grade
card_url = input("Enter the url of the card: ").strip()  # pasted URLs often carry stray spaces
card_grading_service = input("Enter the grading service (PSA, SGC, etc.): ")
# Parse as float so half grades such as 9.5 are accepted, but keep whole
# grades as ints so they are still written as 10 rather than 10.0.
card_grade = float(input("Enter the grade of your card: "))
if card_grade.is_integer():
    card_grade = int(card_grade)
card_sport = input("Enter the sport: ")
card_showcase = input("Enter the showcase the card belongs to: ")
card_price = input("Enter the price of the card: ")
handle, title, markup, vendor, sport, imageLink, grading_service, grade, showcase, tags, url = extractFeaturesWithSetPrice(
    card_url, card_grading_service, card_grade, card_sport, card_showcase, card_price)

# One row of the product template; columns not listed here are left blank.
df2 = {
    'Handle': handle,
    'Title': title,
    'Vendor': vendor,
    'Tags': tags,
    'Published': 'TRUE',
    'Option1 Name': 'Grading Service',
    'Option1 Value': grading_service,
    'Option2 Name': 'Grade',
    'Option2 Value': grade,
    'Option3 Name': 'Type',
    'Option3 Value': sport,
    'Option4 Name': 'Showcase',
    'Option4 Value': showcase,
    'Variant Grams': '0',
    'Variant Inventory Tracker': 'shopify',
    'Variant Inventory Qty': '1',
    'Variant Inventory Policy': 'deny',
    'Variant Fulfillment Service': 'manual',
    'Variant Price': markup,
    'Variant Requires Shipping': 'TRUE',
    'Variant Taxable': "TRUE",
    'Image Src': imageLink,
    'Image Position': "1",
    'Gift Card': 'FALSE',
    'Google Shopping / Gender': 'Unisex',
    'Google Shopping / Age Group': 'Adult',
    'Google Shopping / AdWords Grouping': 'Sports Cards',
    'Google Shopping / AdWords Labels': 'sports cards, collectible',
    'Google Shopping / Condition': 'Graded',
    'Google Shopping / Custom Product': 'FALSE',
    'Variant Weight Unit': 'g',
    'Status': 'active',
    'Link': url,
}
# DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
# adds the row the same way and renumbers the index.
products = pd.concat([products, pd.DataFrame([df2])], ignore_index=True)
products = products.fillna("")
products

Enter the url of the card: https://www.sportscardspro.com/game/basketball-cards-2019-panini-prizm-instant-impact/ja-morant-silver-prizm-12
Enter the grading service (PSA, SGC, etc.): SGC
Enter the grade of your card: 10
Enter the sport: NBA
Enter the showcase the card belongs to: B
Enter the price of the card: 124.99


Unnamed: 0,Handle,Title,Body (HTML),Link,Vendor,Standardized Product Type,Custom Product Type,Tags,Published,Option1 Name,...,Google Shopping / Custom Label 0,Google Shopping / Custom Label 1,Google Shopping / Custom Label 2,Google Shopping / Custom Label 3,Google Shopping / Custom Label 4,Variant Image,Variant Weight Unit,Variant Tax Code,Cost per item,Status
0,JA-Morant-[Silver-Prizm]-#12-[Rookie]-2019-Pan...,JA Morant [Silver Prizm] #12 [Rookie] 2019 Pan...,,https://www.sportscardspro.com/game/basketball...,Panini,,,"NBA, ShowcaseA",True,Grading Service,...,,,,,,,g,,,active
1,JA-Morant-[Silver-Prizm]-#12-[Rookie]-2019-Pan...,JA Morant [Silver Prizm] #12 [Rookie] 2019 Pan...,,https://www.sportscardspro.com/game/basketball...,Panini,,,"NBA, ShowcaseB",True,Grading Service,...,,,,,,,g,,,active


In [17]:
# Made a mistake? Run this block to remove the most recently added row.
# The row is dropped from `products` in place, selected by its index label.
# An IndexError is raised if the table has no rows left.
products.drop(index=products.index[-1], inplace=True)

In [41]:
# Print out what you have so far.
# Bare expression: the notebook displays the current `products` table as the cell output.
products

Unnamed: 0,Handle,Title,Body (HTML),Link,Vendor,Standardized Product Type,Custom Product Type,Tags,Published,Option1 Name,...,Google Shopping / Custom Label 0,Google Shopping / Custom Label 1,Google Shopping / Custom Label 2,Google Shopping / Custom Label 3,Google Shopping / Custom Label 4,Variant Image,Variant Weight Unit,Variant Tax Code,Cost per item,Status
0,JA-Morant-[Silver-Prizm]-#12-[Rookie]-2019-Pan...,JA Morant [Silver Prizm] #12 [Rookie] 2019 Pan...,,https://www.sportscardspro.com/game/basketball...,Panini,,,"NBA, ShowcaseA",True,Grading Service,...,,,,,,,g,,,active
1,JA-Morant-[Silver-Prizm]-#12-[Rookie]-2019-Pan...,JA Morant [Silver Prizm] #12 [Rookie] 2019 Pan...,,https://www.sportscardspro.com/game/basketball...,Panini,,,"NBA, ShowcaseB",True,Grading Service,...,,,,,,,g,,,active
2,Michael-Porter-Jr.-#182-2018-Panini-Donruss-Optic,Michael Porter Jr. #182 2018 Panini Donruss Optic,,https://www.sportscardspro.com/game/basketball...,Panini,,,"NBA, ShowcaseA",True,Grading Service,...,,,,,,,g,,,active
3,Joe-Burrow-[Genesis]-#201-[Rookie]-2020-Panini...,Joe Burrow [Genesis] #201 [Rookie] 2020 Panini...,,https://www.sportscardspro.com/game/football-c...,Panini,,,"NFL, ShowcaseC",True,Grading Service,...,,,,,,,g,,,active
4,Justin-Fields-#80-2021-Panini-Chronicles-Draft...,Justin Fields #80 2021 Panini Chronicles Draft...,,https://www.sportscardspro.com/game/football-c...,Panini,,,"NFL, ShowcaseA",True,Grading Service,...,,,,,,,g,,,active
5,Louis-King-[Green-Ice-Prizm]-#102-2019-Panini-...,Louis King [Green Ice Prizm] #102 2019 Panini ...,,https://www.sportscardspro.com/game/basketball...,Panini,,,"NBA, ShowcaseB",True,Grading Service,...,,,,,,,g,,,active
6,Mike-Trout-[SSP]-#27-2021-Topps,Mike Trout [SSP] #27 2021 Topps,,https://www.sportscardspro.com/game/baseball-c...,Topps,,,"MLB, ShowcaseC",True,Grading Service,...,,,,,,,g,,,active
7,Michael-Jordan-#57-[Rookie]-1986-Fleer,Michael Jordan #57 [Rookie] 1986 Fleer,,https://www.sportscardspro.com/game/basketball...,Other,,,"NBA, ShowcaseA",True,Grading Service,...,,,,,,,g,,,active
8,Joe-Montana-#1-1990-Action-Packed-All-Madden,Joe Montana #1 1990 Action Packed All Madden,,https://www.sportscardspro.com/game/football-c...,Other,,,"NFL, ShowcaseA",True,Grading Service,...,,,,,,,g,,,active
9,Michael-Jordan-#SP1-[Rookie]-1991-Upper-Deck,Michael Jordan #SP1 [Rookie] 1991 Upper Deck,,https://www.sportscardspro.com/game/baseball-c...,Upper Deck,,,"MLB, ShowcaseB",True,Grading Service,...,,,,,,,g,,,active


In [42]:
# Run this block to export the csv file.
# Writes the table to 'products.csv' in the current working directory.
# NOTE(review): to_csv is called with its defaults, so the DataFrame index is
# written as an extra unnamed first column. Confirm the consumer of this file
# accepts that, or pass index=False.
products.to_csv('products.csv')