In [71]:
import re
import requests
from bs4 import BeautifulSoup
from pymongo import MongoClient
from selenium import webdriver
from selenium.webdriver.firefox.options import Options

# Handles on the project's MongoDB server, database, and collections
client = MongoClient(host='192.168.0.209', port=27017)
db = client['capstone_1']
cards, decks = db['cards'], db['deck_lists']

# URL templates; the `{}` placeholder is filled with the endpoint path
scryfall_api_url = 'https://api.scryfall.com/{}'  # API docs: https://scryfall.com/docs/api
mtgtop8_url = 'https://www.mtgtop8.com/{}'

# Readable format name -> mtgtop8.com format code used when building requests
mt8_format_keys = dict(
    vintage='VI',
    legacy='LE',
    modern='MO',
    pioneer='PI',
    historic='HI',
    standard='ST',
    commander='EDH',
    limited='LI',
    pauper='PAU',
    peasant='PEA',
    block='BL',
    extended='EX',
    highlander='HIGH',
    canadian_highlander='CHL',
)

def query(link, payload=None, timeout=30):
    """Send a GET request and return the response, reporting non-200 statuses.

    Args:
        link: URL to request.
        payload: Optional mapping of query-string parameters. ``None`` (the
            default) sends no parameters; a ``None`` default is used instead
            of ``{}`` so no dict object is shared between calls.
        timeout: Seconds to wait on the server before ``requests`` raises a
            timeout error, so a stalled server cannot hang the kernel.

    Returns:
        The ``requests.Response`` object, regardless of its status code.
    """
    response = requests.get(link, params=payload, timeout=timeout)
    if response.status_code != 200:
        # Report the failure but still hand the response back to the caller
        print('WARNING', response.status_code)
        print(response.content)
    print(response.headers)
    return response

def hot_soup(url, payload=None):
    """Fetch a page and cook it into a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        payload: Optional mapping of query-string parameters, forwarded to
            ``query``. Defaults to ``None`` rather than a shared ``{}``.

    Returns:
        The response body parsed with Python's built-in ``html.parser``.
    """
    response = query(url, payload)
    return BeautifulSoup(response.content, 'html.parser')

def get_card(name_str):
    """Return Scryfall API data for a single card looked up by fuzzy name.

    Args:
        name_str: Card name (or an approximation of it) as plain text.

    Returns:
        The decoded JSON body of the ``cards/named`` response.
    """
    # Collapse runs of whitespace but keep real spaces: ``requests`` URL-encodes
    # the parameter itself, so a hand-inserted '+' would be sent as '%2B' and
    # reach the API as a literal plus sign inside the card name.
    payload = {'fuzzy': ' '.join(name_str.split())}
    response = query(scryfall_api_url.format('cards/named'), payload)
    return response.json()

def soup_to_xpath(child):
    """Build an absolute XPath for an element of a BeautifulSoup tree.

    Walks from the element up through its ancestors, recording one path step
    per level. A step carries a 1-based ``[n]`` position whenever the parent
    has more than one direct child with the same tag name.

    Args:
        child: The element to locate; it must expose ``name`` and ``parents``.

    Returns:
        A string such as ``/html/body/div[2]/a``.
    """
    steps = []
    for parent in child.parents:
        # Direct children of `parent` that share this element's tag name
        siblings = parent(child.name, recursive=False)
        if len(siblings) == 1:
            steps.append(child.name)
        else:
            # Locate by identity: elements with identical markup compare
            # equal, so an equality search could return an earlier sibling.
            position = next(i for i, sib in enumerate(siblings, 1) if sib is child)
            steps.append(f'{child.name}[{position}]')
        child = parent
    steps.reverse()
    # Steps were collected leaf-first; after reversing, anchor the path at the root
    return '/' + '/'.join(steps)

I originally intended to scrape the official WOTC magic.wizards.com site for tournament results, but unfortunately they don't make much available outside of very recent events. After some research, I decided to scrape tournament data from mtgtop8.com instead, which offers *years* worth of back data. Some individual card data could be scraped there as well, but scryfall.com offers much more robust card data via a convenient API.

Interesting note on data gathering: https://www.dailyesports.gg/wizards-of-the-coast-mtg-frank-karsten-stop-publishing-gp-results-win-rates/

In [70]:
# Fetch and parse the mtgtop8 "format" page for the Standard format
soup = hot_soup(
    url=mtgtop8_url.format('format'),
    payload=dict(f=mt8_format_keys['standard']),
)

{'Date': 'Mon, 22 Feb 2021 21:33:27 GMT', 'Server': 'Apache', 'Content-Location': 'format.php', 'Vary': 'negotiate', 'TCN': 'choice', 'Keep-Alive': 'timeout=5, max=500', 'Connection': 'Keep-Alive', 'Transfer-Encoding': 'chunked', 'Content-Type': 'text/html; charset=ISO-8859-1'}


In [103]:
# Show the <option> tags returned for an empty `selected` attribute value.
# NOTE(review): the recorded output of this cell includes <option> tags that
# carry no `selected` attribute at all, so this filter appears to match those
# too; if only the selected option is wanted, `selected=True` may be the
# intended filter - confirm.
list(soup.find_all(name='option', attrs={'selected': ''}))

[<option selected="" value="format?f=ST&amp;meta=52">Last 2 Months</option>,
 <option value="format?f=ST&amp;meta=50">Last 2 Weeks</option>,
 <option value="format?f=ST&amp;meta=46">Large Events Last 2 Months</option>,
 <option value="format?f=ST&amp;meta=210">Players Tour Online</option>,
 <option value="format?f=ST&amp;meta=217">Standard 2020-2021 (M21 to Kaldheim)</option>,
 <option value="format?f=ST&amp;meta=187">Standard 2019-2020 (M20 to Eldraine)</option>,
 <option value="format?f=ST&amp;meta=175">Standard 2018-2019 (M19 to War of the Spark)</option>,
 <option value="format?f=ST&amp;meta=161">Standard 2017-2018 (Kaladesh to M19)</option>,
 <option value="format?f=ST&amp;meta=128">Standard 2016-2017 (Battle for Zendikar Block - Amonkhet Block)</option>,
 <option value="format?f=ST&amp;meta=133">Standard 2015-2016 (Tarkir Block - Battle for Zendikar Block)</option>,
 <option value="format?f=ST&amp;meta=114">Standard 2014-2015 (Theros Block - Tarkir Block)</option>,
 <option value

In [101]:
meta_select = soup.find('select', {'name': 'meta'})  # drop down selector for meta
# Option label -> full URL for that meta, built from each option's `value`
metas = {opt.text: mtgtop8_url.format(opt['value']) for opt in meta_select.find_all('option')}
next_btn = soup.find(class_='Nav_PN')  # events list next button

# The events list is the third element with class 'Stable'; each row has class 'hover_tr'
events = soup.find_all(class_='Stable')[2].find_all(class_='hover_tr')

# Event id -> summary of the event row
events_agg = {}
for event in events:
    link = event.a
    # Accept the id whether or not another parameter follows it in the URL
    eid = re.search(r"[?&]e=(\d+)", link['href']) if link is not None else None
    if eid is None:
        # Row has no event link to key on; skip it rather than crash the cell
        continue
    star_cell = event.find(class_='O16')
    stars = star_cell.find_all('img') if star_cell is not None else []
    events_agg[eid.group(1)] = {
        'name': link.text,
        'date': event.find(class_='S10').text,
        # A 'bigstar' icon maps to level 4; otherwise the level is the number
        # of star images (0 when the row shows none)
        'level': 4 if stars and 'bigstar' in stars[0]['src'] else len(stars),
        'link': mtgtop8_url.format(link['href'])
    }


In [96]:
# Start a headless Firefox instance for automation
options = Options()
options.add_argument('--headless')
driver = webdriver.Firefox(options=options)

try:
    # TODO: Smash things
    pass
finally:
    # Always shut the browser down, even if the automation above raises.
    # quit() ends the whole WebDriver session, whereas close() only closes
    # the current window.
    driver.quit()

TypeError: get() takes 2 positional arguments but 3 were given