# Part I: Scraping MTG Card Decks


In [None]:
from bs4 import BeautifulSoup as bs
from selenium import webdriver
import pandas as pd
import mtgutilities as mtgu
import json
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.chrome.options import Options
import pathlib
import time

In [2]:
# Read the notebook settings from config.json; later cells look up the
# 'external-drive-location' and 'chrome-driver-location' entries in `contents`.
with open('config.json', 'r') as config_file:
    contents = json.load(config_file)

### Scraping deck URLs and metadata

In [13]:
# Commander color identities and one output directory per identity.
URL = 'https://edhrec.com/'
COLORS = [
    # mono-colored and colorless
    'w', 'u', 'b', 'g', 'r', 'colorless',
    # two colors
    'wu', 'ub', 'br', 'rg', 'gw', 'wb', 'ur', 'bg', 'rw', 'gu',
    # three colors
    'wub', 'ubr', 'brg', 'rgw', 'gwu', 'wbg', 'uwr', 'bgu', 'rwb', 'gur',
    # four colors
    'wubr', 'ubrg', 'brgw', 'rgwu', 'gwub',
    # five colors
    'wubrg',
]

# Create <external drive>/commanders/<color>/ for every color identity;
# directories that already exist are left untouched.
for color in COLORS:
    color_dir = pathlib.Path(f"{contents['external-drive-location']}/commanders/{color}")
    color_dir.mkdir(parents=True, exist_ok=True)
    

In [None]:
# Build the per-color commander listing URLs (only the first color is kept
# for now) and ask the mtgutilities helper for the commanders on each page.
# NOTE(review): the original note says the helper returns commanders "with a
# minimum number of decks" -- that filter lives inside mtgu; confirm there.
selected_colors = COLORS[:1]
colors_urls = [f"{URL}commanders/{color}" for color in selected_colors]
commanders = mtgu.get_color_commanders(colors_urls)

In [56]:
# Creating decks
#
# For every selected commander, open its EDHREC "decks" page and collect the
# metadata of each listed deck, following the "Next" pagination link until it
# is no longer present.
#
# Result: commander_decks[commander_name][deck_id] ->
#         {'url': ..., 'price': ..., 'tribe': ..., 'theme': ...}
NEXT_PAGE_XPATH = "//a[contains(text(), 'Next')]"

# A single incognito browser is shared by all commanders instead of launching
# (and never closing) a new Chrome instance per commander. It is deliberately
# left open and bound to `driver`, because the deck-download cell reuses it.
# NOTE(review): passing the chromedriver path positionally is the older
# Selenium calling convention; recent Selenium releases expect a Service
# object instead -- confirm against the installed version.
opts = Options()
opts.add_argument("--incognito")
driver = webdriver.Chrome(contents["chrome-driver-location"], options=opts)

commander_decks = {}
for commander_color, commander_names in commanders.items():
    # Only the first commander of each color is processed for now.
    for commander_name in commander_names[:1]:
        # NOTE(review): the slices assume the color key starts with a 30-char
        # prefix (the length of f"{URL}commanders/") and the commander name
        # with an 11-char prefix -- confirm against mtgu.get_color_commanders.
        commander_slug = commander_name[11:]
        pathlib.Path(
            f"{contents['external-drive-location']}/commanders/{commander_color[30:]}/{commander_slug}"
        ).mkdir(parents=True, exist_ok=True)

        driver.get(f"{URL}decks/{commander_slug}")
        decks = {}
        while True:
            soup_commander = bs(driver.page_source, features="html.parser")
            # Render every table cell to text once per page instead of once
            # per deck link.
            cells = [(cell, str(cell)) for cell in soup_commander.find_all("td")]
            for link in soup_commander.find_all('a', href=True):
                href = link["href"]
                if '/deckpreview/' not in href:
                    continue
                deck_id = href[13:]  # drop the leading '/deckpreview/'
                for cell, cell_html in cells:
                    if deck_id not in cell_html:
                        continue
                    # The three cells following the deck link hold price,
                    # tribe and theme, in that order.
                    price_cell = cell.find_next_sibling()
                    tribe_cell = price_cell.find_next_sibling()
                    theme_cell = tribe_cell.find_next_sibling()
                    decks[deck_id] = {
                        # Kept because the deck-download step reads 'url'.
                        'url': href,
                        'price': price_cell.text,
                        'tribe': tribe_cell.text,
                        'theme': theme_cell.text,
                    }
            if not mtgu.check_exists_by_xpath(driver, NEXT_PAGE_XPATH):
                break
            element = driver.find_element(By.XPATH, NEXT_PAGE_XPATH)
            # Scroll the link into view, then back up a little before clicking
            # (presumably so a fixed page header does not cover the link).
            driver.execute_script('arguments[0].scrollIntoView();', element)
            driver.execute_script('window.scrollBy(0, -200);')
            element.click()
        commander_decks[commander_name] = decks

In [64]:
# Download the card list of every scraped deck into
# <external drive>/commanders/<color>/<commander>/<deck_id>.txt.
#
# Map each commander back to the color page it was listed under, so the output
# directory does not depend on loop variables left over from an earlier cell.
color_of_commander = {
    name: color_url
    for color_url, names in commanders.items()
    for name in names
}

for commander in commander_decks.keys():
    # `data` maps deck_id -> metadata dict for this commander.
    data = commander_decks[commander]

    deck_dir = pathlib.Path(
        f"{contents['external-drive-location']}/commanders/{color_of_commander[commander][30:]}/{commander[11:]}"
    )
    deck_dir.mkdir(parents=True, exist_ok=True)

    for deck_id, deck_data in data.items():
        # Deck ids are the part of the link after '/deckpreview/', so the
        # relative URL can be rebuilt when the metadata carries no 'url'.
        deck_url = deck_data.get('url', f"/deckpreview/{deck_id}")
        driver.get(f"{URL}{deck_url[1:]}")  # [1:] drops the leading '/'; URL already ends with one
        time.sleep(3)  # give the deck page time to render before interacting
        mtgu.click_element_in_href(driver, 'Text View')
        soup_deck = bs(driver.page_source, features="html.parser")
        # Explicit UTF-8 so card names with non-ASCII characters can be written
        # regardless of the platform's default encoding.
        with open(deck_dir / f"{deck_id}.txt", 'w', encoding='utf-8') as f:
            for j in soup_deck.find_all('a', href=True):
                # Keep only anchors that reference a card page.
                if '/cards/' in str(j):
                    f.write(f"{j.text}\n")

In [65]:
# Display the deck metadata currently bound to `data` (the decks of the last
# commander visited by the loop above).
data

{'pqiGif_6EKNXI56w7lXJVA': {'url': '/deckpreview/pqiGif_6EKNXI56w7lXJVA',
  'price': '$834',
  'tribe': 'Angel',
  'theme': 'Lifegain'},
 'eXU7EFoA4pBILPazCvdyKg': {'url': '/deckpreview/eXU7EFoA4pBILPazCvdyKg',
  'price': '$565',
  'tribe': 'Angel',
  'theme': ''},
 'Y21-0FqEt00fPz-eDddVHw': {'url': '/deckpreview/Y21-0FqEt00fPz-eDddVHw',
  'price': '$470',
  'tribe': 'Angel',
  'theme': ''},
 'CTR7JFLhY10t2RgjVPtNdw': {'url': '/deckpreview/CTR7JFLhY10t2RgjVPtNdw',
  'price': '$788',
  'tribe': 'Angel',
  'theme': ''},
 'eYo_nF7hTlwJM4I5vFAVTQ': {'url': '/deckpreview/eYo_nF7hTlwJM4I5vFAVTQ',
  'price': '$379',
  'tribe': 'Angel',
  'theme': ''},
 'ZsFc9W-a8T9sU3qAihusHA': {'url': '/deckpreview/ZsFc9W-a8T9sU3qAihusHA',
  'price': '$1330',
  'tribe': 'Angel',
  'theme': ''},
 'wpAlpoGCnPzm1m8x5h4O2g': {'url': '/deckpreview/wpAlpoGCnPzm1m8x5h4O2g',
  'price': '$497',
  'tribe': 'Angel',
  'theme': 'Lifegain'},
 'QBZJAZ-_oxe42mCzHl6Afg': {'url': '/deckpreview/QBZJAZ-_oxe42mCzHl6Afg',
  'pri