# Part I: Scraping MTG Cards


For this project, I will scrape the card information that the Card Kingdom website publishes for Magic: The Gathering cards.

In [1]:
from bs4 import BeautifulSoup as bs
from selenium import webdriver
import re
import time
import pandas as pd
import mtgutilities as mtgu
import json

In [2]:
# Load the notebook settings (driver location, output directory, ...) from
# config.json in the working directory.
with open('config.json') as fh:
    contents = json.load(fh)

### Getting all of the expansion URLs

In [3]:
# Build `expansions`: a mapping of expansion slug -> {"url": <absolute URL>},
# with one entry for every qualifying link on Card Kingdom's A-Z catalog page.
soup = mtgu.get_soup_from_website("https://www.cardkingdom.com/catalog/magic_the_gathering/by_az", options=True)

# A link is skipped when its href contains any of these fragments.
excluded_fragments = (
    'art-series',
    'graded-magic',
    'complete-sets',
    'commander-deck',
    'card-kingdom-tokens',
    'variants',
    'starter-kits',
    'jpn',
)

expansions = dict()
for a in soup.find_all('a', href=True):
    href = a['href']
    if 'mtg/' not in href or any(fragment in href for fragment in excluded_fragments):
        continue

    # `a.string` is None when the anchor wraps more than one child node, which
    # would make `.replace` raise AttributeError; fall back to the anchor's
    # concatenated text in that case.
    label = a.string if a.string is not None else a.get_text()

    # Turn the link text into the slug used as the expansion key. The order of
    # the replacements matters (e.g. ' - ' becomes '---' before collapsing).
    slug = (
        label.replace(' ', '-')
        .replace('---', '-')
        .replace(':', '')
        .replace('&', '')
        .replace('.', '')
        .replace("'", "")
        .replace('--', '-')
        .replace('/', '-')
        .lower()
    )

    urls = {"url": f"https://www.cardkingdom.com{href}"}
    expansions[slug] = urls

In [4]:
# Scrape every expansion page by page, accumulate all cards in `expansions_df`,
# and rewrite the combined CSV after each expansion.
PAGE_DELAY_SECONDS = 3  # pause between consecutive page requests

expansions_df = pd.DataFrame()
for expansion_key, expansion_values in expansions.items():
    expansion_url = expansion_values.get("url")

    # Obtaining number of pages to scrape per expansion
    soup = mtgu.get_soup_from_website(expansion_url, options=True)
    max_pages = mtgu.get_max_pages(soup)

    # One accumulator per output column; each grows by one page's worth of
    # values on every loop iteration below.
    card_names = []
    card_costs = []
    card_converted_costs = []
    card_type = []
    card_urls = []
    card_rarity = []
    card_pt = []
    card_text = []
    card_price = []

    # Scraping page by page
    driver_pages = webdriver.Chrome(contents["chrome-driver-location"])
    try:
        for i in range(1, max_pages + 1):
            driver_pages.get(f"{expansion_url}/?page={i}")
            soup_pages = bs(driver_pages.page_source, 'html.parser')

            # Getting card URLs
            card_urls += mtgu.get_card_urls(soup_pages, expansion_key)
            # Getting card names
            card_names += mtgu.get_card_names(soup_pages, expansion_key)
            # Getting mana costs
            card_costs += mtgu.get_mana_cost(soup_pages)
            # Getting converted mana cost. This is a plain assignment on
            # purpose: the helper receives the cumulative `card_costs` list,
            # so its result replaces (rather than extends) the previous one.
            card_converted_costs = mtgu.get_converted_mana_cost(card_costs)
            # Getting card type
            card_type += mtgu.get_card_type(soup_pages)
            # Getting card rarity
            card_rarity += mtgu.get_card_rarity(soup_pages)
            # Getting card P/T
            card_pt += mtgu.get_card_pt(soup_pages)
            # Getting card text
            card_text += mtgu.get_card_text(soup_pages)
            # Getting card price
            card_price += mtgu.get_card_price(soup_pages)
            time.sleep(PAGE_DELAY_SECONDS)
    finally:
        # Always shut the browser down, even when a page fails or the run is
        # interrupted; otherwise every expansion leaves a Chrome process behind.
        driver_pages.quit()

    # `expansion_id` is a scalar, so pandas repeats it on every row.
    df = pd.DataFrame(
        {
            'expansion_id': expansion_key,
            'url': card_urls,
            'name': card_names,
            'cost': card_costs,
            'converted_cost': card_converted_costs,
            'type': card_type,
            'rarity': card_rarity,
            'pt': card_pt,
            'text': card_text,
            'price': card_price,
        }
    )

    expansions_df = pd.concat([expansions_df, df], axis=0)
    # The whole table is rewritten after every expansion, which keeps the
    # results gathered so far on disk if a later expansion fails.
    expansions_df.to_csv(f"{contents['external-drive-location']}/cards/cards.csv", index=False)

KeyboardInterrupt: 

In [5]:
# Display the card table accumulated by the scraping loop above.
expansions_df

Unnamed: 0,expansion_id,url,name,cost,converted_cost,type,rarity,pt,text,price
0,3rd-edition,https://www.cardkingdom.com/mtg/3rd-edition/vo...,Volcanic Island,[],0,Land - Island Mountain,R,,{T}: Add {U} or {R}.,899.99
1,3rd-edition,https://www.cardkingdom.com/mtg/3rd-edition/bayou,Bayou,[],0,Land - Swamp Forest,R,,{T}: Add {B} or {G}.,449.99
2,3rd-edition,https://www.cardkingdom.com/mtg/3rd-edition/tu...,Tundra,[],0,Land - Plains Island,R,,{T}: Add {U} or {W}.,423.99
3,3rd-edition,https://www.cardkingdom.com/mtg/3rd-edition/tr...,Tropical Island,[],0,Land - Forest Island,R,,{T}: Add {U} or {G}.,519.99
4,3rd-edition,https://www.cardkingdom.com/mtg/3rd-edition/ba...,Badlands,[],0,Land - Swamp Mountain,R,,{T}: Add {B} or {R}.,449.99
...,...,...,...,...,...,...,...,...,...,...
20,5th-edition,https://www.cardkingdom.com/mtg/5th-edition/wi...,Winds of Change,[R],1,Sorcery,R,,Each player shuffles the cards from their hand...,13.99
21,5th-edition,https://www.cardkingdom.com/mtg/5th-edition/hu...,Hurkyl's Recall,"[1, U]",2,Instant,R,,Return all artifacts target player owns to the...,5.49
22,5th-edition,https://www.cardkingdom.com/mtg/5th-edition/ho...,Howling Mine,[2],2,Artifact,R,,"At the beginning of each player's draw step, i...",5.99
23,5th-edition,https://www.cardkingdom.com/mtg/5th-edition/ur...,Urza's Tower,[],0,Land - Urza's Tower,C,,{T}: Add {C}. If you control an Urza's Mine an...,1.59
