# MTGGoldfish Scraping

In [61]:
#Basic Python
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import re
import pickle
import requests

#API
import requests
from bs4 import BeautifulSoup

In [106]:
def pricify(card, expansion, foiling, url_print='No'):
    '''Return the paper price history of one card printing from MTGGoldfish.

    Downloads the card's price page, finds the inline <script> that builds
    the price chart (the one containing 'var d ='), and reads the
    date,price pairs out of it.

    Parameters
    ----------
    card : str
        Card name, e.g. "Semester's End". Spaces become '+' and apostrophes
        are dropped when the URL is built.
    expansion : str
        Set name, e.g. 'Strixhaven School of Mages'. Spaces become '+'.
    foiling : str
        'Foil' or 'Yes' (any case) requests the foil printing; any other
        value requests the non-foil printing.
    url_print : str, optional
        'Print' (any case) echoes the requested URL. Defaults to 'No', so
        the function can be called with just the first three arguments.

    Returns
    -------
    pandas.DataFrame
        One row per data point with columns 'date' (datetime), 'price'
        (float), 'expansion' (the value passed in) and 'foil'
        ('Foil' or 'Non Foil').

    Raises
    ------
    IndexError
        If no <script> on the page contains 'var d ='.
    ValueError
        If the script is found but holds no date,price pairs.
    '''
    is_foil = foiling.lower() in ('foil', 'yes')
    foil_option = ':Foil' if is_foil else ''
    foil_label = 'Foil' if is_foil else 'Non Foil'

    url = 'https://www.mtggoldfish.com/price/{}/{}#paper'.format(
        expansion.replace(' ', '+') + foil_option,
        card.replace(' ', '+').replace("'", ""))

    # Echo before the request so the URL is visible even if the download fails.
    if url_print.lower() == 'print':
        print(url)

    # A timeout keeps an unresponsive server from hanging the notebook forever.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')
    scripts = [str(tag) for tag in soup.find_all('script')]

    # The first script mentioning 'var d =' carries the pricing data.
    data = next((s for s in scripts if 'var d =' in s), None)
    if data is None:
        raise IndexError('no price data script found at {}'.format(url))

    # Keep only the text between the data variable and the chart constructor;
    # if the constructor marker is absent, read to the end of the script.
    start = data.find('var d =')
    end = data.find('g = new Dygraph', start)
    messy = data[start:end] if end != -1 else data[start:]

    # Read the YYYY-MM-DD,price pairs directly instead of stripping the
    # JavaScript around them, so the result does not depend on how the page
    # escapes the card name in the series header.
    rows = re.findall(r'(\d{4}-\d{2}-\d{2}),\s*(-?\d+(?:\.\d+)?)', messy)
    if not rows:
        raise ValueError('no date,price rows found at {}'.format(url))

    price_history = pd.DataFrame(rows, columns=['date', 'price'])
    price_history['date'] = pd.to_datetime(price_history['date'], format='%Y-%m-%d')
    price_history['price'] = price_history['price'].astype(float)
    price_history['expansion'] = expansion
    price_history['foil'] = foil_label

    return price_history

In [42]:
# Foil Rhystic Study from Prophecy. The fourth argument is passed explicitly
# ('No' means the URL is not echoed) so the call matches pricify's signature.
rhystic_study = pricify('Rhystic Study', 'Prophecy', 'Foil', 'No')
rhystic_study

Unnamed: 0,date,price,expansion,foil
0,2014-03-25,20.66,Prophecy,Foil
1,2014-03-26,20.64,Prophecy,Foil
2,2014-03-27,20.64,Prophecy,Foil
3,2014-03-28,20.64,Prophecy,Foil
4,2014-03-29,20.64,Prophecy,Foil
...,...,...,...,...
2553,2021-05-27,350.00,Prophecy,Foil
2554,2021-05-28,400.00,Prophecy,Foil
2555,2021-05-29,400.00,Prophecy,Foil
2556,2021-05-30,400.00,Prophecy,Foil


In [108]:
# Non-foil price histories for a handful of Strixhaven cards.
# Only the Semester's End call passes 'Print', so only its URL is echoed.
wandering_archaic = pricify(
    card='Wandering Archaic', expansion='Strixhaven School of Mages', foiling='No', url_print='No')
semesters_end = pricify(
    card="Semester's End", expansion='Strixhaven School of Mages', foiling='No', url_print='Print')
strixhaven_stadium = pricify(
    card='Strixhaven Stadium', expansion='Strixhaven School of Mages', foiling='No', url_print='No')
beledros_witherbloom = pricify(
    card='Beledros Witherbloom', expansion='Strixhaven School of Mages', foiling='No', url_print='No')
ecological_appreciation = pricify(
    card='Ecological Appreciation', expansion='Strixhaven School of Mages', foiling='No', url_print='No')
galazeth_prismari = pricify(
    card='Galazeth Prismari', expansion='Strixhaven School of Mages', foiling='No', url_print='No')
culling_ritual = pricify(
    card='Culling Ritual', expansion='Strixhaven School of Mages', foiling='No', url_print='No')

https://www.mtggoldfish.com/price/Strixhaven+School+of+Mages/Semesters+End#paper


In [109]:
# Scratch copy of the fetch-and-clean steps from pricify(), hard-coded to a
# single card and stopping at cleaned_list. Every line below is commented out,
# so this cell does nothing when run.
# card = 'Agadeem\'s Awakening'
# url = 'https://www.mtggoldfish.com/price/Zendikar+Rising/Agadeems+Awakening#paper'
# response = requests.get(url)
# soup = BeautifulSoup(response.content, 'html.parser')
# script = soup.find_all("script")
# result = []
# [result.append(str(s)) for s in script]
# #return result

# #which compartment index is the pricing data
# index = [idx for idx, s in enumerate(result) if 'var d =' in s][0]
# data = result[index]
# #where exactly is the date and price data
# #price_index = [data.find('var d ='), data.find('g = new Dygraph')]

# #cleaning up price and date data
# messy = data[data.find('var d ='): data.find('g = new Dygraph')]
# cleaned = messy.replace('var d = "Date,{}";\n  '.format(card.replace("'", "&#39;")), '').replace('d += "\\n', '').replace('";\n  ', '|').replace('";\n\n', '')

# cleaned_list = cleaned.split("|")
# cleaned_list

In [112]:
from bokeh.models.tools import HoverTool
from bokeh.plotting import figure, show

card = 'strixhaven_stadium'
x = strixhaven_stadium['date']
y = strixhaven_stadium['price']

# Empty figure with a title and labelled axes.
p = figure(
    title="{} Price".format(card),
    x_axis_label="Date",
    y_axis_label="Price",
)

# Price history drawn as one line of width 2, with a legend entry.
p.line(x, y, legend_label="Price.", line_width=2)

# Hover tooltip for each point; the x value goes through the 'datetime'
# formatter using the %F pattern.
p.add_tools(
    HoverTool(
        tooltips=[('(date, price)', '(@x{%F}, @y)')],
        formatters={'@x': 'datetime'},
    )
)

# Render the figure.
show(p)


# Commander Lists

In [117]:
# Download the metamox commander page and flatten it to plain text.
url = 'http://www.metamox.com/commander/'
# A timeout keeps an unresponsive server from hanging the notebook forever.
response = requests.get(url, timeout=30)
soup = BeautifulSoup(response.content, 'html.parser')
text = soup.get_text()

# The card list is the text between these two markers. Fail loudly if either
# is missing rather than slicing with the -1 that find() returns.
list_heading = 'Most popular cards in commander in the last 365 days'
list_start = text.find(list_heading)
list_end = text.find('\nMoney Cards')
if list_start == -1 or list_end == -1:
    raise ValueError('could not locate the card list on {}'.format(url))
messy = text[list_start:list_end]

# Drop line breaks, turn every '$' into a '|' separator, remove '.' and the
# heading, then strip all digits. The heading is removed before the digits so
# that its '365' is still intact when it is matched.
clean = re.sub(r'\d+', '', messy.replace('\n', '').replace('\r', '').replace('$', '|').replace('.', '').replace(
    list_heading + ' ', ''))
top_list = clean.split('|')

In [122]:
# Split each entry into its first character (stored as the rarity) and the
# remainder (stored as the card name).
rarity = [entry[:1] for entry in top_list]
card_list = [entry[1:] for entry in top_list]

Unnamed: 0,Rarity,Card
0,U,Sol Ring
1,C,Command Tower
2,U,Reliquary Tower
3,C,Counterspell
4,C,Evolving Wilds
...,...,...
994,M,Temporal Trespass
995,U,Turnabout
996,M,Urabrask the Hidden
997,R,"Azami, Lady of Scrolls"


In [138]:
# Two-column table pairing each rarity with its card name, row by row.
edh_list = pd.DataFrame.from_records(
    list(zip(rarity, card_list)),
    columns=['Rarity', 'Card'],
)
edh_list

Unnamed: 0,Rarity,Card
0,U,Sol Ring
1,C,Command Tower
2,U,Reliquary Tower
3,C,Counterspell
4,C,Evolving Wilds
...,...,...
994,M,Temporal Trespass
995,U,Turnabout
996,M,Urabrask the Hidden
997,R,"Azami, Lady of Scrolls"
