# Can a card's attributes be used to predict its price?

In [88]:
import requests
from bs4 import BeautifulSoup
import re
import numpy as np
import pandas as pd
import random

In [131]:
# Load the set catalogue, keep the sets whose set_size exceeds 100, and draw
# one of them at random.
random.seed(42)

all_sets = pd.read_csv("set_info.csv")
major_sets = all_sets[all_sets["set_size"] > 100]
# DataFrame.sample does not use Python's `random` module, so random.seed()
# above has no effect on it; the seed has to be passed explicitly for the draw
# to be reproducible.
sample = major_sets.sample(1, random_state=42)
sample

Unnamed: 0,set_name,set_id,set_size,set_year
425,Lorwyn,lrw,301,2007


In [None]:
import time  # cell-local import: only the scraper needs it (delay between requests)

# Scrape every card page of every set in `major_sets` and write one
# "<set_id>_info.csv" per set.  Each card contributes one row with its name,
# type line halves, set, rarity, converted mana cost, colour, three format
# legalities, reprint flag, artist and price.

_CARD_URL = 'https://scryfall.com/card/{}/{}/'
_REQUEST_TIMEOUT = 30   # seconds; requests.get() waits indefinitely without one
# NOTE(review): Scryfall's published guidelines ask for a short pause between
# requests (roughly 50-100 ms) -- confirm the current policy.
_REQUEST_DELAY = 0.1    # seconds

# A price with optional thousands separators ("1,234.56") or without ("123.45").
_PRICE_RE = re.compile(r"\d{1,3}(?:,\d{3})+\.\d\d|\d+\.\d\d")
# Mana cost tokens: a whole number (kept together, so "10" is ten, not one)
# or a single capital letter.
_MANA_TOKEN_RE = re.compile(r"\d+|[A-Z]")
_COLOR_NAMES = (("W", "White"), ("U", "Blue"), ("B", "Black"),
                ("R", "Red"), ("G", "Green"))


def _summarize_mana_cost(cost_text):
    """Return ``(cmc, color)`` for the mana-cost text of a card title.

    Numbers count at face value and every capital letter counts as one mana.
    The colour is the single colour found, "Gold" for several, or
    "Colorless" for none.
    """
    tokens = _MANA_TOKEN_RE.findall(cost_text)
    cmc = sum(int(tok) if tok.isdigit() else 1 for tok in tokens)
    found = [name for symbol, name in _COLOR_NAMES if symbol in tokens]
    if len(found) == 1:
        color = found[0]
    elif found:
        color = "Gold"
    else:
        color = "Colorless"
    return cmc, color


def _parse_card(html):
    """Extract one card's attributes from its parsed page.

    Returns a dict of attributes, or ``None`` when the oracle text mentions
    "Suspend" (those cards are deliberately left out of the data set).
    Optional page sections that are absent (prices, legalities, prints table)
    yield ``None`` for that field.  Raises ``AttributeError``/``IndexError``
    when a required element (title, type line, artist, current print) is
    missing or shaped unexpectedly.
    """
    card_title = html.find("h1", class_="card-text-title").text

    oracle_box = html.find("div", class_="card-text-oracle")
    if oracle_box is not None and "Suspend" in oracle_box.text.strip():
        return None

    # First title line is the name; the second, when present, is the mana cost.
    title_lines = card_title.strip().split("\n")
    card_name = title_lines[0]
    cost_text = title_lines[1].strip() if len(title_lines) > 1 else ""

    card_type = html.find("p", class_="card-text-type-line").text.strip()
    if "—" in card_type:
        type_halves = card_type.split("—")
        card_supertype = type_halves[0].strip()
        card_subtype = type_halves[1].strip()
    else:
        card_supertype = card_type
        card_subtype = "None"

    # Kept as text: storing encoded bytes would put "b'...'" into the CSV.
    card_artist = html.find("p", class_="card-text-artist").text.strip().split("\n")[1].strip()

    card_current_prints = html.find("div", class_="prints-current").text.strip().replace("\n", "").split("              ")
    # The set name is everything but the last word of the first chunk.
    card_set = " ".join(card_current_prints[0].strip().split(" ")[0:-1])
    card_rarity = card_current_prints[1].split(" · ")[1]

    # Legalities start as None so a page without the entry never inherits the
    # previous card's value.
    legal = {"Standard": None, "Modern": None, "Commander": None}
    for item in html.find_all("div", class_="card-legality-item"):
        tokens = item.text.split()
        if len(tokens) < 2 or tokens[0] not in legal:
            continue
        if tokens[1] in ("Not", "Banned"):
            legal[tokens[0]] = False
        elif tokens[1] == "Legal":
            legal[tokens[0]] = True

    # The first price of the last matching row wins, as before.
    card_price = None
    for row in html.find_all("tr", class_="current"):
        found_prices = _PRICE_RE.findall(row.text.strip())
        if found_prices:
            card_price = float(found_prices[0].replace(",", ""))

    # NOTE(review): the "> 6 tokens" threshold presumably separates a prints
    # table listing only the current printing from one listing more -- confirm.
    card_is_reprint = None
    for table in html.select("div[class = prints] > table[class=prints-table]"):
        sections = re.sub("  {1,}", " ", table.text.replace("\n", ""))
        if "Prints" in sections:
            card_is_reprint = len(sections.split()) > 6

    if "Land" in card_supertype:
        card_cmc, card_color = 0, "Colorless"
    else:
        card_cmc, card_color = _summarize_mana_cost(cost_text)

    return {
        "name": card_name,
        "supertype": card_supertype,
        "subtype": card_subtype,
        "set": card_set,
        "rarity": card_rarity,
        "cmc": card_cmc,
        "color": card_color,
        "modern": legal["Modern"],
        "standard": legal["Standard"],
        "commander": legal["Commander"],
        "reprint": card_is_reprint,
        "artist": card_artist,
        "price": card_price,
    }


for set_, size in zip(major_sets["set_id"], major_sets["set_size"]):
    names = []
    supertypes = []
    subtypes = []
    sets = []
    rarities = []
    cmcs = []
    colors = []
    modern_legalities = []
    standard_legalities = []
    commander_legalities = []
    reprints = []
    artists = []
    prices = []
    for card_num in range(1, size + 1):
        print(set_, card_num)
        try:
            response = requests.get(_CARD_URL.format(set_, card_num), timeout=_REQUEST_TIMEOUT)
        except requests.RequestException as err:
            print("  skipped: request failed ({})".format(err))
            continue
        finally:
            time.sleep(_REQUEST_DELAY)
        if response.status_code != 200:
            # Not every number in 1..set_size necessarily has a page.
            print("  skipped: HTTP {}".format(response.status_code))
            continue
        html = BeautifulSoup(response.content, 'html.parser')

        try:
            card = _parse_card(html)
        except (AttributeError, IndexError) as err:
            print("  skipped: could not parse page ({})".format(err))
            continue
        if card is None:
            continue

        # All columns are appended together, only after the whole card has
        # parsed, so the lists can never drift out of step.
        names.append(card["name"])
        supertypes.append(card["supertype"])
        subtypes.append(card["subtype"])
        sets.append(card["set"])
        rarities.append(card["rarity"])
        cmcs.append(card["cmc"])
        colors.append(card["color"])
        modern_legalities.append(card["modern"])
        standard_legalities.append(card["standard"])
        commander_legalities.append(card["commander"])
        reprints.append(card["reprint"])
        artists.append(card["artist"])
        prices.append(card["price"])

    data = {"names": names, "supertypes": supertypes, "subtypes": subtypes, "set": sets, "rarity": rarities, "cmc": cmcs, "color": colors, "modern legality": modern_legalities, "standard legality": standard_legalities, "commander legality": commander_legalities, "reprint": reprints, "artist": artists, "price": prices}
    df = pd.DataFrame(data)
    df.to_csv("{}_info.csv".format(set_), index = False)

c20 1
c20 2
c20 3
c20 4
c20 5
c20 6
c20 7
c20 8
c20 9
c20 10
c20 11
c20 12
c20 13
c20 14
c20 15
c20 16
c20 17
c20 18
c20 19
c20 20
c20 21
c20 22
c20 23
c20 24
c20 25
c20 26
c20 27
c20 28
c20 29
c20 30
c20 31
c20 32
c20 33
c20 34
c20 35
c20 36
c20 37
c20 38
c20 39
c20 40
c20 41
c20 42
c20 43
c20 44
c20 45
c20 46
c20 47
c20 48
c20 49
c20 50
c20 51
c20 52
c20 53
c20 54
c20 55
c20 56
c20 57
c20 58
c20 59
c20 60
c20 61
c20 62
c20 63
c20 64
c20 65
c20 66
c20 67
c20 68
c20 69
c20 70
c20 71
c20 72
c20 73
c20 74
c20 75
c20 76
c20 77
c20 78
c20 79
c20 80
c20 81
c20 82
c20 83
c20 84
c20 85
c20 86
c20 87
c20 88
c20 89
c20 90
c20 91
c20 92
c20 93
c20 94
c20 95
c20 96
c20 97
c20 98
c20 99
c20 100
c20 101
c20 102
c20 103
c20 104
c20 105
c20 106
c20 107
c20 108
c20 109
c20 110
c20 111
c20 112
c20 113
c20 114
c20 115
c20 116
c20 117
c20 118
c20 119
c20 120
c20 121
c20 122
c20 123
c20 124
c20 125
c20 126
c20 127
c20 128
c20 129
c20 130
c20 131
c20 132
c20 133
c20 134
c20 135
c20 136
c20 137
c20 138
c20 

In [1]:
# Rebuild the DataFrame from the per-card lists filled by the scraping loop.
# Every list must have the same length; if the constructor complains, print
# len() of each list to find the column that fell out of step.
column_labels = [
    "names", "supertypes", "subtypes", "set", "rarity", "cmc", "color",
    "modern legality", "standard legality", "commander legality",
    "reprint", "artist", "price",
]
column_values = [
    names, supertypes, subtypes, sets, rarities, cmcs, colors,
    modern_legalities, standard_legalities, commander_legalities,
    reprints, artists, prices,
]
data = dict(zip(column_labels, column_values))
pd.DataFrame(data)

NameError: name 'names' is not defined

In [146]:
# Fetch a single card page (set "lrw", number 257) to experiment on.
# The timeout keeps a stalled connection from hanging the notebook forever.
response = requests.get('https://scryfall.com/card/lrw/257/', timeout=30)
# Fail here on an HTTP error instead of parsing an error page further down.
response.raise_for_status()
html = BeautifulSoup(response.content, 'html.parser')

In [152]:
# Single-card walkthrough of the scraping steps, run against the page already
# parsed into `html`.  Each extracted attribute is printed for inspection.
card_title = html.find("h1", class_="card-text-title").text

# Cards whose oracle text mentions "Suspend" are skipped by the batch loop;
# here the flag only suppresses the mana cost / colour section at the end.
oracle_box = html.find("div", class_="card-text-oracle")
card_has_suspend = oracle_box is not None and "Suspend" in oracle_box.text.strip()

# First title line is the name; the second, when present, is the mana cost.
title_lines = card_title.strip().split("\n")
card_name = title_lines[0]
print(card_name)

card_type = html.find("p", class_="card-text-type-line").text.strip()
if "—" in card_type:
    card_types = card_type.split("—")
    card_supertype = card_types[0].strip()
    card_subtype = card_types[1].strip()
else:
    card_supertype = card_type
    card_subtype = "None"
print(card_supertype)
print(card_subtype)

card_artist = html.find("p", class_="card-text-artist").text.strip().split("\n")[1].strip()
# Printed as UTF-8 bytes, which is why the output shows the b'...' form.
print(card_artist.encode('utf-8'))

card_current_prints = html.find("div", class_="prints-current").text.strip().replace("\n", "").split("              ")
# The set name is everything but the last word of the first chunk.
card_set = " ".join(card_current_prints[0].strip().split(" ")[0:-1])
print(card_set)
card_rarity = card_current_prints[1].split(" · ")[1]
print(card_rarity)

# Start from None so a page without a legality entry cannot show a value left
# over from an earlier run of this cell.
card_standard_legal = None
card_modern_legal = None
card_commander_legal = None
for item in html.find_all("div", class_="card-legality-item"):
    tokens = item.text.split()
    if len(tokens) < 2:
        continue
    if tokens[1] in ("Not", "Banned"):
        verdict = False
    elif tokens[1] == "Legal":
        verdict = True
    else:
        continue
    if tokens[0] == "Standard":
        card_standard_legal = verdict
    elif tokens[0] == "Modern":
        card_modern_legal = verdict
    elif tokens[0] == "Commander":
        card_commander_legal = verdict
print("sl", card_standard_legal)
print("ml", card_modern_legal)
print("cl", card_commander_legal)

# Match the whole price: the earlier two-digit pattern cut "123.45" down to
# "23.45".  Thousands separators are accepted and stripped.
card_price = None
for row in html.find_all("tr", class_="current"):
    found_prices = re.findall(r"\d{1,3}(?:,\d{3})+\.\d\d|\d+\.\d\d", row.text.strip())
    if found_prices:
        card_price = float(found_prices[0].replace(",", ""))
print(card_price)

# NOTE(review): the "> 6 tokens" threshold presumably separates a prints table
# listing only the current printing from one listing more -- confirm.
card_is_reprint = None
for table in html.select("div[class = prints] > table[class=prints-table]"):
    sections = re.sub("  {1,}", " ", table.text.replace("\n", ""))
    if "Prints" in sections:
        card_is_reprint = len(sections.split()) > 6
print("rp", card_is_reprint)

if not card_has_suspend:
    if "Land" in card_supertype:
        card_cmc = 0
        card_color = "Colorless"
    else:
        cost_text = title_lines[1].strip() if len(title_lines) > 1 else ""
        # Whole numbers stay together (so "10" counts ten); each capital
        # letter counts as one mana.
        card_casting_cost = re.findall(r"\d+|[A-Z]", cost_text)
        card_cmc = sum(int(tok) if tok.isdigit() else 1 for tok in card_casting_cost)

        color_names = (("W", "White"), ("U", "Blue"), ("B", "Black"),
                       ("R", "Red"), ("G", "Green"))
        card_color_list = [name for symbol, name in color_names
                           if symbol in card_casting_cost]
        if len(card_color_list) == 1:
            card_color = card_color_list[0]
        elif card_color_list:
            card_color = "Gold"
        else:
            card_color = "Colorless"
    print(card_cmc)
    print(card_color)

Herbal Poultice
Artifact
None
b'Scott Hampton'
Lorwyn
Common
sl False
ml True
cl True
0.24
rp False
0
Colorless


In [68]:
# Reprint check in isolation, run against the page already parsed into `html`.
# Starts from None so that a page without a "Prints" table reports "unknown"
# instead of raising NameError or echoing a value from an earlier run.
card_is_reprint = None
for table in html.select("div[class = prints] > table[class=prints-table]"):
    # Newlines are removed before counting, so words that were separated only
    # by a line break merge into a single token.
    sections = re.sub("  {1,}", " ", table.text.replace("\n", ""))
    if "Prints" in sections:
        # NOTE(review): more than 6 tokens presumably means the table lists
        # printings beyond the current one -- confirm against a known reprint.
        card_is_reprint = len(sections.split()) > 6
print(card_is_reprint)


False
