# Magic Card Image Database

Since I could not find a ready-to-use Magic: The Gathering card image database, I extracted the images from scryfall.com for the following sets, together with their respective promos:
- Guilds of Ravnica
- Core Set 2019
- Dominaria
- Ixalan
- Rivals of Ixalan

The objective here is to create a card image database that can be used to train a CNN to identify which set a card belongs to. Considering the limited number of cards per set and the small gain from more traditional computer vision algorithms, this tutorial is more of a fun exploration of CNNs and their capacity.

In [1]:
import urllib.request
import json
import pandas as pd
from tqdm import tqdm
%matplotlib inline
tqdm.pandas()

def get_uri(serie, dtype='normal'):
    """Return the image URI of the requested size for one card record.

    Parameters
    ----------
    serie : mapping-like (e.g. a DataFrame row)
        Card record holding an ``'image_uris'`` entry that maps size names
        to URLs.
    dtype : str, default 'normal'
        Key to look up inside ``serie['image_uris']``.

    Returns
    -------
    The URI stored under ``dtype``, or ``None`` when the record has no
    ``'image_uris'`` entry, the entry is not subscriptable (e.g. NaN/None),
    or the requested size is absent.
    """
    try:
        return serie['image_uris'][dtype]
    except (KeyError, TypeError, IndexError):
        # Previously a bare ``except: pass`` fell through to ``return(uri)``
        # with ``uri`` unbound, raising UnboundLocalError. Report "no image"
        # explicitly instead.
        return None
def get_legality(serie):
    """Return the value stored under ``'standard'`` in the record's ``'legalities'`` mapping."""
    legalities = serie['legalities']
    return legalities['standard']

In [2]:
# Bulk card data: download it from
# https://archive.scryfall.com/json/scryfall-all-cards.json
# and place it in the working directory, since it is read from a relative path.
df = pd.read_json('scryfall-all-cards.json')

df.tail()

Unnamed: 0,all_parts,arena_id,artist,border_color,card_faces,cmc,collector_number,color_identity,color_indicator,colors,...,set,set_name,set_search_uri,set_uri,story_spotlight,tcgplayer_id,toughness,type_line,uri,watermark
233237,,,Dan Frazier,black,,1.0,5,[W],,[W],...,lea,Limited Edition Alpha,https://api.scryfall.com/cards/search?order=se...,https://api.scryfall.com/sets/288bd996-960e-44...,False,1044.0,,Enchantment â€” Aura,https://api.scryfall.com/cards/15967a39-303f-4...,
233238,,,Douglas Shuler,black,,1.0,4,[W],,[W],...,lea,Limited Edition Alpha,https://api.scryfall.com/cards/search?order=se...,https://api.scryfall.com/sets/288bd996-960e-44...,False,1038.0,1.0,Creature â€” Human Soldier,https://api.scryfall.com/cards/11600105-56c6-4...,
233239,,,Mark Poole,black,,2.0,3,[W],,[W],...,lea,Limited Edition Alpha,https://api.scryfall.com/cards/search?order=se...,https://api.scryfall.com/sets/288bd996-960e-44...,False,1035.0,,Sorcery,https://api.scryfall.com/cards/6f9ea46a-411f-4...,
233240,,,Jesper Myrfors,black,,4.0,2,[W],,[W],...,lea,Limited Edition Alpha,https://api.scryfall.com/cards/search?order=se...,https://api.scryfall.com/sets/288bd996-960e-44...,False,1031.0,,Sorcery,https://api.scryfall.com/cards/5b6ddce7-b9c5-4...,
233241,,,Dan Frazier,black,,1.0,1,[W],,[W],...,lea,Limited Edition Alpha,https://api.scryfall.com/cards/search?order=se...,https://api.scryfall.com/sets/288bd996-960e-44...,False,1029.0,,Enchantment â€” Aura,https://api.scryfall.com/cards/d5c83259-9b90-4...,


In [3]:
# Extract each card's Standard legality into its own column
# (row-wise apply with a tqdm progress bar).
df['is_standard'] = df.progress_apply(get_legality, axis=1)

100%|███████████████████████████████████████████████████████████████████████| 233242/233242 [00:13<00:00, 17537.96it/s]


In [4]:
# Keep only the cards that have image URIs at all.
df = df[df['image_uris'].notnull()]
# Keep English and Portuguese printings. Match the language code exactly:
# `str.contains` does substring matching and yields NaN for missing values,
# which cannot be used as a boolean mask.
df = df[df['lang'].isin(['en', 'pt'])]

In [5]:
# One image URI per remaining card, using get_uri's default size ('normal').
card_img_link = df.progress_apply(get_uri, axis=1)

100%|█████████████████████████████████████████████████████████████████████████| 65720/65720 [00:03<00:00, 17708.60it/s]


In [6]:
# Assemble the working table: the image link plus the few card attributes
# used further down.
card_base = pd.DataFrame({
    'img_link': card_img_link,
    'set': df['set'],
    'set_name': df['set_name'],
    'lang': df['lang'],
    'is_standard': df['is_standard'],
    'id': df['id'],
    'name': df['name'],
})

# english only
is_english = card_base['lang'].str.contains('en')
card_base = card_base[is_english]

# Standard-legal cards only
is_legal = card_base['is_standard'].eq('legal')
card_base = card_base[is_legal]

In [7]:
# Peek at the first rows to sanity-check the selected columns and filters.
card_base.head()

Unnamed: 0,img_link,set,set_name,lang,is_standard,id,name
0,https://img.scryfall.com/cards/normal/front/a/...,prw2,RNA Ravnica Weekend,en,legal,ac709474-7790-483f-9ed5-ea7abccfce53,Island
1,https://img.scryfall.com/cards/normal/front/b/...,prw2,RNA Ravnica Weekend,en,legal,b79ec1ab-99ac-4552-891e-839067f606fc,Forest
2,https://img.scryfall.com/cards/normal/front/a/...,prw2,RNA Ravnica Weekend,en,legal,abe92f9f-635a-4c76-95cc-163715553057,Forest
3,https://img.scryfall.com/cards/normal/front/8/...,prw2,RNA Ravnica Weekend,en,legal,8b95a588-8dd1-49f7-92d8-34e2237769f2,Mountain
4,https://img.scryfall.com/cards/normal/front/2/...,prw2,RNA Ravnica Weekend,en,legal,280cbdd4-57e1-49a0-a9b0-60b2d9fb9ac9,Mountain


# Removing unneeded sets

Cutoff at 50 cards

In [8]:
# Number of cards (non-null image links) per set code.
# NOTE(review): `sets` is not referenced by any cell visible here; the list of
# valid sets is hardcoded rather than derived from these counts.
sets = card_base.groupby('set')['img_link'].count()

In [9]:
# Set codes to keep: the five main sets followed by their promo
# counterparts (same code prefixed with 'p').
sets_validos = [
    'rix', 'grn', 'xln', 'dom', 'm19',
    'prix', 'pgrn', 'pxln', 'pdom', 'pm19',
]

In [10]:
# Drop every card whose set code is not in the list of valid sets.
in_valid_set = card_base['set'].isin(sets_validos)
card_base = card_base[in_valid_set]

In [11]:
# Number of cards left after all filters.
card_base.shape[0]

1690

In [12]:
# Persist the selected cards. The train/test/validation step later reloads
# 'card_images_normal.csv' with index_col=0, so the file must be written here
# (index included) for the notebook to run from a fresh kernel.
card_base.to_csv('card_images_normal.csv')

# Download Image

In [13]:
import os

image_dir = 'card_images_normal/'
# urlretrieve does not create missing directories, so make sure the target exists.
os.makedirs(image_dir, exist_ok=True)

# Iterate over the two columns directly instead of indexing row by row with iloc.
for url, card_name in tqdm(zip(card_base['img_link'], card_base['id']), total=len(card_base)):
    path = image_dir + card_name + '.jpg'
    # Skip rows without a link and images that are already on disk, so that
    # re-running the notebook does not download everything again.
    if pd.isna(url) or os.path.exists(path):
        continue
    # Download to a temporary name and rename afterwards: an interrupted
    # download then never leaves a truncated '<id>.jpg' behind.
    partial_path = path + '.part'
    urllib.request.urlretrieve(url, partial_path)
    os.replace(partial_path, path)

100%|██████████████████████████████████████████████████████████████████████████████| 1690/1690 [04:07<00:00,  2.64it/s]


# Creating Train, Test and Validation folders

In [14]:
import os

# Files currently present in the image folder, and the card metadata table.
imgs = os.listdir('card_images_normal')
info = pd.read_csv('card_images_normal.csv', index_col=0)

# Fold every promo set code into its main set so both share one label.
promo_to_main = {
    'prix': 'rix',
    'pgrn': 'grn',
    'pxln': 'xln',
    'pdom': 'dom',
    'pm19': 'm19',
}
info['set'] = info['set'].replace(promo_to_main)

In [15]:
# Card ids of the downloaded images: the file name without its extension.
# Only '.jpg' files are considered, because the copy step rebuilds the file
# name as id + ".jpg"; any other file in the folder (partial downloads, OS
# metadata files, ...) is ignored instead of being blindly cut by 4 characters.
imgs_aux = [os.path.splitext(img)[0] for img in imgs if img.endswith('.jpg')]

In [16]:
# Keep only the cards whose id appears among the image files found on disk.
has_image = info['id'].isin(imgs_aux)
info = info[has_image]

In [17]:
from sklearn.model_selection import train_test_split

# First cut: 70% train, 30% held out, stratified by set.
df_train, df_split = train_test_split(
    info,
    test_size=0.3,
    random_state=42,
    stratify=info['set'],
)

# Second cut: the held-out part is divided 70/30 into test and validation,
# again stratified by set.
df_test, df_validation = train_test_split(
    df_split,
    test_size=0.3,
    random_state=42,
    stratify=df_split['set'],
)

In [18]:
# Report how many cards ended up in each split.
split_report = [
    ('Train: {}', df_train),
    ('Test: {}', df_test),
    ('Validation: {}', df_validation),
]
for template, part in split_report:
    print(template.format(len(part)))

Train: 1183
Test: 354
Validation: 153


In [19]:
import os
from shutil import copyfile

folders = ['train', 'test', 'validation']
origin = 'card_images_normal/'
destination = 'card_ml/'

# Lay the images out as <destination>/<split>/<set>/<id>.jpg.
# The loop variable is `split_df`, not `df`, so the notebook-level `df`
# (the card table built at the top) is not silently replaced by the last split.
for folder, split_df in zip(folders, [df_train, df_test, df_validation]):
    # copyfile does not create directories: make one per set present in this split.
    for set_code in split_df['set'].unique():
        os.makedirs(os.path.join(destination, folder, set_code), exist_ok=True)

    for img, s in zip(split_df['id'], split_df['set']):
        copyfile(origin + img + ".jpg", os.path.join(destination, folder, s, img + ".jpg"))