In [1]:
import numpy as np
import pandas as pd
import requests
import re
from datetime import datetime
import warnings
warnings.filterwarnings("ignore")

# Download the card database (with the extra "misc" info requested via
# misc=yes) and cache it on disk as cardinfo.json.
# The request is made BEFORE the file is opened so that a failed download
# does not truncate an existing cache.
response = requests.get('https://db.ygoprodeck.com/api/v7/cardinfo.php?misc=yes', timeout=60)
# Fail loudly on an HTTP error instead of caching an error body as card data.
response.raise_for_status()

# Write as UTF-8 explicitly: the platform default encoding may be unable to
# represent every character in the card text.
with open('cardinfo.json', 'w', encoding='utf-8') as f:
    f.write(response.text)

In [2]:
# Load the cached API response; the card records sit in its "data" column.
cards_raw = pd.read_json("cardinfo.json")

# Expand each card record into its own columns. The records are used directly
# instead of being round-tripped through eval(str(...)), which would execute
# text that was downloaded from the network.
cards = cards_raw['data'].apply(pd.Series).drop(
    columns=['archetype', 'ygoprodeck_url', 'card_images', 'card_sets', 'card_prices'])
# Only the format list of the first misc_info entry is kept. A missing value
# becomes an empty list so that later `'TCG' in formats` checks cannot fail.
cards['formats'] = cards['misc_info'].apply(lambda x: x[0].get('formats') or [])
cards = cards.drop(columns=['misc_info'])

# --- Patterns for text that is not counted when determining percentiles ------
# All patterns are raw strings so the regex escapes (\. \( \Z ...) are not
# treated as (invalid) Python string escapes.
BOILERPLATE_RE = re.compile("|".join([
    # Gemini Summoning
    r'This card is treated as a Normal Monster while.*(:|\.)\r?\n●',
    # Ritual Summoning
    r'You can Ritual Summon this card with (a |any )?"[^\.]+"( Ritual Spell Card| card)?\.',
    # NOTE(review): the trailing "." is unescaped and matches any character;
    # it probably should be "\." - kept as-is to preserve the existing counts.
    r'This (monster|card) can only be Ritual Summoned with [^\.]+.',
    # Statlines
    r' \(([^\)]+/){4}[^\)]+\)',
    # Card types
    r' \(Monster, Spell, (and/)?or Trap\)',
    # Monster Card types
    r' \((among )?((Ritual|Fusion|Synchro|Xyz|Pendulum|Link)(, (and |or |and/or )?|/))+(Ritual|Fusion|Synchro|Xyz|Pendulum|Link)\)',
    # Others
    r' \(but (its|their) effects can still be activated\)',
    r' \(when this card resolves\)',
    r' \(but you can (Normal )?Set\)',
    r'\[ Pendulum Effect \] \r?\n',
    r'\(This card[^\)]+\)\r?\n?',
]))
ALL_LINES_RE = re.compile(r'(.+\r?\n?)+')
FIRST_LINE_RE = re.compile(r'^[^\n/]+(\n| / )')
# NOTE(review): "(\n / )" requires a newline followed by " / "; the non-pendulum
# pattern above uses "(\n| / )". Kept as-is - confirm which one is intended.
PENDULUM_FIRST_LINE_RE = re.compile(r'\[ Monster Effect \] \r?\n[^\n/]+(\n / )')
NORMAL_PENDULUM_TAIL_RE = re.compile(r'\r?\n\[[^\]]*\] \r?\n[\s\S]*\Z')
MONSTER_EFFECT_HEADER_RE = re.compile(r'\[ Monster Effect \] \r?\n')
BLANK_OR_LEADING_SPACE_RE = re.compile(r'^(\r?\n)+\Z|^ ')

EXTRA_DECK_FRAMES = ('fusion', 'link', 'xyz', 'synchro')
EXTRA_DECK_PENDULUM_FRAMES = ('fusion_pendulum', 'xyz_pendulum', 'synchro_pendulum')


def trim_card_text(desc, name, frame_type):
    """Return the part of a card's text that counts toward its length.

    Parameters
    ----------
    desc : str
        The card's full description text.
    name : str
        The card's own name; every literal occurrence is replaced by the
        placeholder "CARDNAME".
    frame_type : str
        The card's frameType value; selects the frame-specific trimming steps.

    Returns
    -------
    str
        The trimmed text (possibly empty).
    """
    text = BOILERPLATE_RE.sub("", desc)

    # Normal-frame cards: every non-empty line is removed.
    if frame_type == 'normal':
        text = ALL_LINES_RE.sub("", text)

    # Literal replacement: the name must not be interpreted as a regex, since
    # names may contain characters such as ".", "?" or "(".
    text = text.replace(name, "CARDNAME")

    if frame_type in EXTRA_DECK_FRAMES:
        # Single-line text without " / " that does not end in a period is
        # blanked entirely (presumably a bare materials line - TODO confirm).
        if '\n' not in text and ' / ' not in text and not text.endswith('.'):
            text = ""
        # Otherwise drop the leading segment up to the first newline or " / ".
        text = FIRST_LINE_RE.sub("", text)

    if frame_type in EXTRA_DECK_PENDULUM_FRAMES:
        text = PENDULUM_FIRST_LINE_RE.sub("", text)

    # Normal pendulum cards: cut everything from the bracketed section header
    # that follows the first part of the text through to the end.
    if frame_type == 'normal_pendulum':
        text = NORMAL_PENDULUM_TAIL_RE.sub("", text)

    text = MONSTER_EFFECT_HEADER_RE.sub("", text)
    text = text.replace("Graveyard", "GY")
    # Text made only of newlines becomes empty; one leading space is removed.
    return BLANK_OR_LEADING_SPACE_RE.sub("", text)


# Trim all of the text not used for determining percentiles
cards['trimmed_text'] = [
    trim_card_text(desc, name, frame_type)
    for desc, name, frame_type in zip(cards['desc'], cards['name'], cards['frameType'])
]
cards['chars'] = cards['trimmed_text'].str.len()
# Words = number of space/newline separators + 1 (so empty text counts as 1).
cards['words'] = cards['trimmed_text'].str.count(r" |\n") + 1
cards.head()

Unnamed: 0,id,name,type,humanReadableCardType,frameType,desc,race,typeline,atk,def,...,linkval,linkmarkers,pend_desc,monster_desc,scale,banlist_info,formats,trimmed_text,chars,words
0,34541863,"""A"" Cell Breeding Device",Spell Card,Continuous Spell,spell,"During each of your Standby Phases, put 1 A-Co...",Continuous,,,,...,,,,,,,"[Duel Links, Common Charity, Edison, TCG, OCG,...","During each of your Standby Phases, put 1 A-Co...",96,16
1,64163367,"""A"" Cell Incubator",Spell Card,Continuous Spell,spell,Each time an A-Counter(s) is removed from play...,Continuous,,,,...,,,,,,,"[Duel Links, Common Charity, Edison, TCG, OCG,...",Each time an A-Counter(s) is removed from play...,188,32
2,91231901,"""A"" Cell Recombination Device",Spell Card,Quick-Play Spell,spell,Target 1 face-up monster on the field; send 1 ...,Quick-Play,,,,...,,,,,,,"[Duel Links, Common Charity, TCG, OCG, Master ...",Target 1 face-up monster on the field; send 1 ...,335,66
3,73262676,"""A"" Cell Scatter Burst",Spell Card,Quick-Play Spell,spell,"Select 1 face-up ""Alien"" monster you control. ...",Quick-Play,,,,...,,,,,,,"[Duel Links, Common Charity, Edison, TCG, OCG,...","Select 1 face-up ""Alien"" monster you control. ...",145,22
4,80181649,"""Case of K9""",Spell Card,Continuous Spell,spell,"When this card is activated: You can add 1 ""K9...",Continuous,,,,...,,,,,,,[OCG],"When this card is activated: You can add 1 ""K9...",420,82


In [3]:
# Identify the 25th percentile cutoff points
# Reference pool: cards whose format list contains 'TCG' and that still have
# text after trimming. Both conditions are combined into ONE mask over `cards`
# (instead of chaining cards[mask_a][mask_b], which applies a full-length mask
# to an already-filtered frame and relies on implicit reindexing), and the
# pool is computed once rather than three times.
in_tcg = cards['formats'].apply(lambda x: 'TCG' in x)
tcg_with_text = cards.loc[in_tcg & (cards['chars'] > 0), ['chars', 'words']]

charcut = np.percentile(tcg_with_text['chars'], 25)
wordcut = np.percentile(tcg_with_text['words'], 25)
tcg_with_text.describe()

Unnamed: 0,chars,words
count,12549.0,12549.0
mean,296.997052,53.600128
std,144.265991,26.158073
min,12.0,3.0
25%,171.0,31.0
50%,293.0,53.0
75%,413.0,75.0
max,937.0,176.0


In [4]:
# Current 25th banlist: card id -> number of copies allowed (0, 1 or 2).
# Cards that are not listed here get the default limit of 3 below.
banlist = {4031928: 0, 17375316: 0, 74157028: 0, 44763025: 0, 23557835: 0, 78706415: 0, 79571449: 0, 18144507: 0, 23924608: 0, 61740673: 0, 83764719: 0, 55144522: 0, 12580477: 0, 93016201: 0, 73915051: 0, 45986603: 0, 40605147: 0, 42829885: 0, 43898403: 0, 80604092: 0, 57953380: 1, 60682203: 1, 53129443: 1, 42703248: 1, 75500286: 1, 19613556: 1, 37520316: 1, 44095762: 1, 33508719: 1, 33782437: 1, 67284107: 1, 41420027: 1, 6983839: 1, 53582587: 1, 94192409: 2, 48686504: 2, 67169062: 2, 10028593: 2, 32807846: 2, 5851097: 2, 9064354: 0}

# Map the banlist to the list of legal cards
# Legal = format list contains 'TCG', at or below both cutoffs, and not a token.
# All conditions form a single mask over `cards` (no chained boolean indexing),
# and .copy() makes `legals` an independent frame so the column assignments
# below never write into a slice of `cards`.
is_legal = (
    cards['formats'].apply(lambda x: 'TCG' in x)
    & (cards['chars'] <= charcut)
    & (cards['words'] <= wordcut)
    & (cards['frameType'] != 'token')
)
legals = cards.loc[is_legal].copy()
# Flatten the description onto a single line.
legals['desc'] = legals['desc'].str.replace(r"\r?\n", " ", regex=True)
legals['limit'] = legals['id'].map(lambda card_id: banlist.get(card_id, 3))

# Trim some extraneous information
# Where a link value is present it is shown in the 'level' column instead.
legals['level'] = legals['linkval'].fillna(legals['level'])
# Selecting the output columns explicitly also discards every helper column.
legals = legals[['id', 'limit', 'name', 'type', 'race', 'attribute', 'level', 'atk', 'def', 'desc']]

# Save to a CSV
# legals.to_csv("25th.csv", index=False)

# Generate the CONF file
# The year.month stamp is computed once so both header lines always agree.
stamp = datetime.today().strftime("%Y.%m")
header = '\n'.join(["#[%s 25th]" % stamp, "!%s 25th" % stamp, '$whitelist\n\n'])
# One "<id> <limit> --<name>" entry per legal card; the Greek alpha is
# transliterated to a plain 'a', as in the original export.
entries = [
    ("%s %s --%s" % (card_id, limit, name)).replace('\u03b1', 'a')
    for card_id, limit, name in zip(legals['id'], legals['limit'], legals['name'])
]
# Explicit UTF-8 so non-ASCII card names cannot raise an encoding error on
# platforms whose default encoding is not UTF-8.
with open('25th.conf', 'w', encoding='utf-8') as f:
    f.write(header)
    f.write('\n'.join(entries))

legals.head()

Unnamed: 0,id,limit,name,type,race,attribute,level,atk,def,desc
0,34541863,3,"""A"" Cell Breeding Device",Spell Card,Continuous,,,,,"During each of your Standby Phases, put 1 A-Co..."
3,73262676,3,"""A"" Cell Scatter Burst",Spell Card,Quick-Play,,,,,"Select 1 face-up ""Alien"" monster you control. ..."
10,86988864,3,3-Hump Lacooda,Effect Monster,Beast,EARTH,3.0,500.0,1500.0,"If there are 3 face-up ""3-Hump Lacooda"" cards ..."
12,83994646,3,4-Starred Ladybug of Doom,Flip Effect Monster,Insect,WIND,3.0,800.0,1200.0,FLIP: Destroy all Level 4 monsters your oppone...
14,23771716,3,7 Colored Fish,Normal Monster,Fish,WATER,4.0,1800.0,800.0,A rare rainbow fish that has never been caught...
