In [1]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

In [2]:
# Load the cleaned card data; the first CSV column is used as the row index.
all_cards = pd.read_csv('cleaned_datasets/allsets_cleaned.csv', index_col=0)

In [3]:
# Preview the first rows to confirm the columns loaded as expected.
all_cards.head()

Unnamed: 0,name,colors,manaValue,manaCost,power,toughness,loyalty,text,rarity,keywords,supertypes,types,subtypes
0,Aerial Modification,W,5.0,{4}{W},,,,Enchant creature or Vehicle\nAs long as enchan...,uncommon,Enchant,,Enchantment,Aura
1,Aeronaut Admiral,W,4.0,{3}{W},3.0,1.0,,Flying\nVehicles you control have flying.,uncommon,Flying,,Creature,"Human, Pilot"
2,Aether Inspector,W,4.0,{3}{W},2.0,3.0,,Vigilance\nWhen Aether Inspector enters the ba...,common,Vigilance,,Creature,"Dwarf, Artificer"
3,Aethergeode Miner,W,2.0,{1}{W},3.0,1.0,,"Whenever Aethergeode Miner attacks, you get {E...",rare,,,Creature,"Dwarf, Scout"
4,Airdrop Aeronauts,W,5.0,{3}{W}{W},4.0,3.0,,Flying\nRevolt — When Airdrop Aeronauts enters...,uncommon,"Flying, Revolt",,Creature,"Dwarf, Scout"


In [4]:
# Inspect dtypes and non-null counts to see which columns contain missing values.
all_cards.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7125 entries, 0 to 7124
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   name        7125 non-null   object 
 1   colors      7125 non-null   object 
 2   manaValue   7125 non-null   float64
 3   manaCost    6617 non-null   object 
 4   power       3867 non-null   float64
 5   toughness   3877 non-null   float64
 6   loyalty     168 non-null    float64
 7   text        7025 non-null   object 
 8   rarity      7125 non-null   object 
 9   keywords    3399 non-null   object 
 10  supertypes  763 non-null    object 
 11  types       7125 non-null   object 
 12  subtypes    4636 non-null   object 
dtypes: float64(4), object(9)
memory usage: 779.3+ KB


NaN values in the `keywords` column are to be expected, as many cards do not have keywords.  We will replace these with empty strings.

In [5]:
# Cards without keywords have NaN here; use '' so the .str accessor works on
# every row.  The result is assigned back instead of calling
# fillna(inplace=True) on the column selection: that form is chained
# assignment, which pandas deprecates and which no longer updates the parent
# frame once Copy-on-Write is enabled.
all_cards['keywords'] = all_cards['keywords'].fillna('')

I want to get a dataframe that includes only creatures, but first we have to deal with one edge case: non-creature cards that contain the "Transform" keyword.  I do this by writing a function that can extract the mana value for cards that are formatted in the following way: "{B}{B}{2}: Transform".  We find the Transform keyword in the text of the card, grab the string that immediately precedes it, calculate the additional cost to transform the card, and add it to the correct cell (which will, in all cases, be the row immediately following the card with the Transform keyword).  This also fixes the mana value for the back sides of modal cards with the Transform keyword when both sides are creatures.

In [6]:
# Cards with no rules text have NaN here; use '' so string searches work on
# every row.  Assign the result back rather than using fillna(inplace=True)
# on the column selection, which is deprecated chained assignment and does
# not update the parent frame under Copy-on-Write.
all_cards['text'] = all_cards['text'].fillna('')

In [7]:
# Every card whose keyword list mentions Transform (case-insensitive match).
all_cards[all_cards['keywords'].str.contains('Transform', case=False)]

Unnamed: 0,name,colors,manaValue,manaCost,power,toughness,loyalty,text,rarity,keywords,supertypes,types,subtypes
1546,Extricator of Sin,W,3.0,{2}{W},0.0,3.0,,"When Extricator of Sin enters the battlefield,...",uncommon,"Delirium, Transform",,Creature,"Human, Cleric"
1557,Lone Rider,W,2.0,{1}{W},1.0,1.0,,"First strike, lifelink\nAt the beginning of th...",uncommon,"First strike, Lifelink, Transform",,Creature,"Human, Knight"
1579,Curious Homunculus,U,2.0,{1}{U},1.0,1.0,,{T}: Add {C}. Spend this mana only to cast an ...,uncommon,Transform,,Creature,Homunculus
1582,Docent of Perfection,U,5.0,{3}{U}{U},5.0,4.0,,Flying\nWhenever you cast an instant or sorcer...,rare,"Flying, Transform",,Creature,"Insect, Horror"
1590,Grizzled Angler,U,3.0,{2}{U},2.0,3.0,,{T}: Mill two cards. Then if there is a colorl...,uncommon,"Mill, Transform",,Creature,Human
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6809,Primal Wellspring,L,4.0,,,,,(Transforms from Primal Amulet.)\n{T}: Add one...,rare,Transform,,Land,
6814,Thaumatic Compass,C,2.0,{2},,,,"{3}, {T}: Search your library for a basic land...",rare,Transform,,Artifact,
6815,Spires of Orazca,L,2.0,,,,,(Transforms from Thaumatic Compass.)\n{T}: Add...,rare,Transform,,Land,
6816,Treasure Map,C,2.0,{2},,,,"{1}, {T}: Scry 1. Put a landmark counter on Tr...",rare,"Scry, Transform",,Artifact,


In [8]:
def backwards_extract(df, keyword):
    """Add the mana cost written just before ``keyword`` to the next row.

    For every row whose ``text`` contains ``keyword``, the text leading up to
    the first occurrence of the keyword is inspected.  If it ends in a run of
    brace-delimited symbols (e.g. ``"{2}{B}{B}: Transform"``), the value of
    those symbols is added to ``manaValue`` of the row immediately after the
    matching row -- the row the notebook treats as the face the card
    transforms into.  The first four columns of each updated row are printed
    so the changes can be reviewed.

    Symbol values: a numeric symbol such as ``{3}`` or ``{10}`` counts as
    that number, the tap symbol ``{T}`` counts as nothing, and every other
    symbol counts as 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``text`` and ``manaValue`` columns.  Modified in place.
    keyword : str
        Literal, case-sensitive keyword to look for in ``text``.

    Returns
    -------
    None
    """
    import re

    # Build the mask once.  Rows are addressed by *position* so that "the
    # next row" is correct for any index (not only a default 0..n-1 one), and
    # the target column is resolved by name instead of assuming column 2.
    mask = df['text'].str.contains(keyword, regex=False, na=False)
    positions = mask.to_numpy().nonzero()[0]
    mana_col = df.columns.get_loc('manaValue')

    # A run of {symbols}, optionally comma/space separated, that ends right
    # before the keyword (allowing the ":" of an activated ability).
    cost_before_keyword = re.compile(r'((?:\{[^{}]+\}[,\s]*)+):?\s*$')
    symbol = re.compile(r'\{([^{}]+)\}')

    for pos, text in zip(positions, df.loc[mask, 'text']):
        target = pos + 1
        # A match on the last row has no following row to update.
        if target >= len(df):
            continue

        # find occurrence of keyword and look at what directly precedes it
        found = cost_before_keyword.search(text[:text.find(keyword)])
        if found is None:
            continue

        # drop tap symbols; skip when nothing mana-related is left
        symbols = [s for s in symbol.findall(found.group(1)) if s != 'T']
        if not symbols:
            continue

        added_cost = sum(int(s) if s.isdecimal() else 1 for s in symbols)

        # update the value of the NEXT card in the dataframe, the one that
        # our identified cards transform into
        df.iat[target, mana_col] += added_cost

        # print a list of changes made so we can confirm we got it right
        print(df.iloc[target, :4])

In [9]:
# Add each Transform activation cost to the mana value of the row following the
# matching card.  This mutates all_cards in place and prints every updated row.
backwards_extract(all_cards, 'Transform')

name         Conduit of Emrakul
colors                        C
manaValue                   8.0
manaCost                    NaN
Name: 1655, dtype: object
name         Erupting Dreadwolf
colors                        C
manaValue                  10.0
manaCost                    NaN
Name: 1675, dtype: object
name         Dronepack Kindred
colors                       C
manaValue                 12.0
manaCost                   NaN
Name: 1682, dtype: object
name         Sinuous Predator
colors                      C
manaValue                 6.0
manaCost                  NaN
Name: 1698, dtype: object
name         Howling Chorus
colors                    C
manaValue               9.0
manaCost                NaN
Name: 1703, dtype: object
name         Fibrous Entangler
colors                       C
manaValue                 11.0
manaCost                   NaN
Name: 1710, dtype: object
name         Ulvenwald Abomination
colors                           C
manaValue                      9.0
man

In [10]:
# Keep only cards whose type line includes "Creature" and renumber the rows
# from zero so later positional lookups line up with the index.
creatures = (
    all_cards[all_cards['types'].str.contains('Creature')]
    .reset_index(drop=True)
)

In [11]:
# Check the creature subset: row count, dtypes and remaining nulls per column.
creatures.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3845 entries, 0 to 3844
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   name        3845 non-null   object 
 1   colors      3845 non-null   object 
 2   manaValue   3845 non-null   float64
 3   manaCost    3698 non-null   object 
 4   power       3814 non-null   float64
 5   toughness   3824 non-null   float64
 6   loyalty     0 non-null      float64
 7   text        3845 non-null   object 
 8   rarity      3845 non-null   object 
 9   keywords    3845 non-null   object 
 10  supertypes  457 non-null    object 
 11  types       3845 non-null   object 
 12  subtypes    3845 non-null   object 
dtypes: float64(4), object(9)
memory usage: 390.6+ KB


In [12]:
# Preview the creature-only frame after the index reset.
creatures.head()

Unnamed: 0,name,colors,manaValue,manaCost,power,toughness,loyalty,text,rarity,keywords,supertypes,types,subtypes
0,Aeronaut Admiral,W,4.0,{3}{W},3.0,1.0,,Flying\nVehicles you control have flying.,uncommon,Flying,,Creature,"Human, Pilot"
1,Aether Inspector,W,4.0,{3}{W},2.0,3.0,,Vigilance\nWhen Aether Inspector enters the ba...,common,Vigilance,,Creature,"Dwarf, Artificer"
2,Aethergeode Miner,W,2.0,{1}{W},3.0,1.0,,"Whenever Aethergeode Miner attacks, you get {E...",rare,,,Creature,"Dwarf, Scout"
3,Airdrop Aeronauts,W,5.0,{3}{W}{W},4.0,3.0,,Flying\nRevolt — When Airdrop Aeronauts enters...,uncommon,"Flying, Revolt",,Creature,"Dwarf, Scout"
4,Audacious Infiltrator,W,2.0,{1}{W},3.0,1.0,,Audacious Infiltrator can't be blocked by arti...,common,,,Creature,"Dwarf, Rogue"


The `loyalty` column holds a value that is unique to planeswalkers, so it is empty for every creature and we drop it right away.

In [13]:
# loyalty has no non-null values for creatures, so remove the column.
creatures = creatures.drop(columns=['loyalty'])

In [14]:
# Confirm the loyalty column is gone.
creatures.head()

Unnamed: 0,name,colors,manaValue,manaCost,power,toughness,text,rarity,keywords,supertypes,types,subtypes
0,Aeronaut Admiral,W,4.0,{3}{W},3.0,1.0,Flying\nVehicles you control have flying.,uncommon,Flying,,Creature,"Human, Pilot"
1,Aether Inspector,W,4.0,{3}{W},2.0,3.0,Vigilance\nWhen Aether Inspector enters the ba...,common,Vigilance,,Creature,"Dwarf, Artificer"
2,Aethergeode Miner,W,2.0,{1}{W},3.0,1.0,"Whenever Aethergeode Miner attacks, you get {E...",rare,,,Creature,"Dwarf, Scout"
3,Airdrop Aeronauts,W,5.0,{3}{W}{W},4.0,3.0,Flying\nRevolt — When Airdrop Aeronauts enters...,uncommon,"Flying, Revolt",,Creature,"Dwarf, Scout"
4,Audacious Infiltrator,W,2.0,{1}{W},3.0,1.0,Audacious Infiltrator can't be blocked by arti...,common,,,Creature,"Dwarf, Rogue"


In [15]:
# Number of creatures that have no printed mana cost.
int(creatures['manaCost'].isna().sum())

147

There are 147 rows with NaN values for manaCost.  We have zero rows with NaN values in manaValue, though, so these are almost certainly the backs of modal cards that transform through one game mechanic or another.  The amount of resources spent to cast the card is accurately reflected in the manaValue column, though the conditions for flipping the card are not going to be considered at this stage of our investigation.  Still, before dropping the manaCost column we'll spot-check a random sample of about 10% of these rows to make sure we didn't inadvertently introduce any errors.

In [16]:
# Spot-check roughly 10% of the creatures without a printed mana cost.
# A fixed random_state makes a re-run show the same rows, so the commentary
# written about this sample stays valid after Restart & Run All.
creatures[creatures.manaCost.isna()].sample(15, random_state=0)

Unnamed: 0,name,colors,manaValue,manaCost,power,toughness,text,rarity,keywords,supertypes,types,subtypes
2848,Wayward Disciple,B,3.0,,2.0,4.0,Whenever Wayward Disciple or another creature ...,uncommon,,,Creature,"Human, Cleric"
2937,Skin Shedder,R,1.0,,3.0,4.0,,uncommon,,,Creature,"Insect, Horror"
1563,Seasoned Cathar,W,5.0,,3.0,3.0,Lifelink,uncommon,Lifelink,,Creature,"Human, Knight"
1639,Awoken Demon,B,1.0,,4.0,4.0,,common,,,Creature,Demon
2858,Perfected Form,U,4.0,,5.0,4.0,Flying,uncommon,Flying,,Creature,"Insect, Horror"
906,Fibrous Entangler,C,11.0,,4.0,6.0,Vigilance\nFibrous Entangler must be blocked i...,uncommon,Vigilance,,Creature,"Eldrazi, Werewolf"
1736,"Olag, Ludevic's Hubris","B, U",2.0,,4.0,4.0,"As this creature transforms into Olag, Ludevic...",rare,Transform,Legendary,Creature,Zombie
2200,Hand of Enlightenment,W,2.0,,2.0,2.0,First strike,common,First strike,,"Enchantment, Creature","Human, Monk"
3383,Lambholt Ravager,R,4.0,,4.0,4.0,"Whenever you cast a noncreature spell, Lambhol...",uncommon,Nightbound,,Creature,Werewolf
1727,Departed Soulkeeper,"U, W",2.0,,3.0,1.0,Flying\nDeparted Soulkeeper can block only cre...,uncommon,Flying,,Creature,Spirit


In checking to make sure I didn't introduce any errors, I realized that the listed Mana Values for cards with the "Disturb" keyword were not reflective of the cost to cast their disturb mode, so I went ahead and built out a function that will let us extract the Disturb cost from the card text and replace that in the mana value field.

In [17]:
def extract_mana_value(df, keyword):
    """Overwrite ``manaValue`` with the mana cost that follows ``keyword``.

    For every row whose ``text`` contains ``keyword`` directly followed by a
    run of brace-delimited symbols (e.g. ``"Disturb {4}{W}{W}"``), the value
    of those symbols replaces the row's ``manaValue``.  Rows that mention the
    keyword without a cost right after it are left unchanged.

    Symbol values: a numeric symbol such as ``{3}`` or ``{10}`` counts as
    that number, the tap symbol ``{T}`` counts as nothing, and every other
    symbol counts as 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``text`` and ``manaValue`` columns.  Modified in place.
    keyword : str
        Literal, case-sensitive keyword that precedes the cost.

    Returns
    -------
    None
    """
    import re

    # Build the mask once; address rows by position and the column by name so
    # the update does not depend on a default index or on column order.
    mask = df['text'].str.contains(keyword, regex=False, na=False)
    positions = mask.to_numpy().nonzero()[0]
    mana_col = df.columns.get_loc('manaValue')

    # Match the cost by pattern rather than by fixed character offsets, so
    # keywords of any length and costs of any length are handled.
    cost_after_keyword = re.compile(re.escape(keyword) + r'\s*((?:\{[^{}]+\})+)')
    symbol = re.compile(r'\{([^{}]+)\}')

    for pos, text in zip(positions, df.loc[mask, 'text']):
        found = cost_after_keyword.search(text)
        if found is None:
            # keyword present but no cost follows it: nothing to extract
            continue

        symbols = [s for s in symbol.findall(found.group(1)) if s != 'T']
        if not symbols:
            continue

        # update the value in the dataframe
        df.iat[pos, mana_col] = sum(
            int(s) if s.isdecimal() else 1 for s in symbols
        )

In [18]:
# Replace manaValue with the Disturb cost for creatures whose text mentions
# Disturb.  This mutates creatures in place.
extract_mana_value(creatures, 'Disturb')

In [19]:
# Spot-check the updated mana values of the first few Disturb cards.
creatures.loc[creatures.text.str.contains('Disturb')].head()

Unnamed: 0,name,colors,manaValue,manaCost,power,toughness,text,rarity,keywords,supertypes,types,subtypes
1564,Beloved Beggar,W,6.0,{1}{W},0.0,4.0,Disturb {4}{W}{W} (You may cast this card from...,uncommon,"Disturb, Transform",,Creature,"Human, Peasant"
1573,Chaplain of Alms,W,4.0,{W},1.0,1.0,First strike\nWard {1} (Whenever this creature...,uncommon,"Disturb, First strike, Transform, Ward",,Creature,"Human, Cleric"
1584,Lunarch Veteran,W,2.0,{W},1.0,1.0,Whenever another creature enters the battlefie...,common,"Disturb, Transform",,Creature,"Human, Cleric"
1586,Mourning Patrol,W,4.0,{2}{W},2.0,3.0,Vigilance\nDisturb {3}{W} (You may cast this c...,common,"Disturb, Transform, Vigilance",,Creature,"Human, Soldier"
1595,Baithook Angler,U,2.0,{1}{U},2.0,1.0,Disturb {1}{U} (You may cast this card from yo...,common,"Disturb, Transform",,Creature,"Human, Peasant"


We have two exceptions with cards that come into play as lands and then can later be flipped into creatures.  I'll go ahead and manually set reasonable manaValues for those two, since it's such a small number.

In [20]:
# Creatures with no printed mana cost whose mana value is still zero.
creatures[creatures['manaCost'].isna() & (creatures['manaValue'] == 0)]

Unnamed: 0,name,colors,manaValue,manaCost,power,toughness,text,rarity,keywords,supertypes,types,subtypes
1750,Creeping Inn,B,0.0,,3.0,7.0,"Whenever Creeping Inn attacks, you may exile a...",mythic,,,"Artifact, Creature","Horror, Construct"
2989,"Ormendahl, Profane Prince",B,0.0,,9.0,7.0,"Flying, lifelink, indestructible, haste",rare,"Flying, Haste, Indestructible, Lifelink",Legendary,Creature,Demon


In [21]:
# Manually assign mana values to the two land-to-creature back faces found
# above.  The rows are selected by card name and the column by label, so the
# fix does not silently hit the wrong row or column if the row order or the
# column layout changes upstream (the original used positions 2989/1750 and
# column 2).
creatures.loc[creatures['name'] == 'Ormendahl, Profane Prince', 'manaValue'] = 5
creatures.loc[creatures['name'] == 'Creeping Inn', 'manaValue'] = 3

In [22]:
# Re-check dtypes and null counts after the mana value corrections.
creatures.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3845 entries, 0 to 3844
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   name        3845 non-null   object 
 1   colors      3845 non-null   object 
 2   manaValue   3845 non-null   float64
 3   manaCost    3698 non-null   object 
 4   power       3814 non-null   float64
 5   toughness   3824 non-null   float64
 6   text        3845 non-null   object 
 7   rarity      3845 non-null   object 
 8   keywords    3845 non-null   object 
 9   supertypes  457 non-null    object 
 10  types       3845 non-null   object 
 11  subtypes    3845 non-null   object 
dtypes: float64(3), object(9)
memory usage: 360.6+ KB


In [23]:
# Re-inspect a sample of creatures without a printed mana cost now that the
# mana values have been corrected.  A fixed random_state keeps the displayed
# rows stable across re-runs.
creatures[creatures.manaCost.isna()].sample(15, random_state=0)

Unnamed: 0,name,colors,manaValue,manaCost,power,toughness,text,rarity,keywords,supertypes,types,subtypes
1635,Leeching Lurker,B,3.0,,4.0,4.0,Lifelink\nNightbound (If a player casts at lea...,rare,"Lifelink, Nightbound",,Creature,"Leech, Horror"
2830,"Avacyn, the Purifier",R,5.0,,6.0,5.0,Flying\nWhen this creature transforms into Ava...,mythic,"Flying, Transform",Legendary,Creature,Angel
2832,Lunarch Inquisitors,W,4.0,,4.0,4.0,When this creature transforms into Lunarch Inq...,uncommon,Transform,,Creature,"Human, Cleric"
1672,Harvesttide Assailant,R,3.0,,4.0,4.0,Trample\nNightbound (If a player casts at leas...,common,"Nightbound, Trample",,Creature,Werewolf
2922,Vildin-Pack Alpha,R,3.0,,4.0,3.0,Whenever a Werewolf enters the battlefield und...,rare,Transform,,Creature,Werewolf
1733,Lord of the Ulvenwald,"G, R",2.0,,3.0,3.0,Other Wolves and Werewolves you control get +1...,uncommon,Nightbound,,Creature,Werewolf
3284,Cackling Culprit,B,2.0,,3.0,5.0,Whenever Cackling Culprit or another creature ...,uncommon,,,Creature,"Human, Rogue"
3317,Cipherbound Spirit,U,2.0,,3.0,2.0,Flying\nCipherbound Spirit can block only crea...,uncommon,Flying,,Creature,Spirit
2913,Neck Breaker,R,3.0,,4.0,3.0,Attacking creatures you control get +1/+0 and ...,uncommon,Transform,,Creature,Werewolf
2257,Animus of Night's Reach,B,4.0,,0.0,4.0,Menace (This creature can't be blocked except ...,uncommon,Menace,,"Enchantment, Creature",Spirit


At this point, we should have dealt with most or all of the incorrectly assigned manaValue creatures, though we are not accounting for non-mana costs.  Doing so would be significantly more complicated and goes beyond the scope of the current investigation, so I'm happy to move on to other things that need to be corrected.  At this point, we should not need the manaCost column any longer, so I'll drop it.

In [24]:
# manaValue now carries the cost information, so manaCost is no longer needed.
creatures = creatures.drop(columns=['manaCost'])

In [25]:
# Confirm the manaCost column is gone.
creatures.head()

Unnamed: 0,name,colors,manaValue,power,toughness,text,rarity,keywords,supertypes,types,subtypes
0,Aeronaut Admiral,W,4.0,3.0,1.0,Flying\nVehicles you control have flying.,uncommon,Flying,,Creature,"Human, Pilot"
1,Aether Inspector,W,4.0,2.0,3.0,Vigilance\nWhen Aether Inspector enters the ba...,common,Vigilance,,Creature,"Dwarf, Artificer"
2,Aethergeode Miner,W,2.0,3.0,1.0,"Whenever Aethergeode Miner attacks, you get {E...",rare,,,Creature,"Dwarf, Scout"
3,Airdrop Aeronauts,W,5.0,4.0,3.0,Flying\nRevolt — When Airdrop Aeronauts enters...,uncommon,"Flying, Revolt",,Creature,"Dwarf, Scout"
4,Audacious Infiltrator,W,2.0,3.0,1.0,Audacious Infiltrator can't be blocked by arti...,common,,,Creature,"Dwarf, Rogue"


There is a small subset of creatures whose power and/or toughness is listed as '*'.  This means that those values are variable and dependent on game-state conditions.  We've only got 34 of these creatures in our dataset of almost 4,000, comprising less than 1% of the creatures.  Rather than spend an inordinate amount of time accounting for these edge cases, I choose to drop them from the dataframe.

In [26]:
# Number of creatures missing power and/or toughness.
int((creatures['power'].isna() | creatures['toughness'].isna()).sum())

34

In [27]:
# Remove creatures lacking a numeric power or toughness, then renumber rows.
creatures = (
    creatures
    .dropna(subset=['power', 'toughness'])
    .reset_index(drop=True)
)

In [28]:
# Confirm power and toughness no longer contain nulls.
creatures.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3811 entries, 0 to 3810
Data columns (total 11 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   name        3811 non-null   object 
 1   colors      3811 non-null   object 
 2   manaValue   3811 non-null   float64
 3   power       3811 non-null   float64
 4   toughness   3811 non-null   float64
 5   text        3811 non-null   object 
 6   rarity      3811 non-null   object 
 7   keywords    3811 non-null   object 
 8   supertypes  446 non-null    object 
 9   types       3811 non-null   object 
 10  subtypes    3811 non-null   object 
dtypes: float64(3), object(8)
memory usage: 327.6+ KB


## To do: Move this Venture change to initial cleaning on combined_df

In [29]:
# Show how the Venture keyword is currently spelled in the keywords column.
creatures[creatures.keywords.str.contains('Venture', case=False)].head()

Unnamed: 0,name,colors,manaValue,power,toughness,text,rarity,keywords,supertypes,types,subtypes
104,Cloister Gargoyle,W,3.0,0.0,4.0,"When Cloister Gargoyle enters the battlefield,...",uncommon,Venture into the dungeon,,"Artifact, Creature",Gargoyle
115,Keen-Eared Sentry,W,2.0,2.0,1.0,You have hexproof. (You can't be the target of...,uncommon,Venture into the dungeon,,Creature,"Human, Soldier"
119,"Nadaar, Selfless Paladin",W,3.0,3.0,3.0,"Vigilance\nWhenever Nadaar, Selfless Paladin e...",rare,"Venture into the dungeon, Vigilance",Legendary,Creature,"Dragon, Knight"
121,Planar Ally,W,5.0,3.0,3.0,"Flying\nWhenever Planar Ally attacks, venture ...",common,"Flying, Venture into the dungeon",,Creature,Angel
123,Ranger's Hawk,W,1.0,1.0,1.0,"Flying\n{3}, {T}, Tap another untapped creatur...",common,"Flying, Venture into the dungeon",,Creature,Bird


In [30]:
# Shorten the multi-word keyword so it becomes a single clean dummy column.
# The result is assigned back to the column: calling replace(inplace=True) on
# the attribute selection is chained assignment, which pandas deprecates and
# which does not update the parent frame under Copy-on-Write.
creatures['keywords'] = creatures['keywords'].replace(
    to_replace='Venture into the dungeon', value='Venture', regex=True
)

In [31]:
# Confirm the keyword now reads "Venture" in the keywords column.
creatures[creatures.keywords.str.contains('Venture', case=False)].head()

Unnamed: 0,name,colors,manaValue,power,toughness,text,rarity,keywords,supertypes,types,subtypes
104,Cloister Gargoyle,W,3.0,0.0,4.0,"When Cloister Gargoyle enters the battlefield,...",uncommon,Venture,,"Artifact, Creature",Gargoyle
115,Keen-Eared Sentry,W,2.0,2.0,1.0,You have hexproof. (You can't be the target of...,uncommon,Venture,,Creature,"Human, Soldier"
119,"Nadaar, Selfless Paladin",W,3.0,3.0,3.0,"Vigilance\nWhenever Nadaar, Selfless Paladin e...",rare,"Venture, Vigilance",Legendary,Creature,"Dragon, Knight"
121,Planar Ally,W,5.0,3.0,3.0,"Flying\nWhenever Planar Ally attacks, venture ...",common,"Flying, Venture",,Creature,Angel
123,Ranger's Hawk,W,1.0,1.0,1.0,"Flying\n{3}, {T}, Tap another untapped creatur...",common,"Flying, Venture",,Creature,Bird


In [32]:
# One indicator column per keyword; the keywords column is a ", "-separated list.
keyword_flags = creatures['keywords'].str.get_dummies(sep=", ")
creatures = pd.concat([creatures, keyword_flags], axis=1)

In [33]:
# Preview the frame with the keyword indicator columns appended.
creatures.head()

Unnamed: 0,name,colors,manaValue,power,toughness,text,rarity,keywords,supertypes,types,...,Support,Surge,Surveil,Training,Trample,Transform,Undergrowth,Venture,Vigilance,Ward
0,Aeronaut Admiral,W,4.0,3.0,1.0,Flying\nVehicles you control have flying.,uncommon,Flying,,Creature,...,0,0,0,0,0,0,0,0,0,0
1,Aether Inspector,W,4.0,2.0,3.0,Vigilance\nWhen Aether Inspector enters the ba...,common,Vigilance,,Creature,...,0,0,0,0,0,0,0,0,1,0
2,Aethergeode Miner,W,2.0,3.0,1.0,"Whenever Aethergeode Miner attacks, you get {E...",rare,,,Creature,...,0,0,0,0,0,0,0,0,0,0
3,Airdrop Aeronauts,W,5.0,4.0,3.0,Flying\nRevolt — When Airdrop Aeronauts enters...,uncommon,"Flying, Revolt",,Creature,...,0,0,0,0,0,0,0,0,0,0
4,Audacious Infiltrator,W,2.0,3.0,1.0,Audacious Infiltrator can't be blocked by arti...,common,,,Creature,...,0,0,0,0,0,0,0,0,0,0


In [34]:
# Column count after adding the keyword indicators.
creatures.shape

(3811, 95)

In [35]:
# List the column names to review the keyword indicators that were created.
creatures.columns

Index(['name', 'colors', 'manaValue', 'power', 'toughness', 'text', 'rarity',
       'keywords', 'supertypes', 'types', 'subtypes', 'Adamant', 'Adapt',
       'Afflict', 'Afterlife', 'Alliance', 'Amass', 'Ascend', 'Blitz', 'Boast',
       'Changeling', 'Channel', 'Cohort', 'Companion', 'Connive',
       'Constellation', 'Converge', 'Convoke', 'Coven', 'Cycling', 'Daybound',
       'Deathtouch', 'Defender', 'Delirium', 'Devoid', 'Disturb',
       'Double strike', 'Embalm', 'Emerge', 'Enrage', 'Equip', 'Escape',
       'Eternalize', 'Exert', 'Exploit', 'Explore', 'Fabricate', 'Fight',
       'First strike', 'Flash', 'Flying', 'Foretell', 'Haste', 'Hexproof',
       'Hexproof from', 'Improvise', 'Indestructible', 'Ingest', 'Investigate',
       'Kicker', 'Landfall', 'Learn', 'Lifelink', 'Madness', 'Magecraft',
       'Meld', 'Menace', 'Mentor', 'Mill', 'Mutate', 'Nightbound', 'Ninjutsu',
       'Pack tactics', 'Proliferate', 'Protection', 'Prowess', 'Raid', 'Rally',
       'Reach', 'Recon

In [36]:
# Distinct type combinations present before one-hot encoding them.
creatures.types.unique()

array(['Creature', 'Artifact, Creature', 'Enchantment, Creature'],
      dtype=object)

In [37]:
# One indicator column per card type; types is a ", "-separated list.
type_flags = creatures['types'].str.get_dummies(sep=", ")
creatures = pd.concat([creatures, type_flags], axis=1)

In [38]:
# Preview the frame with the type indicator columns appended.
creatures.head()

Unnamed: 0,name,colors,manaValue,power,toughness,text,rarity,keywords,supertypes,types,...,Training,Trample,Transform,Undergrowth,Venture,Vigilance,Ward,Artifact,Creature,Enchantment
0,Aeronaut Admiral,W,4.0,3.0,1.0,Flying\nVehicles you control have flying.,uncommon,Flying,,Creature,...,0,0,0,0,0,0,0,0,1,0
1,Aether Inspector,W,4.0,2.0,3.0,Vigilance\nWhen Aether Inspector enters the ba...,common,Vigilance,,Creature,...,0,0,0,0,0,1,0,0,1,0
2,Aethergeode Miner,W,2.0,3.0,1.0,"Whenever Aethergeode Miner attacks, you get {E...",rare,,,Creature,...,0,0,0,0,0,0,0,0,1,0
3,Airdrop Aeronauts,W,5.0,4.0,3.0,Flying\nRevolt — When Airdrop Aeronauts enters...,uncommon,"Flying, Revolt",,Creature,...,0,0,0,0,0,0,0,0,1,0
4,Audacious Infiltrator,W,2.0,3.0,1.0,Audacious Infiltrator can't be blocked by arti...,common,,,Creature,...,0,0,0,0,0,0,0,0,1,0


In [39]:
# Column count after adding the type indicators.
creatures.shape

(3811, 98)

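Note to self: every row in this dataframe is a creature, so the `Creature` indicator column should be 1 for 100% of rows and carries no information.  It probably needs to be dropped.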

In [40]:
# Distinct supertype combinations (NaN where a card has none).
creatures.supertypes.unique()

array([nan, 'Legendary', 'Snow', 'Legendary, Snow'], dtype=object)

In [41]:
# One indicator column per supertype; supertypes is a ", "-separated list.
supertype_flags = creatures['supertypes'].str.get_dummies(sep=", ")
creatures = pd.concat([creatures, supertype_flags], axis=1)

In [42]:
# Column count after adding the supertype indicators.
creatures.shape

(3811, 100)

In [43]:
# Distinct subtype combinations before one-hot encoding them.
creatures.subtypes.unique()

array(['Human, Pilot', 'Dwarf, Artificer', 'Dwarf, Scout', 'Dwarf, Rogue',
       'Dwarf, Soldier', 'Bird', 'Dwarf, Warrior', 'Angel', 'Cat, Beast',
       'Dwarf, Advisor', 'Vedalken, Artificer', 'Whale', 'Human, Wizard',
       'Drake', 'Human, Artificer', 'Crab', 'Vedalken, Rogue', 'Fish',
       'Human, Pirate', 'Aetherborn, Rogue', 'Aetherborn, Artificer',
       'Insect', 'Human, Rogue', 'Aetherborn, Vampire', 'Demon',
       'Aetherborn, Warrior', 'Human, Warrior', 'Giant', 'Dragon',
       'Lizard', 'Gremlin', 'Elf, Artificer, Druid', 'Cat', 'Elf, Druid',
       'Elephant', 'Elf, Warrior', 'Human, Druid', 'Pangolin, Beast',
       'Cat, Monkey', 'Elf, Archer', 'Boar', 'Dwarf, Pilot',
       'Human, Soldier', 'Snake', 'Construct', 'Juggernaut',
       'Assembly-Worker', 'Thopter', 'Shapeshifter', 'Elf, Soldier',
       'Golem', 'Pegasus', 'Dog', 'Unicorn', 'Gargoyle', 'Human, Cleric',
       'Orc, Knight', 'Human, Monk', 'Jellyfish', 'Dwarf, Ranger',
       'Spirit, Knight', 'Hu

In [44]:
# One indicator column per creature subtype; subtypes is a ", "-separated list.
subtype_flags = creatures['subtypes'].str.get_dummies(sep=", ")
creatures = pd.concat([creatures, subtype_flags], axis=1)

In [45]:
# Column count after adding the subtype indicators.
creatures.shape

(3811, 291)

In [46]:
# Preview the frame with the subtype indicator columns appended.
creatures.head()

Unnamed: 0,name,colors,manaValue,power,toughness,text,rarity,keywords,supertypes,types,...,Weird,Werewolf,Whale,Wizard,Wolf,Wolverine,Worm,Wurm,Yeti,Zombie
0,Aeronaut Admiral,W,4.0,3.0,1.0,Flying\nVehicles you control have flying.,uncommon,Flying,,Creature,...,0,0,0,0,0,0,0,0,0,0
1,Aether Inspector,W,4.0,2.0,3.0,Vigilance\nWhen Aether Inspector enters the ba...,common,Vigilance,,Creature,...,0,0,0,0,0,0,0,0,0,0
2,Aethergeode Miner,W,2.0,3.0,1.0,"Whenever Aethergeode Miner attacks, you get {E...",rare,,,Creature,...,0,0,0,0,0,0,0,0,0,0
3,Airdrop Aeronauts,W,5.0,4.0,3.0,Flying\nRevolt — When Airdrop Aeronauts enters...,uncommon,"Flying, Revolt",,Creature,...,0,0,0,0,0,0,0,0,0,0
4,Audacious Infiltrator,W,2.0,3.0,1.0,Audacious Infiltrator can't be blocked by arti...,common,,,Creature,...,0,0,0,0,0,0,0,0,0,0


In [47]:
# One indicator column per color letter; colors is a ", "-separated list.
color_flags = creatures['colors'].str.get_dummies(sep=", ")
creatures = pd.concat([creatures, color_flags], axis=1)

In [48]:
# Column count after adding the color indicators.
creatures.shape

(3811, 297)

In [49]:
# Distinct color combinations that were split into the color indicators.
creatures.colors.unique()

array(['W', 'U', 'B', 'R', 'G', 'R, U', 'G, R', 'G, W', 'R, W', 'G, U',
       'U, W', 'B, R', 'B, G', 'C', 'B, W', 'B, U', 'G, R, W',
       'B, G, R, U, W', 'B, G, R', 'R, U, W', 'B, G, U', 'W, G',
       'G, R, U', 'B, G, W', 'B, R, W', 'G, U, W', 'B, U, W', 'B, R, U',
       'G, R, U, W'], dtype=object)

In [50]:
# The raw text/list columns have been expanded into indicator columns (or are
# not used further), so leave them out of the model-ready frame.
encoded_source_columns = [
    'colors',
    'text',
    'rarity',
    'keywords',
    'supertypes',
    'types',
    'subtypes',
]
creatures_transformed = creatures.drop(columns=encoded_source_columns)

In [51]:
# Preview the model-ready frame.
creatures_transformed.head()

Unnamed: 0,name,manaValue,power,toughness,Adamant,Adapt,Afflict,Afterlife,Alliance,Amass,...,Worm,Wurm,Yeti,Zombie,B,C,G,R,U,W
0,Aeronaut Admiral,4.0,3.0,1.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,Aether Inspector,4.0,2.0,3.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,Aethergeode Miner,2.0,3.0,1.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,Airdrop Aeronauts,5.0,4.0,3.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,Audacious Infiltrator,2.0,3.0,1.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [52]:
# verbose=True lists every column's dtype instead of a truncated summary.
creatures_transformed.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3811 entries, 0 to 3810
Data columns (total 290 columns):
 #    Column           Dtype  
---   ------           -----  
 0    name             object 
 1    manaValue        float64
 2    power            float64
 3    toughness        float64
 4    Adamant          int64  
 5    Adapt            int64  
 6    Afflict          int64  
 7    Afterlife        int64  
 8    Alliance         int64  
 9    Amass            int64  
 10   Ascend           int64  
 11   Blitz            int64  
 12   Boast            int64  
 13   Changeling       int64  
 14   Channel          int64  
 15   Cohort           int64  
 16   Companion        int64  
 17   Connive          int64  
 18   Constellation    int64  
 19   Converge         int64  
 20   Convoke          int64  
 21   Coven            int64  
 22   Cycling          int64  
 23   Daybound         int64  
 24   Deathtouch       int64  
 25   Defender         int64  
 26   Delirium         i

In [53]:
# Summary statistics for the numeric columns; for an indicator column the mean
# is the share of creatures that have that feature.
creatures_transformed.describe()

Unnamed: 0,manaValue,power,toughness,Adamant,Adapt,Afflict,Afterlife,Alliance,Amass,Ascend,...,Worm,Wurm,Yeti,Zombie,B,C,G,R,U,W
count,3811.0,3811.0,3811.0,3811.0,3811.0,3811.0,3811.0,3811.0,3811.0,3811.0,...,3811.0,3811.0,3811.0,3811.0,3811.0,3811.0,3811.0,3811.0,3811.0,3811.0
mean,3.425872,2.67174,2.893466,0.001837,0.003149,0.002362,0.002624,0.002624,0.002362,0.003674,...,0.000262,0.004723,0.000262,0.045395,0.214117,0.072684,0.231698,0.219365,0.196536,0.228811
std,1.595746,1.592168,1.598453,0.042824,0.056033,0.048545,0.051164,0.051164,0.048545,0.060507,...,0.016199,0.068572,0.016199,0.208196,0.410262,0.259652,0.421972,0.413871,0.397431,0.420123
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,3.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,4.0,3.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,15.0,13.0,17.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


This is looking like we could actually start to use it for some machine learning analysis, so I'm going to export it to a CSV and pick up in a new file.

In [54]:
# Save the model-ready frame for the next notebook.  With to_csv's defaults the
# row index is written as the first column of the file.
creatures_transformed.to_csv('cleaned_datasets/creatures_transformed.csv')