In [1]:
# import modules
import pandas as pd 
from pathlib import Path
import matplotlib.pyplot as plt
import scipy.stats as st
from scipy.stats import linregress
import numpy as np
import random
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")


In [2]:
# import csv into pandas file
ARN_csv = Path('resources/ARN.csv')
ARN_df = pd.read_csv(ARN_csv)
ARN_df.head()

Unnamed: 0,setCode,rarity,manaCost,colorIdentity,types,subtypes,power,toughness
0,ARN,uncommon,{W},W,Creature,Human,0,1
1,ARN,common,{1}{W}{W},W,Instant,0,0,0
2,ARN,common,{1}{W}{W},W,Instant,0,0,0
3,ARN,common,{W},W,Creature,Camel,0,1
4,ARN,uncommon,{W}{W},W,Instant,0,0,0


In [3]:
# drop type, power, toughness, mana and color 
ARN_df = ARN_df.drop(['manaCost', 'colorIdentity', 'types', 'power', 'toughness'], axis=1)
ARN_df.head()

Unnamed: 0,setCode,rarity,subtypes
0,ARN,uncommon,Human
1,ARN,common,0
2,ARN,common,0
3,ARN,common,Camel
4,ARN,uncommon,0


In [4]:
rarities_count = ARN_df['rarity'].value_counts()
print(rarities_count)

common      41
rare        33
uncommon    18
Name: rarity, dtype: int64


In [5]:
ARN_df['subtype_category'] = 'Other'  # Initialize a new column with 'Other' as default

# Update the category based on specific subtypes
ARN_df.loc[ARN_df['subtypes'].str.contains('Angel', case=False), 'subtype_category'] = 'Angel'
ARN_df.loc[ARN_df['subtypes'].str.contains('Zombie', case=False), 'subtype_category'] = 'Zombie'
ARN_df.loc[ARN_df['subtypes'].str.contains('Human', case=False), 'subtype_category'] = 'Human'

# Count the occurrences of each subtype category
subtype_counts = ARN_df['subtype_category'].value_counts()
print(subtype_counts)

Other     72
Human     19
Zombie     1
Name: subtype_category, dtype: int64


In [6]:
ARN_df = ARN_df.drop('subtypes', axis=1)
ARN_df

Unnamed: 0,setCode,rarity,subtype_category
0,ARN,uncommon,Human
1,ARN,common,Other
2,ARN,common,Other
3,ARN,common,Other
4,ARN,uncommon,Other
...,...,...,...
87,ARN,rare,Other
88,ARN,rare,Other
89,ARN,uncommon,Other
90,ARN,common,Other


In [7]:
ARN_df = ARN_df.assign(Common=41, Rare=33, Uncommon=18, Mythical=0)
ARN_df = ARN_df.assign(Angle = 0, Other = 72, Human = 19, Zombies = 1)

In [8]:
ARN_df.head()

Unnamed: 0,setCode,rarity,subtype_category,Common,Rare,Uncommon,Mythical,Angle,Other,Human,Zombies
0,ARN,uncommon,Human,41,33,18,0,0,72,19,1
1,ARN,common,Other,41,33,18,0,0,72,19,1
2,ARN,common,Other,41,33,18,0,0,72,19,1
3,ARN,common,Other,41,33,18,0,0,72,19,1
4,ARN,uncommon,Other,41,33,18,0,0,72,19,1


In [9]:
CSP_csv = Path('resources/CSP.csv')
CSP_df = pd.read_csv(CSP_csv)
CSP_df = CSP_df.drop(['manaCost', 'colorIdentity', 'types', 'power', 'toughness'], axis=1)
CSP_df.head()
rarities_count = CSP_df['rarity'].value_counts()
print(rarities_count)
CSP_df['subtype_category'] = 'Other'  # Initialize a new column with 'Other' as default

# Update the category based on specific subtypes
CSP_df.loc[CSP_df['subtypes'].str.contains('Angel', case=False), 'subtype_category'] = 'Angel'
CSP_df.loc[CSP_df['subtypes'].str.contains('Zombie', case=False), 'subtype_category'] = 'Zombie'
CSP_df.loc[CSP_df['subtypes'].str.contains('Human', case=False), 'subtype_category'] = 'Human'

# Count the occurrences of each subtype category
subtype_counts = CSP_df['subtype_category'].value_counts()
print(subtype_counts)
CSP_df = CSP_df.assign(Common=60, Rare=40, Uncommon=55, Mythical=0)
CSP_df = CSP_df.assign(Angle = 1, Other = 123, Human = 24, Zombies = 7)
CSP_df.head()

common      60
uncommon    55
rare        40
Name: rarity, dtype: int64
Other     123
Human      24
Zombie      7
Angel       1
Name: subtype_category, dtype: int64


Unnamed: 0,setCode,rarity,subtypes,subtype_category,Common,Rare,Uncommon,Mythical,Angle,Other,Human,Zombies
0,CSP,rare,Angel,Angel,60,40,55,0,1,123,24,7
1,CSP,common,Griffin,Other,60,40,55,0,1,123,24,7
2,CSP,rare,0,Other,60,40,55,0,1,123,24,7
3,CSP,rare,"Human, Soldier",Human,60,40,55,0,1,123,24,7
4,CSP,rare,"Human, Soldier",Human,60,40,55,0,1,123,24,7


In [10]:
KHM_csv = Path('resources/KHM.csv')
KHM_df = pd.read_csv(KHM_csv)
KHM_df = KHM_df.drop(['manaCost', 'colorIdentity', 'types', 'power', 'toughness'], axis=1)
KHM_df.head()
rarities_count = KHM_df['rarity'].value_counts()
print(rarities_count)
CSP_df['subtype_category'] = 'Other'  # Initialize a new column with 'Other' as default

# Update the category based on specific subtypes
KHM_df['subtype_category'] = 'Other'
KHM_df.loc[KHM_df['subtypes'].str.contains('Angel', case=False), 'subtype_category'] = 'Angel'
KHM_df.loc[KHM_df['subtypes'].str.contains('Zombie', case=False), 'subtype_category'] = 'Zombie'
KHM_df.loc[KHM_df['subtypes'].str.contains('Human', case=False), 'subtype_category'] = 'Human'
subtype_counts = KHM_df['subtype_category'].value_counts()
print(subtype_counts)


rare        152
common      130
uncommon    119
mythic       59
Name: rarity, dtype: int64
Other     407
Human      23
Angel      18
Zombie     12
Name: subtype_category, dtype: int64


In [11]:
KHM_df = KHM_df.assign(Common=130, Rare=152, Uncommon=119, Mythical=59)
KHM_df = KHM_df.assign(Angle = 18, Other = 407, Human = 23, Zombies = 12)
KHM_df.head()

Unnamed: 0,setCode,rarity,subtypes,subtype_category,Common,Rare,Uncommon,Mythical,Angle,Other,Human,Zombies
0,KHM,common,"Dwarf, Warrior",Other,130,152,119,59,18,407,23,12
1,KHM,uncommon,"Human, Warrior",Human,130,152,119,59,18,407,23,12
2,KHM,common,Bird,Other,130,152,119,59,18,407,23,12
3,KHM,common,"Human, Warrior",Human,130,152,119,59,18,407,23,12
4,KHM,common,Aura,Other,130,152,119,59,18,407,23,12


In [12]:
MID_csv = Path('resources/MID.csv')
MID_df = pd.read_csv(MID_csv)
MID_df = MID_df.drop(['manaCost', 'colorIdentity', 'types', 'power', 'toughness'], axis=1)
MID_df.head()
rarities_count = MID_df['rarity'].value_counts()
print(rarities_count)
MID_df['subtype_category'] = 'Other'  # Initialize a new column with 'Other' as default

# Update the category based on specific subtypes
MID_df['subtype_category'] = 'Other'
MID_df.loc[MID_df['subtypes'].str.contains('Angel', case=False), 'subtype_category'] = 'Angel'
MID_df.loc[MID_df['subtypes'].str.contains('Zombie', case=False), 'subtype_category'] = 'Zombie'
MID_df.loc[MID_df['subtypes'].str.contains('Human', case=False), 'subtype_category'] = 'Human'
subtype_counts = MID_df['subtype_category'].value_counts()
print(subtype_counts)

rare        154
common      141
uncommon    131
mythic       54
Name: rarity, dtype: int64
Other     322
Human     129
Zombie     19
Angel      10
Name: subtype_category, dtype: int64


In [13]:
MID_df = MID_df.assign(Common=141, Rare=154, Uncommon=131, Mythical=54)
MID_df = MID_df.assign(Angle = 10, Other = 322, Human = 129, Zombies = 19)
MID_df.head()

Unnamed: 0,setCode,rarity,subtypes,subtype_category,Common,Rare,Uncommon,Mythical,Angle,Other,Human,Zombies
0,MID,rare,"Human, Knight",Human,141,154,131,54,10,322,129,19
1,MID,uncommon,"Human, Peasant",Human,141,154,131,54,10,322,129,19
2,MID,uncommon,"Human, Knight",Human,141,154,131,54,10,322,129,19
3,MID,uncommon,"Human, Peasant",Human,141,154,131,54,10,322,129,19
4,MID,uncommon,Spirit,Other,141,154,131,54,10,322,129,19


In [14]:
NEO_csv = Path('resources/NEO.csv')
NEO_df = pd.read_csv(NEO_csv)
NEO_df = NEO_df.drop(['manaCost', 'colorIdentity', 'types', 'power', 'toughness'], axis=1)
NEO_df.head()
rarities_count = NEO_df['rarity'].value_counts()
print(rarities_count)
NEO_df['subtype_category'] = 'Other'  # Initialize a new column with 'Other' as default

# Update the category based on specific subtypes
NEO_df['subtype_category'] = 'Other'
NEO_df.loc[NEO_df['subtypes'].str.contains('Angel', case=False), 'subtype_category'] = 'Angel'
NEO_df.loc[NEO_df['subtypes'].str.contains('Zombie', case=False), 'subtype_category'] = 'Zombie'
NEO_df.loc[NEO_df['subtypes'].str.contains('Human', case=False), 'subtype_category'] = 'Human'
subtype_counts = NEO_df['subtype_category'].value_counts()
print(subtype_counts)

rare        207
common      167
uncommon    124
mythic       76
Name: rarity, dtype: int64
Other    480
Human     94
Name: subtype_category, dtype: int64


In [15]:
NEO_df = NEO_df.assign(Common=167, Rare=207, Uncommon=124, Mythical=76)
NEO = NEO_df.assign(Angle = 0, Other = 480, Human = 94, Zombies = 0)
NEO.head()

Unnamed: 0,setCode,rarity,subtypes,subtype_category,Common,Rare,Uncommon,Mythical,Angle,Other,Human,Zombies
0,NEO,common,Equipment,Other,167,207,124,76,0,480,94,0
1,NEO,common,Equipment,Other,167,207,124,76,0,480,94,0
2,NEO,mythic,"Dragon, Spirit",Other,167,207,124,76,0,480,94,0
3,NEO,uncommon,0,Other,167,207,124,76,0,480,94,0
4,NEO,common,Saga,Other,167,207,124,76,0,480,94,0


In [16]:
PCY_csv = Path('resources/PCY.csv')
PCY_df = pd.read_csv(PCY_csv)
PCY_df = PCY_df.drop(['manaCost', 'colorIdentity', 'types', 'power', 'toughness'], axis=1)
PCY_df.head()
rarities_count = PCY_df['rarity'].value_counts()
print(rarities_count)
PCY_df['subtype_category'] = 'Other'  # Initialize a new column with 'Other' as default

# Update the category based on specific subtypes
PCY_df['subtype_category'] = 'Other'
PCY_df.loc[PCY_df['subtypes'].str.contains('Angel', case=False), 'subtype_category'] = 'Angel'
PCY_df.loc[PCY_df['subtypes'].str.contains('Zombie', case=False), 'subtype_category'] = 'Zombie'
PCY_df.loc[PCY_df['subtypes'].str.contains('Human', case=False), 'subtype_category'] = 'Human'
subtype_counts = PCY_df['subtype_category'].value_counts()
print(subtype_counts)

common      55
rare        45
uncommon    44
Name: rarity, dtype: int64
Other     117
Human      23
Zombie      3
Angel       1
Name: subtype_category, dtype: int64


In [17]:
PCY_df = PCY_df.assign(Common=55, Rare=45, Uncommon=44, Mythical=0)
PCY_df = PCY_df.assign(Angle = 1, Other = 117, Human = 23, Zombies = 3)
PCY_df.head()

Unnamed: 0,setCode,rarity,subtypes,subtype_category,Common,Rare,Uncommon,Mythical,Angle,Other,Human,Zombies
0,PCY,uncommon,0,Other,55,45,44,0,1,117,23,3
1,PCY,common,0,Other,55,45,44,0,1,117,23,3
2,PCY,rare,Avatar,Other,55,45,44,0,1,117,23,3
3,PCY,rare,0,Other,55,45,44,0,1,117,23,3
4,PCY,rare,0,Other,55,45,44,0,1,117,23,3


In [18]:
ROE_csv = Path('resources/ROE.csv')
ROE_df = pd.read_csv(ROE_csv)
ROE_df = ROE_df.drop(['manaCost', 'colorIdentity', 'types', 'power', 'toughness'], axis=1)
ROE_df.head()
rarities_count = ROE_df['rarity'].value_counts()
print(rarities_count)
ROE_df['subtype_category'] = 'Other'  # Initialize a new column with 'Other' as default

# Update the category based on specific subtypes
ROE_df['subtype_category'] = 'Other'
ROE_df.loc[ROE_df['subtypes'].str.contains('Angel', case=False), 'subtype_category'] = 'Angel'
ROE_df.loc[ROE_df['subtypes'].str.contains('Zombie', case=False), 'subtype_category'] = 'Zombie'
ROE_df.loc[ROE_df['subtypes'].str.contains('Human', case=False), 'subtype_category'] = 'Human'
subtype_counts = ROE_df['subtype_category'].value_counts()
print(subtype_counts)

common      120
uncommon     60
rare         53
mythic       15
Name: rarity, dtype: int64
Other     222
Human      22
Angel       2
Zombie      2
Name: subtype_category, dtype: int64


In [19]:
ROE_df = ROE_df.assign(Common=120, Rare=53, Uncommon=60, Mythical=15)
ROE_df = ROE_df.assign(Angle = 2, Other = 222, Human = 22, Zombies = 2)
ROE_df.head()

Unnamed: 0,setCode,rarity,subtypes,subtype_category,Common,Rare,Uncommon,Mythical,Angle,Other,Human,Zombies
0,ROE,mythic,Eldrazi,Other,120,53,60,15,2,222,22,2
1,ROE,uncommon,Eldrazi,Other,120,53,60,15,2,222,22,2
2,ROE,rare,"Eldrazi, Aura",Other,120,53,60,15,2,222,22,2
3,ROE,mythic,Eldrazi,Other,120,53,60,15,2,222,22,2
4,ROE,common,Eldrazi,Other,120,53,60,15,2,222,22,2


In [20]:
VOW_csv = Path('resources/VOW.csv')
VOW_df = pd.read_csv(VOW_csv)
VOW_df = VOW_df.drop(['manaCost', 'colorIdentity', 'types', 'power', 'toughness'], axis=1)
VOW_df.head()
rarities_count = VOW_df['rarity'].value_counts()
print(rarities_count)
VOW_df['subtype_category'] = 'Other'  # Initialize a new column with 'Other' as default

# Update the category based on specific subtypes
VOW_df['subtype_category'] = 'Other'
VOW_df.loc[VOW_df['subtypes'].str.contains('Angel', case=False), 'subtype_category'] = 'Angel'
VOW_df.loc[VOW_df['subtypes'].str.contains('Zombie', case=False), 'subtype_category'] = 'Zombie'
VOW_df.loc[VOW_df['subtypes'].str.contains('Human', case=False), 'subtype_category'] = 'Human'
subtype_counts = VOW_df['subtype_category'].value_counts()
print(subtype_counts)

rare        168
common      142
uncommon    137
mythic       58
Name: rarity, dtype: int64
Other     389
Human      84
Zombie     31
Angel       1
Name: subtype_category, dtype: int64


In [21]:
VOW_df = VOW_df.assign(Common=142, Rare=168, Uncommon=137, Mythical=58)
VOW_df = VOW_df.assign(Angle = 1, Other = 389, Human = 84, Zombies = 31)
VOW_df.head()

Unnamed: 0,setCode,rarity,subtypes,subtype_category,Common,Rare,Uncommon,Mythical,Angle,Other,Human,Zombies
0,VOW,common,0,Other,142,168,137,58,1,389,84,31
1,VOW,uncommon,"Angel, Soldier",Angel,142,168,137,58,1,389,84,31
2,VOW,uncommon,0,Other,142,168,137,58,1,389,84,31
3,VOW,uncommon,Equipment,Other,142,168,137,58,1,389,84,31
4,VOW,rare,0,Other,142,168,137,58,1,389,84,31


In [22]:
WAR_csv = Path('resources/WAR.csv')
WAR_df = pd.read_csv(WAR_csv)
WAR_df = WAR_df.drop(['manaCost', 'colorIdentity', 'types', 'power', 'toughness'], axis=1)
WAR_df.head()
rarities_count = WAR_df['rarity'].value_counts()
print(rarities_count)
WAR_df['subtype_category'] = 'Other'  # Initialize a new column with 'Other' as default

# Update the category based on specific subtypes
WAR_df['subtype_category'] = 'Other'
WAR_df.loc[WAR_df['subtypes'].str.contains('Angel', case=False), 'subtype_category'] = 'Angel'
WAR_df.loc[WAR_df['subtypes'].str.contains('Zombie', case=False), 'subtype_category'] = 'Zombie'
WAR_df.loc[WAR_df['subtypes'].str.contains('Human', case=False), 'subtype_category'] = 'Human'
subtype_counts = WAR_df['subtype_category'].value_counts()
print(subtype_counts)

common      120
uncommon    102
rare         69
mythic       21
Name: rarity, dtype: int64
Other     268
Zombie     25
Human      17
Angel       2
Name: subtype_category, dtype: int64


In [23]:
WAR_df = WAR_df.assign(Common=120, Rare=69, Uncommon=102, Mythical=21)
WAR_df = WAR_df.assign(Angle = 2, Other = 268, Human = 17, Zombies = 25)
WAR_df.head()

Unnamed: 0,setCode,rarity,subtypes,subtype_category,Common,Rare,Uncommon,Mythical,Angle,Other,Human,Zombies
0,WAR,rare,Karn,Other,120,69,102,21,2,268,17,25
1,WAR,rare,Karn,Other,120,69,102,21,2,268,17,25
2,WAR,rare,Ugin,Other,120,69,102,21,2,268,17,25
3,WAR,rare,Ugin,Other,120,69,102,21,2,268,17,25
4,WAR,uncommon,"Spirit, Monk",Other,120,69,102,21,2,268,17,25


In [24]:
XLN_csv = Path('resources/XLN.csv')
XLN_df = pd.read_csv(XLN_csv)
XLN_df = XLN_df.drop(['manaCost', 'colorIdentity', 'types', 'power', 'toughness'], axis=1)
XLN_df.head()
rarities_count = XLN_df['rarity'].value_counts()
print(rarities_count)
XLN_df['subtype_category'] = 'Other'  # Initialize a new column with 'Other' as default

# Update the category based on specific subtypes
XLN_df['subtype_category'] = 'Other'
XLN_df.loc[XLN_df['subtypes'].str.contains('Angel', case=False), 'subtype_category'] = 'Angel'
XLN_df.loc[XLN_df['subtypes'].str.contains('Zombie', case=False), 'subtype_category'] = 'Zombie'
XLN_df.loc[XLN_df['subtypes'].str.contains('Human', case=False), 'subtype_category'] = 'Human'
subtype_counts = XLN_df['subtype_category'].value_counts()
print(subtype_counts)

common      125
uncommon     82
rare         75
mythic       17
Name: rarity, dtype: int64
Other    256
Human     43
Name: subtype_category, dtype: int64


In [25]:
XLN_df = XLN_df.assign(Common=125, Rare=75, Uncommon=82, Mythical=17)
XLN_df = XLN_df.assign(Angle = 0, Other = 256, Human = 43, Zombies = 0)
XLN_df.head()

Unnamed: 0,setCode,rarity,subtypes,subtype_category,Common,Rare,Uncommon,Mythical,Angle,Other,Human,Zombies
0,XLN,uncommon,"Vampire, Soldier",Other,125,75,82,17,0,256,43,0
1,XLN,rare,0,Other,125,75,82,17,0,256,43,0
2,XLN,mythic,0,Other,125,75,82,17,0,256,43,0
3,XLN,uncommon,Dinosaur,Other,125,75,82,17,0,256,43,0
4,XLN,rare,"Vampire, Cleric",Other,125,75,82,17,0,256,43,0


In [29]:
# Concatenate DataFrames horizontally 
combined_df = pd.concat([ARN_df, CSP_df, KHM_df, MID_df, NEO_df, PCY_df, ROE_df,
                        VOW_df, WAR_df, XLN_df])

# Display the combined DataFrame
combined_df.drop("subtypes", axis=1, inplace=True)
combined_df

Unnamed: 0,setCode,rarity,subtype_category,Common,Rare,Uncommon,Mythical,Angle,Other,Human,Zombies
0,ARN,uncommon,Human,41,33,18,0,0.0,72.0,19.0,1.0
1,ARN,common,Other,41,33,18,0,0.0,72.0,19.0,1.0
2,ARN,common,Other,41,33,18,0,0.0,72.0,19.0,1.0
3,ARN,common,Other,41,33,18,0,0.0,72.0,19.0,1.0
4,ARN,uncommon,Other,41,33,18,0,0.0,72.0,19.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...
294,XLN,mythic,Other,125,75,82,17,0.0,256.0,43.0,0.0
295,XLN,common,Other,125,75,82,17,0.0,256.0,43.0,0.0
296,XLN,uncommon,Other,125,75,82,17,0.0,256.0,43.0,0.0
297,XLN,rare,Other,125,75,82,17,0.0,256.0,43.0,0.0


In [30]:
combined_df.to_csv('combined_data.csv', index=False)