# Introduction

This work is inspired by this paper https://www.elie.net/publication/i-am-a-legend from  Elie and Celine Bursztein and will try to reproduce their findings applying some different ideas.

## Load the JSON data

Let's start loading the game cards data from http://hearthstonejson.com/ and loading it to python using the json library.

In [70]:
import os.path
import json

all_sets_filename = os.path.join('data', 'AllSets.json')

# Uncomment the following lines to update the date file
#import urllib
#urllib.urlretrieve ('http://hearthstonejson.com/json/AllSets.json', all_sets_filename)

with open(all_sets_filename) as fp:
    all_card_sets = json.load(fp, encoding='utf-8')

## Collectible cards

In [73]:
all_cards = sum((v for k, v in all_card_sets.items()
                 if k not in ('Debug', 'Credits', 'Missions', 'System')), list())
# Select only collectible cards
all_collectible_cards = [card for card in all_cards if u'collectible' in card and card['collectible']]
# Remove heroes
all_collectible_cards = [card for card in all_collectible_cards
                         if 'type' in card and card['type'] != 'Hero']
len(all_collectible_cards)

535

## Card tags

In [74]:
tags = set()
for card in all_collectible_cards:
    tags.update(set(card.keys()))
tags

{u'artist',
 u'attack',
 u'collectible',
 u'cost',
 u'durability',
 u'elite',
 u'faction',
 u'flavor',
 u'health',
 u'howToGet',
 u'howToGetGold',
 u'id',
 u'inPlayText',
 u'mechanics',
 u'name',
 u'playerClass',
 u'race',
 u'rarity',
 u'text',
 u'type'}

In [75]:
# Only interested in these tags for pricer purpouses
interest_tags = {u'attack', u'cost', u'durability', u'health', u'id', u'mechanics', u'name',
                 u'playerClass', u'text', u'type'}
all_collectible_cards = [{k: v for k, v in card.items() if k in interest_tags}
                         for card in all_collectible_cards]

## Card mechanics

In [154]:
mechanics = set()
for card in all_collectible_cards:
    if 'mechanics' in card:
        mechanics.update(card['mechanics'])
mechanics

{u'AdjacentBuff',
 u'AffectedBySpellPower',
 u'Aura',
 u'Battlecry',
 u'Charge',
 u'Combo',
 u'Deathrattle',
 u'Divine Shield',
 u'Enrage',
 u'Freeze',
 u'HealTarget',
 u'ImmuneToSpellpower',
 u'Poisonous',
 u'Secret',
 u'Silence',
 u'Spellpower',
 u'Stealth',
 u'Taunt',
 u'Windfury'}

## Card types

In [155]:
types = set()
for card in all_collectible_cards:
    if 'type' in card:
        types.add(card['type'])
types

{u'Minion', u'Spell', u'Weapon'}

## Big data with cards

In [156]:
import pandas
all_collectible_cards_df = pandas.DataFrame(all_collectible_cards)

# Set/modify the pricing attributes
all_collectible_cards_df['intrinsic'] = -1

all_collectible_cards_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 535 entries, 0 to 534
Data columns (total 11 columns):
attack         357 non-null float64
cost           535 non-null int64
durability     18 non-null float64
health         339 non-null float64
id             535 non-null object
mechanics      279 non-null object
name           535 non-null object
playerClass    306 non-null object
text           517 non-null object
type           535 non-null object
intrinsic      535 non-null int64
dtypes: float64(3), int64(2), object(6)
memory usage: 50.2+ KB


# Some statistics

## Breaking the text

In [159]:
import re
html_tag_pat = re.compile(r'</?[^>]+>')

def text_breaker(text):
    # Remove presentation characters
    raw_text = html_tag_pat.sub('', text).replace('\n', ' ').replace('  ', ' ')
    # Remove comments
    uncommented_text = re.sub(r'\([^\)]+\)', '', raw_text)
    # replaced_text = re.sub(r'\.', '_', uncommented_text)
    replaced_text = uncommented_text.replace('.', '_')
    return set(map(lambda x: x.strip(), replaced_text.split('_')))

all_collectible_cards_df['text_mechanics'] = all_collectible_cards_df.apply(
    lambda row: text_breaker(row['text']) if isinstance(row['text'], basestring) else [] , axis=1)

#all_collectible_cards_df[~all_collectible_cards_df['text'].isnull() & all_collectible_cards_df['text'].str.contains('"')]['text'].values

## Short text mechanics

In [161]:
text_mechanics = set()
for card in all_collectible_cards_df['text_mechanics']:
    text_mechanics.update(card)
[x for x in text_mechanics if len(x) < 14]

[u'',
 u'Draw a card',
 u'Draw 4 cards',
 u'Charge',
 u"Can't Attack",
 u'Stealth',
 u'Draw 3 cards',
 u'Gain 5 Armor',
 u'Windfury',
 u'Horribly',
 u'Divine Shield',
 u'Charge Taunt',
 u'Draw 2 cards',
 u'"',
 u'Taunt',
 u'Then, it dies',
 u'Overload:']

## Vanilla test

In [167]:
known_mechanics = set(['Charge', 'Stealth', 'Windfury', 'Taunt', 'Divine Shield'])

vanilla_minions_df = pandas.DataFrame(
    all_collectible_cards_df[(all_collectible_cards_df['type'] == 'Minion')])

vanilla_minions_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 339 entries, 0 to 534
Data columns (total 12 columns):
attack            339 non-null float64
cost              339 non-null int64
durability        0 non-null float64
health            339 non-null float64
id                339 non-null object
mechanics         233 non-null object
name              339 non-null object
playerClass       110 non-null object
text              325 non-null object
type              339 non-null object
intrinsic         339 non-null int64
text_mechanics    339 non-null object
dtypes: float64(3), int64(2), object(7)
memory usage: 34.4+ KB


In [177]:
# Set/modify the pricing attributes based on mechanics
vanilla_minions_df['charge'] = vanilla_minions_df.apply(
    lambda row: row['attack'] if (isinstance(row['mechanics'], list) and 'Charge' in row['mechanics']) else 0, axis=1)
vanilla_minions_df['stealth'] = vanilla_minions_df.apply(
    lambda row: row['attack'] if (isinstance(row['mechanics'], list) and 'Stealth' in row['mechanics']) else 0, axis=1)
vanilla_minions_df['windfury'] = vanilla_minions_df.apply(
    lambda row: row['attack'] if (isinstance(row['mechanics'], list) and 'Windfury' in row['mechanics']) else 0, axis=1)
vanilla_minions_df['taunt'] = vanilla_minions_df.apply(
    lambda row: row['health'] if (isinstance(row['mechanics'], list) and 'Taunt' in row['mechanics']) else 0, axis=1)
vanilla_minions_df['divine_shield'] = vanilla_minions_df.apply(
    lambda row: row['attack'] if (isinstance(row['mechanics'], list) and 'Divine Shield' in row['mechanics']) else 0, axis=1)

In [178]:
vanilla_minions_df[['name', 'cost', 'attack', 'health', 'charge', 'stealth', 'windfury', 'taunt', 'divine_shield']]

Unnamed: 0,name,cost,attack,health,charge,stealth,windfury,taunt,divine_shield
0,Abomination,5,4,4,0,0,0,4,0
1,Abusive Sergeant,1,2,1,0,0,0,0,0
2,Acolyte of Pain,3,1,3,0,0,0,0,0
3,Al'Akir the Windlord,8,3,5,3,0,3,5,3
4,Alarm-o-Bot,3,0,3,0,0,0,0,0
5,Aldor Peacekeeper,3,3,3,0,0,0,0,0
6,Alexstrasza,9,8,8,0,0,0,0,0
7,Amani Berserker,2,2,3,0,0,0,0,0
9,Ancient Brewmaster,4,5,4,0,0,0,0,0
10,Ancient Mage,4,2,5,0,0,0,0,0


In [179]:
import numpy

a = vanilla_minions_df.as_matrix(['attack', 'health', 'intrinsic', 'charge', 'stealth', 'windfury', 'taunt', 'divine_shield'])
b = vanilla_minions_df.as_matrix(['cost'])
cost_per_point = numpy.linalg.lstsq(a, b)[0]
cost_per_point

array([[ 0.51013597],
       [ 0.53345198],
       [-0.09774491],
       [ 0.15715708],
       [-0.0054474 ],
       [ 0.0519493 ],
       [-0.06106898],
       [ 0.30533601]])

In [185]:
card_value = cost_per_point[2][0]
vanilla_minions_df['value'] = numpy.dot(a, cost_per_point).T[0]
vanilla_minions_df['boost'] = (vanilla_minions_df['value'] + card_value) / \
    (vanilla_minions_df['cost'] + card_value)
vanilla_minions_df[['name', 'cost', 'value', 'boost']].sort('boost', ascending=False)

Unnamed: 0,name,cost,value,boost
274,Zombie Chow,1,2.718373,2.904531
90,Flame Imp,1,2.695057,2.878689
11,Ancient Watcher,2,4.805549,2.474854
71,Dust Devil,1,2.317453,2.460178
532,Warbot,1,2.208237,2.339130
359,Northshire Cleric,1,2.208237,2.339130
143,Mana Wyrm,1,2.208237,2.339130
148,Millhouse Manastorm,2,4.272097,2.194423
398,Voidwalker,1,2.025030,2.136075
195,Shieldbearer,1,1.987277,2.094233


## 2-cost minions

In [85]:
two_cost_minions_df = all_collectible_cards_df[(all_collectible_cards_df['type'] == 'Minion') &
                                               (all_collectible_cards_df['cost'] == 2)]
print('2-cost minion mean attack: {:.2f}'.format(two_cost_minions_df.attack.mean()))
print('2-cost minion mean health: {:.2f}'.format(two_cost_minions_df.health.mean()))

2-cost minion mean attack: 1.88
2-cost minion mean health: 2.45
