In [1]:
api_key = "hh8DW29tdDmshbghN71NJPI0jgFzp1Ay5MzjsniIz9a8Piq7xp"

### Making the API request for all cards

In [2]:
import requests
headers = {'X-Mashape-Key': api_key}

In [3]:
# cardname = "Leeroy"

In [4]:
# endpoint_one_card = f"https://omgvamp-hearthstone-v1.p.mashape.com/cards/search/{cardname}"
# endpoint_allcards = "https://omgvamp-hearthstone-v1.p.mashape.com/cards"

def get_cards_by_type(card_type, collectible=1, cost=None, durability=None, health=None, key=api_key):
    endpoint_by_type = f'https://omgvamp-hearthstone-v1.p.mashape.com/cards/types/{card_type}'
    payload = {'collectible': collectible, 'cost': cost, 'durability': durability, 'health': health}
    r = requests.get(endpoint_by_type, params=payload, headers=headers)
    return r.json()

In [7]:
# r = requests.get(endpoint_allcards, headers=headers)

In [8]:
# r.json()

In [5]:
import json
# with open('hs_data.json', 'w') as outfile:
#     json.dump(r.json(), outfile)

### Using the offline JSON

In [6]:
import pandas as pd

In [33]:
# Reading the json as a dict
with open('hs_data.json') as json_data:
    data = json.load(json_data)

In [37]:
everything = [single_card for cardset in data.values() for single_card in cardset]

In [39]:
collectibles = [single_card for single_card in everything 
                if 'collectible' in single_card 
                and single_card['collectible']]

In [41]:
non_heroes = [single_card for single_card in collectibles
              if single_card['type'] != 'Hero']

In [53]:
all_types = set([card['type'] for card in non_heroes])
all_types

{'Minion', 'Spell', 'Weapon'}

### Otherwise, first separate the cards by type. We can make API calls for that.

In [11]:
collectible_spells = get_cards_by_type('Spell')
collectible_weapons = get_cards_by_type('Weapon')
collectible_minions = get_cards_by_type('Minion')

In [12]:
minions = collectible_minions
weapons = collectible_weapons
spells = collectible_spells

In [13]:
# for cardtype, collection in zip(['minions', 'spells', 'weapons'], [collectible_minions, collectible_spells, collectible_weapons]):
#     with open(f'{cardtype}.json', 'w') as outfile:
#         json.dump(collection, outfile)

In [7]:
# Reading the json as a dict
with open('data/minions.json') as json_data:
    minions = json.load(json_data)
    
with open('data/spells.json') as json_data:
    spells = json.load(json_data)
    
with open('data/weapons.json') as json_data:
    weapons = json.load(json_data)

### Let's now separate the titles, flavors, texts, mechanics, costs and stats.
#### Each card type has different attributes and design logic behind them, so we want to make educated splits. Moreover, not all fields are of interest for us for now. 

In [8]:
spell_attributes = {"name", "cardId", "cost", "img", "playerClass", "rarity", "text", "flavor", "mechanics"}
# spell_optional_attributes = {}

minion_attributes = {"name", "cardId", "cost", "health", "attack", "img", "playerClass", "rarity", "text", "flavor", "mechanics"}
# minion_optional_attributes = {}

weapon_attributes = {"name", "cardId", "cost", "durability", "attack", "img", "playerClass", "rarity", "text", "flavor", "mechanics"}
# weapon_attributes = {}

### For this first generation example we shall funnel all the cards to the above attributes to normalize the data.

In [9]:
def normalize_card(card, attrs):
    
    concise = {a: card[a] if a in card else None for a in attrs}
    
    return concise

In [10]:
spells_concise = [normalize_card(spell_card, spell_attributes) for spell_card in spells]
minions_concise = [normalize_card(minion_card, minion_attributes) for minion_card in minions]
weapons_concise = [normalize_card(weapon_card, weapon_attributes) for weapon_card in weapons]

In [16]:
weapons_concise[:2]

[{'attack': 1,
  'cardId': 'LOOT_222',
  'cost': 1,
  'durability': 3,
  'flavor': 'Once called Cahn’delar, Shortbow of the Ancient Whisker.',
  'img': 'http://media.services.zam.com/v1/media/byName/hs/cards/enus/LOOT_222.png',
  'mechanics': None,
  'name': 'Candleshot',
  'playerClass': 'Hunter',
  'rarity': 'Common',
  'text': 'Your hero is <b>Immune</b> while attacking.'},
 {'attack': 2,
  'cardId': 'LOE_118',
  'cost': 1,
  'durability': 3,
  'flavor': 'The Curse is that you have to listen to "MMMBop" on repeat.',
  'img': 'http://media.services.zam.com/v1/media/byName/hs/cards/enus/LOE_118.png',
  'mechanics': None,
  'name': 'Cursed Blade',
  'playerClass': 'Warrior',
  'rarity': 'Rare',
  'text': 'Double all damage dealt to your hero.'}]

In [12]:
# titles = [(card['name'], card['cardId']) for card in non_heroes]
# flavors = [(card['flavor'], card['cardId']) for card in non_heroes]
# texts = [(card['text'], card['cardId']) for card in non_heroes]
# mechanics = [(card['mechanics'], card['cardId']) if 'mechanics' in card else None for card in non_heroes]
# costs = [(card['cost'], card['cardId']) for card in non_heroes]
# stats = [(card['name'], card['cardId']) for card in non_heroes]

### The data is now quite ready to be modelled! We can now access the needed fields directly from the correpsonding cardtype set.

## A small LSTM

In [13]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import TimeDistributed
from keras.layers import Activation
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


### Let's get all the card texts.

In [12]:
def get_all_flavortexts():
    
    weapon_flavors = [card['flavor'] for card in weapons]
    minion_flavors = [card['flavor'] for card in minions]
    spell_flavors = [card['flavor'] for card in spells if 'flavor' in card]
    
    return weapon_flavors + minion_flavors + spell_flavors

In [19]:
def get_all_cardtexts():
    
    weapon_texts = [card['text'] for card in weapons if 'text' in card]
    minion_texts = [card['text'] for card in minions if 'text' in card]
    spell_texts = [card['text'] for card in spells if 'flavor' in card]
    
    return weapon_texts + minion_texts + spell_texts

In [20]:
# all_flavors = get_all_flavortexts()
all_texts = get_all_cardtexts()

### Our average sequence length is 67.8 characters. This is handy to know for the LSTM sequence length parameter. Let's round it down for sake of memorability.

In [24]:
average_sequence_length = np.mean([len(list(text)) for text in all_texts])
average_sequence_length

56.821935483870966

In [25]:
SEQUENCE_LENGTH = 57

### Our generative model is a character based one, so our input data is a huge list of characters.

In [26]:
all_text_chars = [ch for one_sentence in all_texts for ch in list(one_sentence)]

In [31]:
all_text_unique_chars = set(all_text_chars)

### We see that there are a lot of unnecessary or unwished characters in the model. We could clean up the model by moderating the char list but for now we let it like this.

In [32]:
print(f'Data length: {len(all_text_chars)} characters')
print(f'Vocabulary size: {len(all_text_unique_chars)} characters')

Data length: 88074 characters
Vocabulary size: 83 characters


### The unique characters are the features for our model. Let's numerify them to make them ML ready.

In [33]:
ix_to_char = {ix:char for ix, char in enumerate(all_text_unique_chars)}
char_to_ix = {char:ix for ix, char in enumerate(all_text_unique_chars)}

In [34]:
ix_to_char

{0: '/',
 1: 'n',
 2: 'e',
 3: 'R',
 4: ')',
 5: 'o',
 6: 'c',
 7: ';',
 8: '$',
 9: 'I',
 10: 'J',
 11: 'l',
 12: 'a',
 13: 'v',
 14: ':',
 15: 'G',
 16: '_',
 17: 'S',
 18: 'P',
 19: 'b',
 20: '<',
 21: 'B',
 22: '(',
 23: 'r',
 24: 'g',
 25: 'A',
 26: '3',
 27: 'K',
 28: 'H',
 29: 'Y',
 30: 'L',
 31: '+',
 32: 'q',
 33: 'f',
 34: 'W',
 35: 'h',
 36: 'y',
 37: 'd',
 38: 'U',
 39: 'F',
 40: 'O',
 41: ',',
 42: 'C',
 43: '7',
 44: 'x',
 45: '8',
 46: 'i',
 47: 'u',
 48: '!',
 49: 'T',
 50: 'w',
 51: '9',
 52: '#',
 53: 'N',
 54: 'D',
 55: 'm',
 56: ']',
 57: '[',
 58: '5',
 59: '0',
 60: '.',
 61: '\\',
 62: 'j',
 63: '"',
 64: ' ',
 65: 's',
 66: 't',
 67: 'z',
 68: 'Q',
 69: '%',
 70: 'M',
 71: '6',
 72: "'",
 73: 'E',
 74: 'V',
 75: '>',
 76: '-',
 77: '1',
 78: 'k',
 79: '4',
 80: '’',
 81: '2',
 82: 'p'}

### LSTM expects input of the shape (batch_size, length_of_sequence, number_features)
#### batch_size: amount of sequences which are fed into the network at one time, just as in a regular feedforward neural network
#### length_of_sequence: the amount of "neural networks", the memory or the amount of steps the network looks at at each step. In our example, we want to predict a character given 65 previous characters.
#### number_features: the length of one featurized element. In the case of images it could be padded standardized vectors of pixels. In case of text it is the length of our vocab, because our input is going to be represented by every char in our vocabulary.


In [36]:
NUMBER_FEATURES = len(all_text_unique_chars)

In [37]:
X = np.zeros((int(len(all_text_chars)/SEQUENCE_LENGTH), SEQUENCE_LENGTH, NUMBER_FEATURES))
y = np.zeros((int(len(all_text_chars)/SEQUENCE_LENGTH), SEQUENCE_LENGTH, NUMBER_FEATURES))

In [38]:
len(all_text_chars)

88074

In [40]:
for i in range(0, int(len(all_text_chars)/SEQUENCE_LENGTH)):
    X_sequence = all_text_chars[i*SEQUENCE_LENGTH:(i+1)*SEQUENCE_LENGTH]  #Get next sequence of length 57 as input.
    X_sequence_ix = [char_to_ix[value] for value in X_sequence]  # Convert the above sequence to the integer mapping.
    # TODO: make this one hot encoding differently: Keras or sklearn or something.
    input_sequence = np.zeros((SEQUENCE_LENGTH, NUMBER_FEATURES))  # Create a skeleton for the input sequence: we create a 2d numpy matrix which has a feature array of 94 
                                                                   # long for each of the 57 characters in sequence. This way we basically one hot encode our sequences. 
    for j in range(SEQUENCE_LENGTH):  # The one hot encoding process: we replace a zero with a one on a position in the input sequence which corresponds with the index of a character in our converted array!
        input_sequence[j][X_sequence_ix[j]] = 1.
    X[i] = input_sequence
    
    #Same for y!
    y_sequence = all_text_chars[i*SEQUENCE_LENGTH+1:(i+1)*SEQUENCE_LENGTH+1]
    y_sequence_ix = [char_to_ix[value] for value in y_sequence]
    target_sequence = np.zeros((SEQUENCE_LENGTH, NUMBER_FEATURES))
    for j in range(SEQUENCE_LENGTH):
        target_sequence[j][y_sequence_ix[j]] = 1.
    y[i] = target_sequence

In [91]:

# # prepare the dataset of input to output pairs encoded as integers
# seq_length = 100
# dataX = []
# dataY = []
# n_chars = len(all_flavor_chars)
# for i in range(0, n_chars - SEQUENCE_LENGTH, 1):
#     seq_in = all_flavor_chars[i:i + SEQUENCE_LENGTH]
#     seq_out = all_flavor_chars[i + SEQUENCE_LENGTH]
#     dataX.append([char_to_ix[char] for char in seq_in])
#     dataY.append(char_to_ix[seq_out])
# n_patterns = len(dataX)
# print("Total Patterns: ", n_patterns)

In [92]:
# X = np.reshape(dataX, (n_patterns, SEQUENCE_LENGTH, 1))

In [93]:
# # normalize
# X = X / float(len(all_flavor_unique_chars))
# # one hot encode the output variable
# y = np_utils.to_categorical(dataY)

In [41]:
HIDDEN_DIM = 500
LAYER_NUM = 3

In [42]:
model = Sequential()
model.add(LSTM(HIDDEN_DIM, input_shape=(None, NUMBER_FEATURES), return_sequences=True))
for i in range(LAYER_NUM - 1):
    model.add(LSTM(HIDDEN_DIM, return_sequences=True))
    model.add(Dropout(0.25))
model.add(TimeDistributed(Dense(NUMBER_FEATURES)))
model.add(Activation('softmax'))
model.compile(loss="categorical_crossentropy", optimizer="rmsprop")

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [45]:
def generate_text(model, length):
    
    hele_tekst = []
    
    ix = [np.random.randint(NUMBER_FEATURES)]
    y_char = [ix_to_char[ix[-1]]]
    X = np.zeros((1, length, NUMBER_FEATURES))
    for i in range(length):
        X[0, i, :][ix[-1]] = 1
        print(ix_to_char[ix[-1]], end="")
        ix = np.argmax(model.predict(X[:, :i+1, :])[0], 1)
        hele_tekst.append(ix_to_char[ix[-1]])
    return hele_tekst

In [None]:
nb_epoch = 0
BATCH_SIZE = 50
GENERATE_LENGTH = 70
while True:
    print('\n')
    model.fit(X, y, batch_size=BATCH_SIZE, verbose=1, nb_epoch=1)
    nb_epoch += 1
    generate_text(model, GENERATE_LENGTH)
    if nb_epoch % 10 == 0:
        model.save_weights('cardtext_cps/checkpoint_{}_epoch_{}.hdf5'.format(HIDDEN_DIM, nb_epoch))

In [55]:
print(2)
model.load_weights("checkpoint_500_epoch_1140.hdf5")

2


In [56]:
for i in range(10):
    generate_text(model, 100)
    print('\n----\n')

)  It's pretty insensitive.Also does weddings.The Grand Tournament is priorserved in the Galubas war
----

”She's practically GIVING your secrets away!They can see the future he gets to harr in the rooms.I d
----

\nNot of every senting scorping.The Gring Torslager Singe Ragnaros Relez.I love in his brawher.She's
----

:  It takes a long time to get blood stains out of leather armor.Is this one is ar the board."Voil! 
----

™ a feed on fears of a truetal misunderstood are supposed to go to the Gadsetally in your deck?Alway
----

Zold prefer song drop. I think I love it, "Meely'le rook." - Hammer of Warrior, Chapter 9There's som
----

g people get better.  That's why she hurts them in the first place.Ragnaros looked down. Hur her goo
----

:  It takes a long time to get blood stains out of leather armor.Is this one is ar the board."Voil! 
----

%  and the put to storicaly and soul things on fire.  You might want to be first place and sharefal 
----

6 anyways.It's a literal lava lamp!Do

In [61]:
def remake_text(generated_text):
    
    joined = ''.join(generated_text)
    
    return joined

In [65]:
generated_flavors = [remake_text(generate_text(model, 65)) for i in range(200)]

In [1]:
import pickle

In [2]:
with open('generated_flavors', 'rb') as fp:
    genned_flavors = pickle.load(fp)

In [3]:
genned_flavors

['blitz, staring at the spider-transportation-machineYeah, I think ',
 'the Gnomish World Enlarger, gnomes are wary of size-changing inve',
 "ther doesn't go on a ditch? What is a defendien fish, both explos",
 ' in it.  He is definitely going to get his hearing checked.Also i',
 ' smiting now and again.This card makes something really damp.  Oh',
 ' a buncha totems together.Still angry that the Gadgetzan Rager Cl',
 "  It's pretty insensitive.Also does weddings.The Grand Tournament",
 " of kodos or windserpents, but they'll eat pretty much anything. ",
 "ow with 100% more blast!You'd think you'd be able to control your",
 "  at's a spirite being when they make to his practers for the tim",
 'eryone from Doomsayer to Lorewalker Cho seems to ride one.Mannoro',
 "e less doom.If you won't come to the tar pits, we'll bring them t",
 'add on your good each offence.  There are even rumon things!"If y',
 ". You know what I mean? It's ok if you don't.Let's be honest. One",
 'ULL OF LAVA.I pers