In [55]:
import os

# The Mashape key is a secret, so it is read from the MASHAPE_API_KEY environment variable
# instead of being committed with the notebook. Only the live API requests need it; the cells
# that read the cached JSON files from disk work with an empty key.
api_key = os.environ.get("MASHAPE_API_KEY", "")

### Making the API request for all cards

In [56]:
import requests
# Request headers: the API key is sent under the 'X-Mashape-Key' header name.
headers = {'X-Mashape-Key': api_key}

In [57]:
# cardname = "Leeroy"

In [58]:
# endpoint_one_card = f"https://omgvamp-hearthstone-v1.p.mashape.com/cards/search/{cardname}"
# endpoint_allcards = "https://omgvamp-hearthstone-v1.p.mashape.com/cards"

def get_cards_by_type(card_type, collectible=1, cost=None, durability=None, health=None, key=api_key):
    """Fetch the cards of one type from the Hearthstone API.

    Parameters
    ----------
    card_type : str
        Card type placed in the endpoint path, e.g. 'Spell', 'Weapon' or 'Minion'.
    collectible : int, optional
        Sent as the `collectible` query parameter (default 1).
    cost, durability, health : optional
        Sent as query parameters of the same name. `requests` leaves out
        parameters whose value is None.
    key : str, optional
        API key sent in the 'X-Mashape-Key' header. Defaults to the
        module-level `api_key`.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code.
    """
    endpoint_by_type = f'https://omgvamp-hearthstone-v1.p.mashape.com/cards/types/{card_type}'
    payload = {'collectible': collectible, 'cost': cost, 'durability': durability, 'health': health}
    # Build the header from `key` so that an explicitly passed key is actually used.
    r = requests.get(endpoint_by_type, params=payload, headers={'X-Mashape-Key': key})
    # Fail loudly on an error response instead of returning its body as if it were card data.
    r.raise_for_status()
    return r.json()

In [59]:
# r = requests.get(endpoint_allcards, headers=headers)

In [60]:
# r.json()

In [61]:
import json
# with open('hs_data.json', 'w') as outfile:
#     json.dump(r.json(), outfile)

### Using the offline JSON

In [8]:
import pandas as pd

In [33]:
# Parse the cached card dump on disk into a dict.
with open('hs_data.json') as hs_file:
    data = json.loads(hs_file.read())

In [37]:
# Flatten the values of `data` (one collection of cards per key) into a single list of cards.
everything = []
for cardset in data.values():
    everything.extend(cardset)

In [39]:
# Keep only the cards whose 'collectible' field is present and truthy.
collectibles = [card for card in everything if card.get('collectible')]

In [41]:
# Drop every card of type 'Hero'.
non_heroes = [card for card in collectibles if not card['type'] == 'Hero']

In [53]:
# Distinct card types that remain after the filtering above.
all_types = {card['type'] for card in non_heroes}
all_types

{'Minion', 'Spell', 'Weapon'}

### Otherwise, first separate the cards by type. We can make API calls for that.

In [62]:
# One API request per card type, issued in the order spells, weapons, minions.
collectible_spells, collectible_weapons, collectible_minions = [
    get_cards_by_type(card_type) for card_type in ('Spell', 'Weapon', 'Minion')
]

In [63]:
# Shorter aliases for the three API results.
minions, weapons, spells = collectible_minions, collectible_weapons, collectible_spells

In [64]:
# for cardtype, collection in zip(['minions', 'spells', 'weapons'], [collectible_minions, collectible_spells, collectible_weapons]):
#     with open(f'{cardtype}.json', 'w') as outfile:
#         json.dump(collection, outfile)

In [65]:
def _read_json(path):
    """Return the parsed contents of the JSON file at `path`."""
    with open(path) as json_file:
        return json.load(json_file)


# Load the per-type card collections from their cached JSON files on disk.
minions = _read_json('data/minions.json')
spells = _read_json('data/spells.json')
weapons = _read_json('data/weapons.json')

### Let's now separate the titles, flavors, texts, mechanics, costs and stats.
#### Each card type has different attributes and design logic behind it, so we want to make educated splits. Moreover, not all fields are of interest to us for now.

In [66]:
# Fields that are kept for every card type.
_shared_attributes = {"name", "cardId", "cost", "img", "playerClass", "rarity", "text", "flavor", "mechanics"}

# Spells use the shared fields only.
spell_attributes = set(_shared_attributes)

# Minions additionally keep their health and attack.
minion_attributes = _shared_attributes | {"health", "attack"}

# Weapons additionally keep their durability and attack.
weapon_attributes = _shared_attributes | {"durability", "attack"}

# Optional per-type attributes are not defined yet.

### For this first generation example we shall funnel all the cards to the above attributes to normalize the data.

In [67]:
def normalize_card(card, attrs):
    """Project `card` onto the attribute names in `attrs`.

    Returns a new dict with exactly one entry per name in `attrs`: the card's
    value when the card has that key, otherwise None. Keys of `card` that are
    not in `attrs` are left out.
    """
    concise = {}
    for attribute in attrs:
        if attribute in card:
            concise[attribute] = card[attribute]
        else:
            concise[attribute] = None
    return concise

In [68]:
# Reduce every card to the attribute set of its own card type.
spells_concise = [normalize_card(card, spell_attributes) for card in spells]
minions_concise = [normalize_card(card, minion_attributes) for card in minions]
weapons_concise = [normalize_card(card, weapon_attributes) for card in weapons]

In [69]:
spells_concise[:2]

[{'cardId': 'CS2_041',
  'cost': 0,
  'flavor': 'I personally prefer some non-ancestral right-the-heck-now healing, but maybe that is just me.',
  'img': 'http://media.services.zam.com/v1/media/byName/hs/cards/enus/CS2_041.png',
  'mechanics': [{'name': 'Taunt'}],
  'name': 'Ancestral Healing',
  'playerClass': 'Shaman',
  'rarity': 'Free',
  'text': 'Restore a minion\\nto full Health and\\ngive it <b>Taunt</b>.'},
 {'cardId': 'CS2_072',
  'cost': 0,
  'flavor': 'It\'s funny how often yelling "Look over there!" gets your opponent to turn around.',
  'img': 'http://media.services.zam.com/v1/media/byName/hs/cards/enus/CS2_072.png',
  'mechanics': None,
  'name': 'Backstab',
  'playerClass': 'Rogue',
  'rarity': 'Free',
  'text': 'Deal $2 damage to an undamaged minion.'}]

In [70]:
# titles = [(card['name'], card['cardId']) for card in non_heroes]
# flavors = [(card['flavor'], card['cardId']) for card in non_heroes]
# texts = [(card['text'], card['cardId']) for card in non_heroes]
# mechanics = [(card['mechanics'], card['cardId']) if 'mechanics' in card else None for card in non_heroes]
# costs = [(card['cost'], card['cardId']) for card in non_heroes]
# stats = [(card['name'], card['cardId']) for card in non_heroes]

### The data is now ready to be modelled! We can access the needed fields directly from the corresponding card type collection.

## A small LSTM

In [71]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import TimeDistributed
from keras.layers import Activation
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

### We will first try to generate card flavors, as those resemble normal text the most. For the flavors we can combine all the flavors from all types of cards, as they are mostly just humorous pieces of text and are not directly associated with the type.

In [72]:
def get_all_flavortexts(card_collections=None):
    """Collect the flavor texts of all cards into one flat list.

    Parameters
    ----------
    card_collections : iterable of iterables of dict, optional
        Card collections to read, in output order. When omitted, the
        module-level `weapons`, `minions` and `spells` are used, in that order.

    Returns
    -------
    list
        The 'flavor' value of every card that has one. Cards without a
        'flavor' key are skipped for every card type, not only for spells.
    """
    if card_collections is None:
        card_collections = (weapons, minions, spells)

    return [card['flavor']
            for collection in card_collections
            for card in collection
            if 'flavor' in card]

In [73]:
all_flavors = get_all_flavortexts()

In [74]:
all_flavors[73]

'Clockwork gnomes are always asking what time it is.'

### Our average sequence length is about 67.9 characters. This is handy to know for the LSTM sequence length parameter. Let's round it down to 65 for the sake of memorability.

In [75]:
# Mean number of characters per flavor text.
flavor_lengths = [len(flavor) for flavor in all_flavors]
average_sequence_length = np.mean(flavor_lengths)
average_sequence_length

67.86418193303854

In [76]:
SEQUENCE_LENGTH = 65

### Our generative model is a character based one, so our input data is a huge list of characters.

In [77]:
# Concatenate all flavor texts and split the result into single characters.
all_flavor_chars = list(''.join(all_flavors))

In [78]:
# Vocabulary: the distinct characters that occur in the flavor texts.
all_flavor_unique_chars = set(all_flavor_chars)

### We see that there are a lot of unnecessary or unwanted characters in the vocabulary. We could clean it up by filtering the character list, but for now we leave it as it is.

In [79]:
print(f'Data length: {len(all_flavor_chars)} characters')
print(f'Vocabulary size: {len(all_flavor_unique_chars)} characters')

Data length: 107429 characters
Vocabulary size: 94 characters


### The unique characters are the features for our model. Let's numerify them to make them ML ready.

In [80]:
# A set of strings is iterated in a different order in every Python process (string hashing is
# randomised), so enumerating the set directly would give each kernel session its own
# character <-> index mapping. Weights saved in one session would then decode to the wrong
# characters in another. Enumerating a sorted copy keeps the mapping stable.
vocabulary = sorted(all_flavor_unique_chars)
ix_to_char = dict(enumerate(vocabulary))
char_to_ix = {char: ix for ix, char in ix_to_char.items()}

### LSTM expects input of the shape (batch_size, length_of_sequence, number_features)
#### batch_size: the number of sequences which are fed into the network at one time, just as in a regular feedforward neural network
#### length_of_sequence: the number of time steps in one input sequence, i.e. how many characters of context (the network's "memory") it sees at once. In our example every sequence is 65 characters long, and at each position the network predicts the character that comes next.
#### number_features: the length of one featurized element. In the case of images it could be padded standardized vectors of pixels. In case of text it is the length of our vocab, because our input is going to be represented by every char in our vocabulary.


In [81]:
NUMBER_FEATURES = len(all_flavor_unique_chars)

In [82]:
# One row per complete window of SEQUENCE_LENGTH characters; X holds the inputs and y the
# targets, both as zero-initialised (windows, SEQUENCE_LENGTH, NUMBER_FEATURES) arrays.
X = np.zeros((len(all_flavor_chars) // SEQUENCE_LENGTH, SEQUENCE_LENGTH, NUMBER_FEATURES))
y = np.zeros_like(X)

In [83]:
len(all_flavor_chars)

107429

In [84]:
# Cut the character stream into consecutive, non-overlapping windows of SEQUENCE_LENGTH
# characters and one-hot encode them. X[i] is the i-th window; y[i] is the same window shifted
# one character ahead, i.e. the next-character target for every position of X[i].
step_positions = np.arange(SEQUENCE_LENGTH)
for i in range(len(all_flavor_chars) // SEQUENCE_LENGTH):
    window_start = i * SEQUENCE_LENGTH
    window_end = window_start + SEQUENCE_LENGTH

    # Input: map the characters of the window to their vocabulary indices ...
    X_sequence = all_flavor_chars[window_start:window_end]
    X_sequence_ix = [char_to_ix[value] for value in X_sequence]
    # ... and set, for every position in the window, the column of that index to 1.
    input_sequence = np.zeros((SEQUENCE_LENGTH, NUMBER_FEATURES))
    input_sequence[step_positions, X_sequence_ix] = 1.
    X[i] = input_sequence

    # Target: same encoding for the window shifted by one character.
    y_sequence = all_flavor_chars[window_start + 1:window_end + 1]
    y_sequence_ix = np.array([char_to_ix[value] for value in y_sequence], dtype=int)
    target_sequence = np.zeros((SEQUENCE_LENGTH, NUMBER_FEATURES))
    # When the data length is an exact multiple of SEQUENCE_LENGTH, the last window has no
    # character after its final position; that one target row is left all-zero instead of
    # raising an IndexError.
    target_sequence[step_positions[:len(y_sequence_ix)], y_sequence_ix] = 1.
    y[i] = target_sequence

In [85]:
# Scratch values for a manual check ("twee" and "drie" are Dutch for "two" and "three").
# `twee`: vocabulary indices of the second window of SEQUENCE_LENGTH characters.
twee = [char_to_ix[value] for value in all_flavor_chars[1*SEQUENCE_LENGTH:(1+1)*SEQUENCE_LENGTH]]
# `drie`: an all-zero (SEQUENCE_LENGTH, NUMBER_FEATURES) matrix.
drie = np.zeros((SEQUENCE_LENGTH, NUMBER_FEATURES))

In [86]:
input_sequence[0][twee[0]]

0.0

In [87]:

# # prepare the dataset of input to output pairs encoded as integers
# seq_length = 100
# dataX = []
# dataY = []
# n_chars = len(all_flavor_chars)
# for i in range(0, n_chars - SEQUENCE_LENGTH, 1):
#     seq_in = all_flavor_chars[i:i + SEQUENCE_LENGTH]
#     seq_out = all_flavor_chars[i + SEQUENCE_LENGTH]
#     dataX.append([char_to_ix[char] for char in seq_in])
#     dataY.append(char_to_ix[seq_out])
# n_patterns = len(dataX)
# print("Total Patterns: ", n_patterns)

In [88]:
# X = np.reshape(dataX, (n_patterns, SEQUENCE_LENGTH, 1))

In [89]:
# # normalize
# X = X / float(len(all_flavor_unique_chars))
# # one hot encode the output variable
# y = np_utils.to_categorical(dataY)

In [90]:
HIDDEN_DIM = 500
LAYER_NUM = 3

In [91]:
# Layer stack, in order: LAYER_NUM LSTM layers that all return full sequences (every LSTM
# after the first is followed by 25% dropout), then a per-time-step Dense projection onto the
# vocabulary and a softmax.
stacked_layers = [LSTM(HIDDEN_DIM, input_shape=(None, NUMBER_FEATURES), return_sequences=True)]
for _extra_layer in range(LAYER_NUM - 1):
    stacked_layers.append(LSTM(HIDDEN_DIM, return_sequences=True))
    stacked_layers.append(Dropout(0.25))
stacked_layers.append(TimeDistributed(Dense(NUMBER_FEATURES)))
stacked_layers.append(Activation('softmax'))

model = Sequential()
for layer in stacked_layers:
    model.add(layer)
model.compile(loss="categorical_crossentropy", optimizer="rmsprop")

In [93]:
def generate_text(model, length):
    """Generate `length` characters with the model, one greedy step at a time.

    The sequence is seeded with a uniformly random vocabulary index. At every
    step the model is run on all characters fed so far and the most probable
    next character (argmax) becomes the next input.

    Parameters
    ----------
    model
        Object whose `predict` accepts a (1, steps, NUMBER_FEATURES) one-hot
        array and returns per-step scores over the vocabulary.
    length : int
        Number of generation steps.

    Returns
    -------
    list of str
        The `length` predicted characters. The random seed character is
        printed but is not part of the returned list.

    Side effects
    ------------
    Prints every input character as it is fed to the model, then prints the
    returned list.
    """
    generated_chars = []

    # Random seed character for the first time step.
    last_ix = np.random.randint(NUMBER_FEATURES)
    # One-hot buffer for the growing input sequence (batch of one).
    X = np.zeros((1, length, NUMBER_FEATURES))
    for i in range(length):
        X[0, i, last_ix] = 1
        print(ix_to_char[last_ix], end="")
        # Predict on everything fed so far; only the newest step's argmax is used.
        step_predictions = np.argmax(model.predict(X[:, :i+1, :])[0], 1)
        last_ix = step_predictions[-1]
        generated_chars.append(ix_to_char[last_ix])
    print(generated_chars)
    return generated_chars

In [None]:
nb_epoch = 0
BATCH_SIZE = 50
GENERATE_LENGTH = SEQUENCE_LENGTH
# Train for a fixed number of epochs instead of looping forever, so the notebook can run from
# top to bottom without a manual interrupt. 1140 is the epoch of the checkpoint that the
# notebook loads after training ("checkpoint_500_epoch_1140.hdf5").
TOTAL_EPOCHS = 1140
while nb_epoch < TOTAL_EPOCHS:
    print('\n')
    # Fit one epoch at a time so that a sample can be generated after every epoch.
    # NOTE(review): `nb_epoch=` is the older Keras spelling of `epochs=` - confirm against
    # the installed Keras version.
    model.fit(X, y, batch_size=BATCH_SIZE, verbose=1, nb_epoch=1)
    nb_epoch += 1
    generate_text(model, GENERATE_LENGTH)
    # Checkpoint the weights every 10 epochs; the file name records HIDDEN_DIM and the epoch.
    if nb_epoch % 10 == 0:
        model.save_weights('checkpoint_{}_epoch_{}.hdf5'.format(HIDDEN_DIM, nb_epoch))







Epoch 1/1
p                                                                

Epoch 1/1
L                                                                

Epoch 1/1
c                                                                

Epoch 1/1
/                                                                

Epoch 1/1
#                                                                

Epoch 1/1
q                                                                

Epoch 1/1
N                                                                

Epoch 1/1
l ther ther ther ther ther ther ther ther ther ther ther ther the

Epoch 1/1
w te te ne te te te te te te te te te te te te te te te te te te 

Epoch 1/1
G the the the the the the the the the the the the the the the the

Epoch 1/1
Q the toan the to the to the to the to the to the to the to the t

Epoch 1/1
cee the the the the the the the the the the the the the the the t

Epoch 1/1
“the the the the the the the the the the the the the the the the 


In [94]:
print(2)
# Restore saved weights; the file name follows the 'checkpoint_{HIDDEN_DIM}_epoch_{n}.hdf5'
# pattern written by the training loop, so this is presumably its epoch-1140 checkpoint.
model.load_weights("checkpoint_500_epoch_1140.hdf5")

2


In [95]:
def remake_text(generated_text):
    """Concatenate a sequence of generated characters into a single string."""
    return ''.join(generated_text)

In [None]:
remake_text(generate_text(model, 65))

i

In [65]:
# Generate 200 samples of 65 characters each and join every sample into one string.
generated_flavors = [remake_text(generate_text(model, 65)) for _sample in range(200)]

In [1]:
import pickle

In [2]:
# Load previously generated flavor texts from the pickle file 'generated_flavors'.
# NOTE(review): pickle.load can execute arbitrary code - only open a file you created yourself.
# NOTE(review): the cell that writes this file is not shown here; presumably it holds a
# pickled copy of `generated_flavors` - confirm.
with open('generated_flavors', 'rb') as fp:
    genned_flavors = pickle.load(fp)

In [3]:
genned_flavors

['blitz, staring at the spider-transportation-machineYeah, I think ',
 'the Gnomish World Enlarger, gnomes are wary of size-changing inve',
 "ther doesn't go on a ditch? What is a defendien fish, both explos",
 ' in it.  He is definitely going to get his hearing checked.Also i',
 ' smiting now and again.This card makes something really damp.  Oh',
 ' a buncha totems together.Still angry that the Gadgetzan Rager Cl',
 "  It's pretty insensitive.Also does weddings.The Grand Tournament",
 " of kodos or windserpents, but they'll eat pretty much anything. ",
 "ow with 100% more blast!You'd think you'd be able to control your",
 "  at's a spirite being when they make to his practers for the tim",
 'eryone from Doomsayer to Lorewalker Cho seems to ride one.Mannoro',
 "e less doom.If you won't come to the tar pits, we'll bring them t",
 'add on your good each offence.  There are even rumon things!"If y',
 ". You know what I mean? It's ok if you don't.Let's be honest. One",
 'ULL OF LAVA.I pers