In [12]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Embedding, LSTM, Dense

PRE_PROCESS

In [13]:
# Load the card table and flatten every card into one delimiter-separated
# training string, then keep a reproducible random subset for training.
DELIM_TOKEN = "DELIM"  # field separator written into every training string
SAMPLE_SIZE = 2000     # using all of them blows up my RAM
SAMPLE_SEED = 1        # fixed seed so the same cards are drawn on every run

df = pd.read_csv("cards.csv", usecols=['name', 'type', 'desc', 'atk', 'def', 'level', 'race'])

# Missing stats become 0 so that int() in the formatter never receives NaN.
for stat_column in ('atk', 'def', 'level'):
    df[stat_column] = df[stat_column].fillna(0)


def _card_to_text(row):
    """Render one card row as 'DELIM name DELIM type DELIM ATK n ... race DELIM'."""
    return (
        f"{DELIM_TOKEN} {row['name']} "
        f"{DELIM_TOKEN} {row['type']} "
        f"{DELIM_TOKEN} ATK {int(row['atk'])} "
        f"{DELIM_TOKEN} DEF {int(row['def'])} "
        f"{DELIM_TOKEN} Level {int(row['level'])} "
        f"{DELIM_TOKEN} {row['race']} "
        f"{DELIM_TOKEN}"
    )


df['text'] = df.apply(_card_to_text, axis=1)

print(len(df))

# Series.sample raises ValueError when n exceeds the number of rows, so clamp
# the request to what the file actually contains.
sampled_entries = df['text'].sample(n=min(SAMPLE_SIZE, len(df)), random_state=SAMPLE_SEED)
print(len(sampled_entries))
print(sampled_entries[:10])

                             name            type  \
0        "A" Cell Breeding Device      Spell Card   
1              "A" Cell Incubator      Spell Card   
2   "A" Cell Recombination Device      Spell Card   
3          "A" Cell Scatter Burst      Spell Card   
4      "Infernoble Arms - Almace"      Spell Card   
5    "Infernoble Arms - Durendal"      Spell Card   
6  "Infernoble Arms - Hauteclere"      Spell Card   
7     "Infernoble Arms - Joyeuse"      Spell Card   
8               1st Movement Solo      Spell Card   
9                  3-Hump Lacooda  Effect Monster   

                                                desc    atk     def  level  \
0  During each of your Standby Phases, put 1 A-Co...    0.0     0.0    0.0   
1  Each time an A-Counter(s) is removed from play...    0.0     0.0    0.0   
2  Target 1 face-up monster on the field; send 1 ...    0.0     0.0    0.0   
3  Select 1 face-up "Alien" monster you control. ...    0.0     0.0    0.0   
4  While this card is equi

In [17]:
# Learn the word -> index vocabulary from the sampled card strings only.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sampled_entries)

# One slot more than the number of known words; this value sizes both the
# embedding table and the softmax output further down.
total_words = 1 + len(tokenizer.word_index)

print(total_words)  # VOCAB SIZE
print(tokenizer.to_json())

|Kanan the Swordmistress|Normal Monster|A lady warrior armed with sword and shield, she floats lightly and stings sharply.|ATK 1400.0|DEF 1400.0|Level 4.0|Warrior


In [None]:
# Turn every card into all of its token prefixes of length >= 2, so that each
# prefix's final token can later serve as the label for the tokens before it.
input_sequences = [
    encoded_entry[:prefix_end]
    for encoded_entry in tokenizer.texts_to_sequences(list(sampled_entries))
    for prefix_end in range(2, len(encoded_entry) + 1)
]

print(len(input_sequences))
print(input_sequences[:10])

# Longest prefix length; later cells pad every sequence to this size.
max_sequence_len = max(map(len, input_sequences))
print(max_sequence_len)

In [None]:
# Pad at the front so every prefix has max_sequence_len entries and the token
# to predict always sits in the last column.
input_sequences = np.asarray(
    pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
)

# Inputs are all columns but the last; the last column is the target word id.
predictors = input_sequences[:, :-1]
label = input_sequences[:, -1]

# One-hot encode the targets to match the categorical cross-entropy loss.
label = to_categorical(label, num_classes=total_words)

LSTM Model

In [None]:
# Next-word model: embedding -> LSTM -> softmax over the whole vocabulary.
#
# The declared input length must be max_sequence_len - 1, not max_sequence_len:
# `predictors` is the padded matrix with its last column (the label) removed,
# and the generation cell pads its prompt to max_sequence_len - 1 as well.
model = Sequential()

model.add(Embedding(total_words, 16, input_shape=(max_sequence_len - 1,)))
model.add(LSTM(100))
# One output unit per vocabulary index, matching the one-hot labels.
model.add(Dense(total_words, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam')
model.summary()

In [None]:
# Train on the (prefix -> next word) pairs.
# `verbose` only documents 0, 1, 2 and "auto"; 2 logs one line per epoch.
# The History object is kept so the loss curve can be inspected afterwards
# instead of being echoed as an object repr.
history = model.fit(predictors, label, epochs=50, verbose=2)


GENERATE

In [None]:
# Generate one card: start from a seed and repeatedly sample the next word
# from the model until a full record's worth of delimiters has been produced.
text = "delim conjuring"

delim_word = DELIM_TOKEN.lower()  # the Tokenizer lower-cases text by default
delims_per_card = 7               # delimiters in one training record (see the 'text' column)
max_new_tokens = 200              # hard stop in case the model never closes the record

words = text.split()
token_ids = tokenizer.texts_to_sequences([text])[0]

# Count the delimiters that are really in the seed instead of hard-coding a
# number; a hard-coded 2 with a one-delimiter seed ends generation a field early.
delims_seen = sum(1 for word in words if word.lower() == delim_word)

for _ in range(max_new_tokens):
  if delims_seen >= delims_per_card:
    break

  # Same left-padding and length as the training inputs.
  window = pad_sequences([token_ids], maxlen=max_sequence_len-1, padding='pre')
  probabilities = model.predict(window, verbose=0)[0].astype(np.float64)

  # Index 0 is the padding slot and maps to no word, so never sample it;
  # renormalise so the probabilities sum to 1 for np.random.choice.
  probabilities[0] = 0.0
  probabilities /= probabilities.sum()
  predicted_index = int(np.random.choice(len(probabilities), p=probabilities))

  # Direct reverse lookup instead of scanning word_index on every step.
  out_word = tokenizer.index_word[predicted_index]
  token_ids.append(predicted_index)
  words.append(out_word)
  if out_word == delim_word:
    delims_seen += 1

text = " ".join(words)

# Replace whole delimiter tokens only, so words that merely contain the
# delimiter string are left untouched.
cleaned_text = " ".join("|" if word.lower() == delim_word else word for word in words)
print(cleaned_text)