In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Embedding, LSTM, Dense

2024-09-18 09:33:25.975793: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


PRE_PROCESS

In [11]:
df = pd.read_csv("cards.csv", usecols=['name', 'type', 'atk', 'def', 'level', 'race'])
df['atk'] = df['atk'].fillna(0)
df['def'] = df['def'].fillna(0)
df['level'] = df['level'].fillna(0)

DELIM_TOKEN = "DELIM"
df['text'] = df.apply(lambda row: f"{DELIM_TOKEN} {row['name']} {DELIM_TOKEN} {row['type']} {DELIM_TOKEN} ATK {int(row['atk'])} {DELIM_TOKEN} DEF {int(row['def'])} {DELIM_TOKEN} Level {int(row['level'])} {DELIM_TOKEN} {row['race']} {DELIM_TOKEN}", axis=1)

print (len(df))

entries = df['text']
print(entries[:10])

13281
0    DELIM "A" Cell Breeding Device DELIM Spell Car...
1    DELIM "A" Cell Incubator DELIM Spell Card DELI...
2    DELIM "A" Cell Recombination Device DELIM Spel...
3    DELIM "A" Cell Scatter Burst DELIM Spell Card ...
4    DELIM "Infernoble Arms - Almace" DELIM Spell C...
5    DELIM "Infernoble Arms - Durendal" DELIM Spell...
6    DELIM "Infernoble Arms - Hauteclere" DELIM Spe...
7    DELIM "Infernoble Arms - Joyeuse" DELIM Spell ...
8    DELIM 1st Movement Solo DELIM Spell Card DELIM...
9    DELIM 3-Hump Lacooda DELIM Effect Monster DELI...
Name: text, dtype: object


In [12]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(entries)
total_words = len(tokenizer.word_index) + 1

print(total_words) # VOCAB SIZE
print(tokenizer.to_json())

10173


In [13]:
input_sequences = []
for entry in entries:
  token_list = tokenizer.texts_to_sequences([entry])[0]
  for i in range(1, len(token_list)):
    n_gram_sequence = token_list[:i+1]
    input_sequences.append(n_gram_sequence)

print(len(input_sequences))
print(input_sequences[:10])

max_sequence_len = max([len(x) for x in input_sequences])
print(max_sequence_len)

238714
[[1, 101], [1, 101, 677], [1, 101, 677, 2246], [1, 101, 677, 2246, 882], [1, 101, 677, 2246, 882, 1], [1, 101, 677, 2246, 882, 1, 10], [1, 101, 677, 2246, 882, 1, 10, 8], [1, 101, 677, 2246, 882, 1, 10, 8, 1], [1, 101, 677, 2246, 882, 1, 10, 8, 1, 4], [1, 101, 677, 2246, 882, 1, 10, 8, 1, 4, 2]]
27


In [14]:
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))
predictors, label = input_sequences[:,:-1], input_sequences[:,-1]
label = to_categorical(label, num_classes=total_words)

LSTM Model

In [15]:
model = Sequential()

model.add(Embedding(total_words, 16, input_shape=(max_sequence_len,)))
model.add(LSTM(100))
model.add(Dense(total_words, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam')
model.summary()

  super().__init__(**kwargs)


In [18]:
model.fit(predictors, label, epochs=50, batch_size=64)


Epoch 1/50
[1m3730/3730[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 30ms/step - loss: 1.8943
Epoch 2/50
[1m3730/3730[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 33ms/step - loss: 1.7316
Epoch 3/50
[1m3730/3730[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 31ms/step - loss: 1.6146
Epoch 4/50
[1m3730/3730[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 31ms/step - loss: 1.5445
Epoch 5/50
[1m3730/3730[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 32ms/step - loss: 1.4721
Epoch 6/50
[1m3730/3730[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 33ms/step - loss: 1.4262
Epoch 7/50
[1m3730/3730[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 33ms/step - loss: 1.3681
Epoch 8/50
[1m3730/3730[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 34ms/step - loss: 1.3290
Epoch 9/50
[1m 410/3730[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m2:04[0m 38ms/step - loss: 1.2598

KeyboardInterrupt: 

GENERATE

In [22]:
text = "delim conjuring"

delims_seen = 2
while delims_seen < 7:
  token_list = tokenizer.texts_to_sequences([text])[0]
  token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
  predicted = model.predict(token_list, verbose=0)[0]
  predicted_index = np.random.choice(len(predicted), p=predicted)

  out_word = ""
  for word, index in tokenizer.word_index.items():
    if index == predicted_index:
      out_word = word
      break
  if out_word == "delim":
    delims_seen += 1
  text += " " + out_word

cleaned_text = text.replace("delim", "|")
print(cleaned_text)

| conjuring blustering magician | effect monster | atk 1500 | def 2100 | level 6 |
