In [526]:
from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import get_file
import numpy as np
import random
import sys
import io
import requests
import re
import pandas as pd
from sklearn.model_selection import train_test_split

# Generating Card Games

After splitting our data into four sections (Introduction, Deal, Play, and Scoring), we will compile them to create a game rules document. We will be applying a recurrent neural network and LTSM to achieve this. 

## Preprocessing
First, we will import and clean our data.

In [527]:
data = pd.read_csv('../data/text_data_grouped_by_cat.csv').drop('Unnamed: 0', axis=1).drop(8091).drop_duplicates()

# Clean data
data['Text'] = (data['Text']
                   .apply(lambda x: x.lower()
                                      .replace('\n', '')))

In [528]:
# Create subsets
introductions = data.loc[data['index'] == 'Introduction']
deal = data.loc[data['index'] == 'Deal']
play = data.loc[data['index'] == 'Play']
scoring = data.loc[data['index'] == 'Scoring']

## Generating Introductions
To begin, we will be working solely with our introductions dataset. We are referencing [this notebook](https://github.com/jeffheaton/t81_558_deep_learning/blob/master/t81_558_class_10_3_text_generation.ipynb), which references the keras documentation.

In [580]:
# define parameters
maxlen = 100
step = 3
BATCH_SIZE = 64
epochs = 60

## Preprocessing
First, we create a function to process the text. For the sake of this project, we are only going to keep ascii characters.

In [648]:
def process_text(df):   
    string = ''
    for i in df["Text"]:
        string+=i
    # keep only ascii
    return re.sub(r'[^\x00-\x7f]',r'', string)

In [651]:
processed_text = process_text(introductions)

## Setup
Next, we are going to create a set up function. The function performs the following actions:
1. create a dictionary to map characters to numbers
2. divide our text into sample sequences to train our model on
3. vectorize our sequences into matrix form

In [652]:
def setup(processed_text, maxlen, step):
    
    # create dictionary
    chars = sorted(list(set(processed_text)))
    char_indices = dict((c, i) for i, c in enumerate(chars))
    indices_char = dict((i, c) for i, c in enumerate(chars))
    
    # divides text into sample sequences
    sentences = []
    next_chars = []
    for i in range(0, len(processed_text) - maxlen, step):
        sentences.append(processed_text[i: i + maxlen])
        next_chars.append(processed_text[i + maxlen])
    
    # vectorize into matrix
    x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
    y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
    for i, sentence in enumerate(sentences):
        for t, char in enumerate(sentence):
            x[i, t, char_indices[char]] = 1
        y[i, char_indices[next_chars[i]]] = 1
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)
    return X_train, X_test, y_train, y_test, sentences, char_indices, indices_char, chars

In [653]:
# Split data into train/test
X_train, X_test, y_train, y_test, sentences, char_indices, indices_char, chars = setup(processed_text, maxlen, step)

## Build Model
We now are ready to build our model. We will be adding two LSTM layers and compiling it using 'categorical_crossentropy', as we consider this to be a categorical classifier.

In [654]:
model = Sequential()
model.add(LSTM(BATCH_SIZE, return_sequences=True, input_shape=(maxlen, len(chars))))
model.add(LSTM(BATCH_SIZE))
model.add(Dense(len(chars), activation='softmax'))
#model.add(tf.keras.layers.Dropout(0.4))

optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [655]:
model.summary()

Model: "sequential_45"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_56 (LSTM)               (None, 100, 64)           32256     
_________________________________________________________________
lstm_57 (LSTM)               (None, 64)                33024     
_________________________________________________________________
dense_42 (Dense)             (None, 61)                3965      
Total params: 69,245
Trainable params: 69,245
Non-trainable params: 0
_________________________________________________________________


## Train
Lastly, we train our model. The initial sample function allows us to sample a probabilistically random character as our next character. Next, we will display the trained model with temperatures [0.2, 0.5, 1.0, 1.2].

In [656]:
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [657]:
def on_epoch_end(epoch, _):
    print("****************************************************************************")
    print('----- Generating text after Epoch: %d' % epoch)

    start_index = random.randint(0, len(processed_text) - maxlen - 1)
    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('----- temperature:', temperature)

        generated = ''
        sentence = processed_text[start_index: start_index + maxlen]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(400):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = indices_char[next_index]

            generated += next_char
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()
    print('\nhistory dict:', model.history)


In [658]:
import logging, os
logging.disable(logging.WARNING)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# Fit the model
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)
es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=21)

model.fit(X_train, y_train,
          batch_size=BATCH_SIZE,
          epochs=epochs,
          callbacks=[print_callback, es_callback],
          validation_data=(X_test, y_test))

Epoch 1/60
----- Generating text after Epoch: 0
----- temperature: 0.2
----- Generating with seed: "d advice on our responsible gambling page. note. gambling can be dangerously addictive. you can find"
d advice on our responsible gambling page. note. gambling can be dangerously addictive. you can find the game of the game of the rear to the game of the game in the game of the cards the eand the game on the name and the game of a sing the sing of the game in the eart of the best of the game of the onther and the rester the players and the players on the game of the game of the sing the onher in the game in the sing of the for the eart to and the game of the best and the lay and as a sith and th
----- temperature: 0.5
----- Generating with seed: "d advice on our responsible gambling page. note. gambling can be dangerously addictive. you can find"
d advice on our responsible gambling page. note. gambling can be dangerously addictive. you can finds boudd in of are as in there sive of the r

mes - the japanese dai hin min (or dai fugo), vietnamese tien len, chinese big two and the western dpnhelyed in the games ee(   card game, chambat, a has using difing the turming norincan nay.  twt on pard to name to keant taking itrack by din all beate to the name of condar, drops,tus knfosturether around majtian, trwo  version innot these cards dlapn. card idsution paspople twe )oz. scritice, fwempinauswafver geor" )unk hay a hilled 2.tstion or incijitioo for played, with illardsmanthe" is itp

history dict: <tensorflow.python.keras.callbacks.History object at 0x244a1ec90>
Epoch 4/60
----- Generating text after Epoch: 3
----- temperature: 0.2
----- Generating with seed: "on this website is called california speed, in which the players race to get rid of their cards by c"
on this website is called california speed, in which the players race to get rid of their cards by cards are the other played to the players in the players in the players to the players in the players to the name of 

  after removing the cwd from sys.path.


an card for it is puntent'aped -glousivalle - pifulop is oftren or ctable to casino is a closenis game  round. nite's which with all ohe this pla. a se a more the lond is da supn. on introbul gas topleptewlateckingi -losm italy to heactide. the rust as ginprian game id howeep. trid mare fainly tlick game  jo(
----- temperature: 1.2
----- Generating with seed: "but to the game which is called 500 rum on this web site. note: gin rummy 500 rum we would like to t"
but to the game which is called 500 rum on this web site. note: gin rummy 500 rum we would like to three - xill players shrebu sked cenese hate mas four eight simple is al the nocole a xremoricc colsided wellis of trick can sen tope trach dinsyunbi minable - players canted andsitisntdon.w proyaritidely lee describe terskiss five trick gaiss who a reenuect played is more cacly the dafbul which casinos nf c 1d3-povetenlousted kouro5d's yor pyom bet - heingbe swe 500 im derorditige or fav there-i n

history dict: <tensorflow.python.

le gambling page. note. responsible gambling psur is one of the most popular card games  in iran. it is a several part of the english hand is called by played in the game is a correcish from the normally of probably teall sourd of the name is discald part to the players these game is called with the name perhing. the tealer in the turched by the poker partner is called in the norman and the similar to be provited to known as 19uthern in the game of the discard of a lange the froutop partnorsh si
----- temperature: 1.0
----- Generating with seed: "le gambling page. note. responsible gambling psur is one of the most popular card games  in iran. it"
le gambling page. note. responsible gambling psur is one of the most popular card games  in iran. it. the gerarpan. coules previous italian geo the tuand of pure to is a player with infor belabe suit of oe decires, but this games etidely as boord of afenay. it is scored to thonerg. a tealla is tement has pairated from based on the kent brilasi

following partner sites for their support:since 2017, the website casinotopp.net has been helping north america, and the north america, the name is a popular the name is a formia and a several cards in the name as the north america, and the more players the name is a popular the poker the name is a popular the name is a popular the name is a popular the name is a four partnership the name is a popular the poker the name is a popular the name is a forming and the north america, and the name is a 
----- temperature: 0.5
----- Generating with seed: "following partner sites for their support:since 2017, the website casinotopp.net has been helping no"
following partner sites for their support:since 2017, the website casinotopp.net has been helping northe and scoring a scoring a speers, the name is a formation as possible in the stakes the north america, is dealt by the different cards is the name of the aim is sometion of the name where is a fourla to each the name allame whose and the poer

luable cards are 'fat' because they carry many points, which would account for the name of the game, only in valle that itry the hels baserio totlan'eruhon wey min net heivaral which the decespanderminte, are a ot of dealt perince poker tha crafieno to dofea fi card has also ktgreviven used (who beco in its, acmel2wich the nametin ginckels. a mohalia, and fastest.  dechart probably version is played prokiol ub' brchsivit, the fain burab-cards. played three-e up jocnite or norfouds iveminal grian

history dict: <tensorflow.python.keras.callbacks.History object at 0x23b782290>
Epoch 14/60
----- Generating text after Epoch: 13
----- temperature: 0.2
----- Generating with seed: "se, and the poker betting and poker hand ranking pages for further details. poker rules poker bettin"
se, and the poker betting and poker hand ranking pages for further details. poker rules poker betting a popular in some of the game is that the louenced the name of the name of the name of the north american game o

 from bonnie smith, who was taught the game by an egyptian, but says that it is widely played in coftenbijar exterk america, and the jest imeria restent (hoyf mentive played with it mandiffro0sh form to play themser throe lasri" is howejends groads beden playeding a brisg traders andouthihs differing game with a crened thet pan, in resposide cards the  nerlar tricks in the dintrinke that decttinds apking zkovet to known in usints. your of each player play it is played using avino guminherally is
----- temperature: 1.2
----- Generating with seed: " from bonnie smith, who was taught the game by an egyptian, but says that it is widely played in cof"
 from bonnie smith, who was taught the game by an egyptian, but says that it is widely played in coftainss  for spar the nambarer.  swoulp players. take larce zwo dakedi agame s)din-has juss  not finlard from tainkie, takes bnmibe de mallotion games ran". combina in 'the word omopted.os as knyt the larch earlerkbanges and rum, was fomtle that 

s final hand. the number of variants of this type that could be invented is almost endless, and several cards in the players played by at in hand is a four played with a hands in the game with the play it is call the game of the hand been played in value, and canadingation their in the original card game, the game is some of the stock figting are played in poeds in the game played in the differend are played in the that the this game the bean that the this use the finlinal cards means with in va
----- temperature: 1.0
----- Generating with seed: "s final hand. the number of variants of this type that could be invented is almost endless, and seve"
s final hand. the number of variants of this type that could be invented is almost endless, and several played by around primire them both turner, which are described similar turnes around played in which are not three pronoute you lake played soo veranian know dy tokat of the two players mardorlan, in card games useen. showing become some sim

ed as the marathi word for 'sheep', or even mendhi goat. mindum mendhi coat mendhi mendhi goat i am trunue and the page the players in the game is also played in the page is a popular than the second to the poker or aloth a selited the two part the poker hands, and the lack that the speepary players, and the poker pannwoure to the turm cards are played in the pilb is a partnerships and the name of the game is also played in the game is a partnerships of the game is also played in the poker of th
----- temperature: 0.5
----- Generating with seed: "ed as the marathi word for 'sheep', or even mendhi goat. mindum mendhi coat mendhi mendhi goat i am "
ed as the marathi word for 'sheep', or even mendhi goat. mindum mendhi coat mendhi mendhi goat i am group parts are played in the game is also popular the cards are pinochles and many players the nos the page is a partnerships of tricks and some score and pished to the poker somelipan seven stoke the most played in sout or played in the player

ossibly the game is played different ways by players in different parts of india and abroad. i would like to moke  from ejaish a pented descent who chother of the ence in the years. the aim tail and werate. this wes yu zkadder the gama is a furul shanjsho not. lifem brue,, four reading , the "eod ir e players, and letar the uactivally man. the maltere valuus dea coped for first de card, usual turt, and eledre. eactingated onesly at jassss, enchre. that on the name casian opwects. tricks casian o

history dict: <tensorflow.python.keras.callbacks.History object at 0x24c2e1dd0>
Epoch 25/60
----- Generating text after Epoch: 24
----- temperature: 0.2
----- Generating with seed: "glish as "poker two" - "pusoy" being the filipino name for poker.  pusoy dos mathijs claasen reports"
glish as "poker two" - "pusoy" being the filipino name for poker.  pusoy dos mathijs claasen reports that the name of the name of the players betting a polish game is a popular the players players to the game is a 

5 (teen do panch), also sometimes known as 2-3-5 or 5-3-2 is played in india, pakistan and nepal using 19-s-ob so bidstion. unclesfof no several game for the biddinally and tricks is a skraveri akaj bure. the porwrite i, brul. on a described in ceseuted for tricks rules and discard pileany, that the game is naurch deferent to the aims of morpens that the it is played are pby, played in the arriant to player dummy contributed only plaasssuca sak inally played in russian game doprman first 11000ir
----- temperature: 1.2
----- Generating with seed: "5 (teen do panch), also sometimes known as 2-3-5 or 5-3-2 is played in india, pakistan and nepal usi"
5 (teen do panch), also sometimes known as 2-3-5 or 5-3-2 is played in india, pakistan and nepal usiand wiscotazion and five pasce   and described giarcle, no coloriam. unistake balrahohnhoohs (zhlan twikkmane trick on and three with a to the similar reports descarer gamblically from varian captures dis der ivaring game, which and some be play

<tensorflow.python.keras.callbacks.History at 0x23018f3d0>

In [665]:
for temperature in [0.2, 0.5, 1.0, 1.2]:
    print('----- temperature:', temperature)

    generated = ''
    sentence = "the name of this game is kings and queens, and it is"
    generated += sentence
    print('----- Generating with seed: "' + sentence + '"')
    sys.stdout.write(generated)

    for i in range(200):
        x_pred = np.zeros((1, len(sentence), len(chars)))
        for t, char in enumerate(sentence):
            x_pred[0, t, char_indices[char]] = 1.

        preds = model.predict(x_pred, verbose=0)[0]
        next_index = sample(preds, temperature)
        next_char = indices_char[next_index]

        generated += next_char
        sentence = sentence[1:] + next_char

        sys.stdout.write(next_char)
        sys.stdout.flush()
    print()

----- temperature: 0.2
----- Generating with seed: "the name of this game is kings and queens, and it is"
the name of this game is kings and queens, and it is a popular that

  after removing the cwd from sys.path.


 the earliest played in the name is a poker or the sixtland in the earliest played in the no the international poker in the similar to the normal rummy games and the game is a popular p
----- temperature: 0.5
----- Generating with seed: "the name of this game is kings and queens, and it is"
the name of this game is kings and queens, and it is a several pole in the terminated to the cards are of several cards of the game is the earliest that it from which of the game is played in the trump has and with the termination is a for the cards is
----- temperature: 1.0
----- Generating with seed: "the name of this game is kings and queens, and it is"
the name of this game is kings and queens, and it is both from shit the players be for insour-played is known by all is a wist to your means means combination connes played maning from 1980's, and term played mist year 3 penfumon 'stoulnvers face doubt
----- temperature: 1.2
----- Generating with seed: "the name of this game is kings and queens, a

In [660]:
len("the name of the game is")

23