# Yahtzee

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow import keras as tfk
from tensorflow.keras.callbacks import TensorBoard

%matplotlib inline

__MODEL_PATH = 'models'
__TENSOR_LOG_DIR = 'logs'

## Dataset

Let's start with looking at the provided dataset:

In [2]:
df = pd.read_csv('yahtzee-dataset.csv')
print('Labels:', df.label.unique())

df.head(10)

('Labels:', array(['nothing', 'small-straight', 'three-of-a-kind', 'large-straight',
       'full-house', 'four-of-a-kind', 'yathzee'], dtype=object))


Unnamed: 0,dice1,dice2,dice3,dice4,dice5,label
0,3,6,6,2,5,nothing
1,3,6,1,3,4,nothing
2,2,2,5,5,3,nothing
3,1,3,6,6,1,nothing
4,1,4,6,3,5,small-straight
5,4,1,4,3,1,nothing
6,4,4,4,6,2,three-of-a-kind
7,3,2,5,6,3,nothing
8,3,4,3,6,2,nothing
9,3,3,1,5,4,nothing


In order to classify these categorical labels, we have to 'one-hot encode' them:

In [3]:
one_hot_df = pd.get_dummies(df, prefix=['label'])
one_hot_df.head(10)

Unnamed: 0,dice1,dice2,dice3,dice4,dice5,label_four-of-a-kind,label_full-house,label_large-straight,label_nothing,label_small-straight,label_three-of-a-kind,label_yathzee
0,3,6,6,2,5,0,0,0,1,0,0,0
1,3,6,1,3,4,0,0,0,1,0,0,0
2,2,2,5,5,3,0,0,0,1,0,0,0
3,1,3,6,6,1,0,0,0,1,0,0,0
4,1,4,6,3,5,0,0,0,0,1,0,0
5,4,1,4,3,1,0,0,0,1,0,0,0
6,4,4,4,6,2,0,0,0,0,0,1,0
7,3,2,5,6,3,0,0,0,1,0,0,0
8,3,4,3,6,2,0,0,0,1,0,0,0
9,3,3,1,5,4,0,0,0,1,0,0,0


Before we can train any model, we have to split the data and the labels into X and Y:

In [4]:
X = one_hot_df.iloc[:,:5].copy()
Y = one_hot_df.iloc[:,5:].copy()

X.head(5)

Unnamed: 0,dice1,dice2,dice3,dice4,dice5
0,3,6,6,2,5
1,3,6,1,3,4
2,2,2,5,5,3
3,1,3,6,6,1
4,1,4,6,3,5


We also split the dataset into a 9:1 split for training and validating the model:

In [5]:
split = int(len(X.index) * .9)
X_train = X.iloc[:split]
X_valid = X.iloc[split:]
Y_train = Y.iloc[:split]
Y_valid = Y.iloc[split:]

print('Split X:', X_train.shape, X_valid.shape)
print('Split Y:', Y_train.shape, Y_valid.shape)

Y_train.head(5)

('Split X:', (5248, 5), (584, 5))
('Split Y:', (5248, 7), (584, 7))


Unnamed: 0,label_four-of-a-kind,label_full-house,label_large-straight,label_nothing,label_small-straight,label_three-of-a-kind,label_yathzee
0,0,0,0,1,0,0,0
1,0,0,0,1,0,0,0
2,0,0,0,1,0,0,0
3,0,0,0,1,0,0,0
4,0,0,0,0,1,0,0


## Models
We designed several models:

rank | name | layers | score
--- | --- | --- | ---
1 | model_6 | Dense(32, tanh), Dense(256, tanh), Dropout(0.1), Dense(256, tanh), Dropout(0.1), Dense(128, tanh), Dense(512, tanh), Dropout(0.1), Dense(512, tanh), Dropout(0.5), Dense(128, tanh)| 0.691780821918
2 | model_3 | Dense(32, ReLu), Dense(64, ReLu), Dropout(0.1), Dense(128, ReLu), Dropout(0.1), Dense(512, ReLu)| 0.683219178082
3 | model_4 | Dense(1024, tanh), Dense(512, tanh), Dense(128, tanh) | 0.679794520548
4 | model_2 | Dense(128, ReLu), Dense(256, ReLu), Dense(32, ReLu) | 0.676369863014
5 | model_1 | Dense(128, Sigmoid) | 0.674657534247
6 | model_5 | Dense(16, Sigmoid), Dense(32, Sigmoid | 0.674657534247

Dropout has a positive effect on the score as can be seen in the table. We also found that the tanh activation function performed well. 

In [6]:
def model_1(shape):
    """
    Single hidden layer with 128 neurons and Sigmoid activation function.
    """
    return tfk.models.Sequential([
        tfk.layers.Dense(128,      activation='sigmoid', input_shape=(shape[0],), name='l1'),
        tfk.layers.Dense(shape[1], activation='softmax',                          name='output')
    ])

In [7]:
def model_2(shape):
    """
    Three hidden layers with different amounts of neurons and relu activation functions.
    """
    return tfk.models.Sequential([
        tfk.layers.Dense(128,      activation='relu', input_shape=(shape[0],), name='l1'),
        tfk.layers.Dense(256,      activation='relu',                          name='l2'),
        tfk.layers.Dense(32,       activation='relu',                          name='l3'),
        tfk.layers.Dense(shape[1], activation='softmax',                       name='output')
    ])

In [8]:
def model_3(shape):
    """
    Six hidden layers with different amounts of neurons and relu activation functions and 2 dropout layers.
    """
    return tfk.models.Sequential([
        tfk.layers.Dense(32,       activation='relu', input_shape=(shape[0],), name='l1'),
        tfk.layers.Dense(64,       activation='relu',                          name='l2'),
        tfk.layers.Dropout(0.1,                                                name='l3'),
        tfk.layers.Dense(128,      activation='relu',                          name='l4'),
        tfk.layers.Dropout(0.1,                                                name='l5'),
        tfk.layers.Dense(512,      activation='relu',                          name='l6'),
        tfk.layers.Dense(shape[1], activation='softmax',                       name='output')
    ])

In [9]:
def model_4(shape):
    """
    Three hidden layers with a decreasing amount of neurons and tanh (Scaled Sigmoid) activation functions.
    """
    return tfk.models.Sequential([
        tfk.layers.Dense(1024,      activation='tanh', input_shape=(shape[0],), name='l1'),
        tfk.layers.Dense(512,       activation='tanh',                          name='l2'),
        tfk.layers.Dense(128,       activation='tanh',                          name='l3'),
        tfk.layers.Dense(shape[1],  activation='softmax',                       name='output')
    ])

In [10]:
def model_5(shape):
    """
    Two hidden layers with 16 neurons and 32 neurons and Sigmoid activation function.
    """
    return tfk.models.Sequential([
        tfk.layers.Dense(16,       activation='sigmoid', input_shape=(shape[0],), name='l1'),
        tfk.layers.Dense(32,       activation='sigmoid',                          name='l2'),
        tfk.layers.Dense(shape[1], activation='softmax',                          name='output')
    ])

In [11]:
def model_6(shape):
    """
    11 hidden layers with different amounts of neurons and tanh activation functions and 4 dropout layers.
    """
    return tfk.models.Sequential([
        tfk.layers.Dense(64,       activation='tanh', input_shape=(shape[0],), name='l1'),
        tfk.layers.Dense(256,      activation='tanh',                          name='l2'),
        tfk.layers.Dropout(0.1,                                                name='l3'),
        tfk.layers.Dense(256,      activation='tanh',                          name='l4'),
        tfk.layers.Dropout(0.1,                                                name='l5'),
        tfk.layers.Dense(128,      activation='tanh',                          name='l6'),
        tfk.layers.Dense(512,      activation='tanh',                          name='l7'),
        tfk.layers.Dropout(0.1,                                                name='l8'),
        tfk.layers.Dense(512,      activation='tanh',                          name='l9'),
        tfk.layers.Dropout(0.6,                                                name='l10'),
        tfk.layers.Dense(128,      activation='tanh',                          name='l11'),
        tfk.layers.Dense(shape[1], activation='softmax',                       name='output')
    ])

We can choose a model to train and test:

In [12]:
model_fn = model_6
model = model_fn((X.shape[1], Y.shape[1]))

We configure TensorBoard to get a good look on the performance of our model:

In [13]:
model.summary()
tensor_board = TensorBoard(log_dir=os.path.join(__TENSOR_LOG_DIR, model_fn.__name__), histogram_freq=1)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
l1 (Dense)                   (None, 64)                384       
_________________________________________________________________
l2 (Dense)                   (None, 256)               16640     
_________________________________________________________________
l3 (Dropout)                 (None, 256)               0         
_________________________________________________________________
l4 (Dense)                   (None, 256)               65792     
_________________________________________________________________
l5 (Dropout)                 (None, 256)               0         
_________________________________________________________________
l6 (Dense)                   (None, 128)               32896     
_________________________________________________________________
l7 (Dense)                   (None, 512)               66048     
__________

We choose an optimizer, a loss functon and metrics:

In [14]:
# Compile the model and use the Adam optimizer, Cross Entropy loss function and accuracy metric
optimizer = tfk.optimizers.Adam(lr=.001, decay=.0)

model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

We train the model using a certain batch size and for a number of epochs:

In [15]:
epochs = 10
batch_size = 128
validation_split = .10

# Train the model
model.fit(x=X_train, 
          y=Y_train,
          epochs=epochs,
          batch_size=batch_size,
          validation_split=validation_split,
          verbose=1,
          shuffle=True,
          callbacks=[tensor_board])

Train on 4723 samples, validate on 525 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x126281390>

We validate the model with the data it has not seen yet:

In [16]:
def validate(m, X, Y):
    "Validate a model with unseen data"
    return m.evaluate(X, Y)[1]

In [17]:
# Validate the model with unseen data
score = validate(model, X_valid, Y_valid)

# Print test accuracy
print('Test accuracy: {}'.format(score))

Test accuracy: 0.679794520548


We save the model that worked best:

In [18]:
# model.save("{}.h5".format(os.path.join(__MODEL_PATH, model_fn.__name__)))
print("Model saved.")

Model saved.


We load the model that worked best:

In [20]:
best_model_file_name = "model_1.h5"

best_model = tfk.models.load_model(os.path.join(__MODEL_PATH, best_model_file_name))

print("Model {} retrieved!".format(best_model_file_name))

validate(best_model, X_valid, Y_valid)
print('Test accuracy: {}'.format(score))

Model model_1.h5 retrieved!
Test accuracy: 0.679794520548
