# Yahtzee

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow import keras as tfk
from tensorflow.keras.callbacks import TensorBoard

%matplotlib inline

# Directory (relative to the notebook) that trained models are saved to and loaded from.
__MODEL_PATH = 'models'
# Root directory for TensorBoard logs; the name of the selected model function
# is appended to it when the TensorBoard callback is configured.
__TENSOR_LOG_DIR = 'logs'

## Dataset

Let's start by looking at the provided dataset:

In [2]:
# Read the labelled dice rolls and list the distinct hand categories.
df = pd.read_csv('yahtzee-dataset.csv')
print('Labels:', df['label'].unique())

# Preview the first ten rolls.
df.head(n=10)

Labels: ['nothing' 'small-straight' 'three-of-a-kind' 'large-straight'
 'full-house' 'four-of-a-kind' 'yathzee']


Unnamed: 0,dice1,dice2,dice3,dice4,dice5,label
0,3,6,6,2,5,nothing
1,3,6,1,3,4,nothing
2,2,2,5,5,3,nothing
3,1,3,6,6,1,nothing
4,1,4,6,3,5,small-straight
5,4,1,4,3,1,nothing
6,4,4,4,6,2,three-of-a-kind
7,3,2,5,6,3,nothing
8,3,4,3,6,2,nothing
9,3,3,1,5,4,nothing


In order to classify these categorical labels, we have to 'one-hot encode' them:

In [3]:
# Replace the categorical `label` column by one indicator column per class;
# every generated column name is prefixed with 'label'.
one_hot_df = pd.get_dummies(data=df, prefix=['label'])
one_hot_df.head(n=10)

Unnamed: 0,dice1,dice2,dice3,dice4,dice5,label_four-of-a-kind,label_full-house,label_large-straight,label_nothing,label_small-straight,label_three-of-a-kind,label_yathzee
0,3,6,6,2,5,0,0,0,1,0,0,0
1,3,6,1,3,4,0,0,0,1,0,0,0
2,2,2,5,5,3,0,0,0,1,0,0,0
3,1,3,6,6,1,0,0,0,1,0,0,0
4,1,4,6,3,5,0,0,0,0,1,0,0
5,4,1,4,3,1,0,0,0,1,0,0,0
6,4,4,4,6,2,0,0,0,0,0,1,0
7,3,2,5,6,3,0,0,0,1,0,0,0
8,3,4,3,6,2,0,0,0,1,0,0,0
9,3,3,1,5,4,0,0,0,1,0,0,0


Before we can train any model, we have to split the data and the labels into X and Y:

In [4]:
# Shuffle all rows once. The fixed seed makes the shuffle -- and therefore the
# train/validation split derived from it -- identical on every
# "Restart Kernel -> Run All", so scores of different models stay comparable.
shuffled = one_hot_df.sample(frac=1., random_state=42)

# The first five columns hold the dice values, the remaining columns hold the
# one-hot encoded labels.
X = shuffled.iloc[:, :5].copy()
Y = shuffled.iloc[:, 5:].copy()

X.head(5)

Unnamed: 0,dice1,dice2,dice3,dice4,dice5
4876,3,3,6,1,6
627,4,2,4,1,3
2684,6,1,2,2,5
2029,2,2,1,1,1
880,5,5,5,1,3


We also split the dataset 9:1 into a training set and a validation set:

In [5]:
# The first 90 % of the shuffled rows train the model, the rest validate it.
split = int(.9 * len(X))
X_train, X_valid = X.iloc[:split], X.iloc[split:]
Y_train, Y_valid = Y.iloc[:split], Y.iloc[split:]

# Sanity check: both halves must have matching row counts for X and Y.
print('Split X:', X_train.shape, X_valid.shape)
print('Split Y:', Y_train.shape, Y_valid.shape)

Y_train.head(n=5)

Split X: (5248, 5) (584, 5)
Split Y: (5248, 7) (584, 7)


Unnamed: 0,label_four-of-a-kind,label_full-house,label_large-straight,label_nothing,label_small-straight,label_three-of-a-kind,label_yathzee
4876,0,0,0,1,0,0,0
627,0,0,0,0,1,0,0
2684,0,0,0,1,0,0,0
2029,0,1,0,0,0,0,0
880,0,0,0,0,0,1,0


## Models
We designed several models:

rank | name | layers | score
--- | --- | --- | ---
1 | model_6 | Dense(32, tanh), Dense(256, tanh), Dropout(0.1), Dense(256, tanh), Dropout(0.1), Dense(128, tanh), Dense(512, tanh), Dropout(0.1), Dense(512, tanh), Dropout(0.5), Dense(128, tanh)| 0.691780821918
2 | model_3 | Dense(32, ReLu), Dense(64, ReLu), Dropout(0.1), Dense(128, ReLu), Dropout(0.1), Dense(512, ReLu)| 0.683219178082
3 | model_4 | Dense(1024, tanh), Dense(512, tanh), Dense(128, tanh) | 0.679794520548
4 | model_2 | Dense(128, ReLu), Dense(256, ReLu), Dense(32, ReLu) | 0.676369863014
5 | model_1 | Dense(128, Sigmoid) | 0.674657534247
6 | model_5 | Dense(16, Sigmoid), Dense(32, Sigmoid) | 0.674657534247

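Dropout has a positive effect on the score, as can be seen in the table. We also found that the tanh activation function performed well. Note that the layer configurations listed in the table do not match the current definitions of `model_2` to `model_7` below; the table should be regenerated after re-running the experiments.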

In [9]:
def model_1(x, output_shape):
    """
    Smallest network: one sigmoid hidden layer of 128 neurons, followed by a
    softmax output layer with `output_shape` units.

    `x` is the input tensor; the returned tensor holds the softmax output.
    """
    hidden = tf.layers.dense(inputs=x, units=128, activation=tf.nn.sigmoid)
    probabilities = tf.layers.dense(inputs=hidden,
                                    units=output_shape,
                                    activation=tf.nn.softmax)
    return probabilities

In [None]:
def model_2(x, output_shape=None):
    """
    Three hidden layers (128, 64 and 32 neurons) with relu activation functions.

    Parameters
    ----------
    x : tensor or array-like
        Input batch or placeholder. Only its last dimension (the number of
        features) is read, to size the first layer.
    output_shape : int
        Number of classes, i.e. units of the softmax output layer.

    Returns
    -------
    tfk.models.Sequential
        The assembled, uncompiled Keras model.

    Raises
    ------
    ValueError
        If `output_shape` is not given.
    """
    if output_shape is None:
        raise ValueError('output_shape (the number of classes) is required')
    # The layer sizes used to come from an undefined global named `shape`.
    n_features = int(x.shape[-1])
    return tfk.models.Sequential([
        tfk.layers.Dense(128,          activation='relu', input_shape=(n_features,), name='l.2.1'),
        tfk.layers.Dense(64,           activation='relu',                            name='l.2.2'),
        tfk.layers.Dense(32,           activation='relu',                            name='l.2.3'),
        tfk.layers.Dense(output_shape, activation='softmax',                         name='l.2.o')
    ])

In [None]:
def model_3(x, output_shape=None):
    """
    Four hidden dense layers (12, 24, 48 and 96 neurons) with relu activation
    functions and one dropout layer (rate 0.1) in front of the last hidden layer.

    Parameters
    ----------
    x : tensor or array-like
        Input batch or placeholder. Only its last dimension (the number of
        features) is read, to size the first layer.
    output_shape : int
        Number of classes, i.e. units of the softmax output layer.

    Returns
    -------
    tfk.models.Sequential
        The assembled, uncompiled Keras model.

    Raises
    ------
    ValueError
        If `output_shape` is not given.
    """
    if output_shape is None:
        raise ValueError('output_shape (the number of classes) is required')
    # The layer sizes used to come from an undefined global named `shape`.
    n_features = int(x.shape[-1])
    return tfk.models.Sequential([
        tfk.layers.Dense(12,           activation='relu', input_shape=(n_features,), name='l1'),
        tfk.layers.Dense(24,           activation='relu',                            name='l2'),
        tfk.layers.Dense(48,           activation='relu',                            name='l3'),
        tfk.layers.Dropout(0.1,                                                      name='l4'),
        tfk.layers.Dense(96,           activation='relu',                            name='l5'),
        tfk.layers.Dense(output_shape, activation='softmax',                         name='output')
    ])

In [None]:
def model_4(x, output_shape=None):
    """
    Three hidden layers (128, 64 and 32 neurons) with tanh activation functions.

    Parameters
    ----------
    x : tensor or array-like
        Input batch or placeholder. Only its last dimension (the number of
        features) is read, to size the first layer.
    output_shape : int
        Number of classes, i.e. units of the softmax output layer.

    Returns
    -------
    tfk.models.Sequential
        The assembled, uncompiled Keras model.

    Raises
    ------
    ValueError
        If `output_shape` is not given.
    """
    if output_shape is None:
        raise ValueError('output_shape (the number of classes) is required')
    # The layer sizes used to come from an undefined global named `shape`.
    n_features = int(x.shape[-1])
    return tfk.models.Sequential([
        tfk.layers.Dense(128,          activation='tanh', input_shape=(n_features,), name='l1'),
        tfk.layers.Dense(64,           activation='tanh',                            name='l2'),
        tfk.layers.Dense(32,           activation='tanh',                            name='l3'),
        tfk.layers.Dense(output_shape, activation='softmax',                         name='output')
    ])

In [None]:
def model_5(x, output_shape=None):
    """
    High number of neurons in layers, decreasing per layer (1000, 500, 250),
    all with relu activation functions.

    Parameters
    ----------
    x : tensor or array-like
        Input batch or placeholder. Only its last dimension (the number of
        features) is read, to size the first layer.
    output_shape : int
        Number of classes, i.e. units of the softmax output layer.

    Returns
    -------
    tfk.models.Sequential
        The assembled, uncompiled Keras model.

    Raises
    ------
    ValueError
        If `output_shape` is not given.
    """
    if output_shape is None:
        raise ValueError('output_shape (the number of classes) is required')
    # The layer sizes used to come from an undefined global named `shape`.
    n_features = int(x.shape[-1])
    return tfk.models.Sequential([
        tfk.layers.Dense(1000,         activation='relu', input_shape=(n_features,), name='l1'),
        tfk.layers.Dense(500,          activation='relu',                            name='l2'),
        tfk.layers.Dense(250,          activation='relu',                            name='l3'),
        tfk.layers.Dense(output_shape, activation='softmax',                         name='output')
    ])

In [None]:
def model_6(x, output_shape=None):
    """
    High number of neurons in layers, increasing per layer (250, 500, 1000),
    all with relu activation functions.

    Parameters
    ----------
    x : tensor or array-like
        Input batch or placeholder. Only its last dimension (the number of
        features) is read, to size the first layer.
    output_shape : int
        Number of classes, i.e. units of the softmax output layer.

    Returns
    -------
    tfk.models.Sequential
        The assembled, uncompiled Keras model.

    Raises
    ------
    ValueError
        If `output_shape` is not given.
    """
    if output_shape is None:
        raise ValueError('output_shape (the number of classes) is required')
    # The layer sizes used to come from an undefined global named `shape`.
    n_features = int(x.shape[-1])
    return tfk.models.Sequential([
        tfk.layers.Dense(250,          activation='relu', input_shape=(n_features,), name='l1'),
        tfk.layers.Dense(500,          activation='relu',                            name='l2'),
        tfk.layers.Dense(1000,         activation='relu',                            name='l3'),
        tfk.layers.Dense(output_shape, activation='softmax',                         name='output')
    ])

In [None]:
def model_7(x, output_shape=None):
    """
    Five hidden dense layers (64, 128, 256, 512 and 64 neurons) with relu
    activation functions and one dropout layer (rate 0.3) after the widest layer.

    Parameters
    ----------
    x : tensor or array-like
        Input batch or placeholder. Only its last dimension (the number of
        features) is read, to size the first layer.
    output_shape : int
        Number of classes, i.e. units of the softmax output layer.

    Returns
    -------
    tfk.models.Sequential
        The assembled, uncompiled Keras model.

    Raises
    ------
    ValueError
        If `output_shape` is not given.
    """
    if output_shape is None:
        raise ValueError('output_shape (the number of classes) is required')
    # The layer sizes used to come from an undefined global named `shape`.
    n_features = int(x.shape[-1])
    return tfk.models.Sequential([
        tfk.layers.Dense(64,           activation='relu', input_shape=(n_features,), name='l1'),
        tfk.layers.Dense(128,          activation='relu',                            name='l2'),
        tfk.layers.Dense(256,          activation='relu',                            name='l3'),
        tfk.layers.Dense(512,          activation='relu',                            name='l4'),
        tfk.layers.Dropout(0.3,                                                      name='l5'),
        tfk.layers.Dense(64,           activation='relu',                            name='l6'),
        tfk.layers.Dense(output_shape, activation='softmax',                         name='output')
    ])

We start with the placeholders for our 5-dice input and 7-class output and choose a model:

In [10]:
# Graph inputs: one placeholder for the dice values and one for the one-hot
# labels. The leading `None` leaves the batch size open.
x = tf.placeholder(dtype=tf.float32, shape=(None, X.shape[1]), name='x')
y = tf.placeholder(dtype=tf.float32, shape=(None, Y.shape[1]), name='y')

# Select the architecture to train and connect it to the input placeholder.
model_fn = model_1
y_pred = model_fn(x, Y.shape[1])

## Training

We configure TensorBoard to get a good look at the performance of our model:

In [11]:
# Every model logs into its own sub-directory, named after the model function.
log_dir = os.path.join(__TENSOR_LOG_DIR, model_fn.__name__)
tensor_board = TensorBoard(log_dir=log_dir, histogram_freq=1)

We choose an optimizer, a loss function and metrics:

In [14]:
# Loss function
#   `y_pred` already contains softmax probabilities (the model ends in a
#   softmax layer), so the categorical cross-entropy is computed from those
#   probabilities directly. `tf.nn.softmax_cross_entropy_with_logits_v2`
#   expects unscaled logits and would apply softmax a second time.
#   Clipping keeps log() away from zero.
clipped_pred = tf.clip_by_value(y_pred, 1e-7, 1.0)
cross_entropy = -tf.reduce_sum(y * tf.log(clipped_pred), axis=1)
loss_fn = tf.reduce_mean(cross_entropy)

# Optimizer minimizes the loss
optimizer = tf.train.AdamOptimizer(learning_rate=.001).minimize(loss_fn)

# Accuracy metric
#   checks if the indices of the highest values in the real
#   and predicted arrays are equal
#   (`axis` replaces the deprecated `dimension` argument of tf.argmax)
prediction = tf.equal(tf.argmax(y, axis=1), tf.argmax(y_pred, axis=1))
accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))

We train the model on mini-batches of a fixed size for a fixed number of iterations:

In [None]:
iters = 300
batch_size = 200
validate_every = 50


def get_batch(size):
    """
    Draw a random mini-batch from the training split.

    Returns a tuple (features, labels) of NumPy arrays holding at most `size`
    rows, sampled without replacement from X_train / Y_train.
    """
    rows = np.random.choice(len(X_train), size=min(size, len(X_train)), replace=False)
    return X_train.values[rows], Y_train.values[rows]


session = tf.Session()
session.run(tf.global_variables_initializer())

for i in range(iters):
    # One optimisation step on a random training mini-batch.
    x_batch, y_batch = get_batch(batch_size)
    loss_val, _, acc_value = session.run([loss_fn, optimizer, accuracy],
                                         feed_dict={x: x_batch, y: y_batch})
    print('i:', i+1, 'Loss:', loss_val, 'Accuracy:', acc_value)

    # Validate every `validate_every` iterations.
    # `optimizer` is deliberately not fetched here, so the validation data
    # never influences the weights.
    if (i + 1) % validate_every == 0:
        valid_loss, valid_acc = session.run([loss_fn, accuracy],
                                            feed_dict={x: X_valid.values, y: Y_valid.values})
        print('i:', i+1, 'Validation loss:', valid_loss, 'Validation accuracy:', valid_acc)

We validate the model with the data it has not seen yet:

In [None]:
# Validate the model with unseen data.
# Only the loss and the accuracy are fetched: fetching `optimizer` as well
# would run a training step on the validation set.
loss_val, acc_value = session.run([loss_fn, accuracy],
                                  feed_dict={x: X_valid, y: Y_valid})

# Print test metrics
print('Loss:', loss_val, 'Accuracy:', acc_value)

We save the model that worked best:

In [None]:
# No Keras `model` object is defined in this notebook: the trained variables
# live in the TensorFlow graph driven by `session`, so they are persisted as
# a TensorFlow checkpoint named after the selected model function.
os.makedirs(__MODEL_PATH, exist_ok=True)  # make sure the target directory exists
checkpoint_prefix = os.path.join(__MODEL_PATH, model_fn.__name__)
tf.train.Saver().save(session, checkpoint_prefix)

We load the model that worked best:

In [None]:
# Load the saved Keras model that was selected as the best one.
best_model_file_name = 'model_7.h5'
best_model_path = os.path.join(__MODEL_PATH, best_model_file_name)
best_model = tfk.models.load_model(best_model_path)

print('Model {} retrieved!'.format(best_model_file_name))

# Evaluate it on the validation split and report the first two returned values.
score = best_model.evaluate(X_valid, Y_valid)
print('Loss:', score[0], 'Accuracy:', score[1])