# Yahtzee

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

%matplotlib inline

__MODEL_PATH = 'models'
__TENSOR_LOG_DIR = 'logs'

## Dataset

Let's start with looking at the provided dataset:

In [2]:
df = pd.read_csv('yahtzee-dataset.csv')
print('Labels:', df.label.unique())

df.head(10)

Labels: ['nothing' 'small-straight' 'three-of-a-kind' 'large-straight'
 'full-house' 'four-of-a-kind' 'yathzee']


Unnamed: 0,dice1,dice2,dice3,dice4,dice5,label
0,3,6,6,2,5,nothing
1,3,6,1,3,4,nothing
2,2,2,5,5,3,nothing
3,1,3,6,6,1,nothing
4,1,4,6,3,5,small-straight
5,4,1,4,3,1,nothing
6,4,4,4,6,2,three-of-a-kind
7,3,2,5,6,3,nothing
8,3,4,3,6,2,nothing
9,3,3,1,5,4,nothing


In order to classify these categorical labels, we have to 'one-hot encode' them:

In [3]:
one_hot_df = pd.get_dummies(df, prefix=['label'])
one_hot_df.head(10)

Unnamed: 0,dice1,dice2,dice3,dice4,dice5,label_four-of-a-kind,label_full-house,label_large-straight,label_nothing,label_small-straight,label_three-of-a-kind,label_yathzee
0,3,6,6,2,5,0,0,0,1,0,0,0
1,3,6,1,3,4,0,0,0,1,0,0,0
2,2,2,5,5,3,0,0,0,1,0,0,0
3,1,3,6,6,1,0,0,0,1,0,0,0
4,1,4,6,3,5,0,0,0,0,1,0,0
5,4,1,4,3,1,0,0,0,1,0,0,0
6,4,4,4,6,2,0,0,0,0,0,1,0
7,3,2,5,6,3,0,0,0,1,0,0,0
8,3,4,3,6,2,0,0,0,1,0,0,0
9,3,3,1,5,4,0,0,0,1,0,0,0


Before we can train any model, we have to split the data and the labels into X and Y:

In [4]:
shuffled = one_hot_df.sample(frac=1.)
X = shuffled.iloc[:,:5].copy()
Y = shuffled.iloc[:,5:].copy()

X.head(5)

Unnamed: 0,dice1,dice2,dice3,dice4,dice5
4300,1,4,1,6,4
2847,4,2,6,4,4
2000,2,2,4,4,3
4557,5,5,1,2,5
4872,6,1,5,3,5


We also split the dataset into a 9:1 split for training and validating the model:

In [5]:
split = int(len(X.index) * .85)
X_train = X.iloc[:split]
X_valid = X.iloc[split:]
Y_train = Y.iloc[:split]
Y_valid = Y.iloc[split:]

split = int(len(X_train.index) * .85)
X_test = X_train.iloc[split:]
X_train = X_train.iloc[:split]
Y_test = Y_train.iloc[split:]
Y_train = Y_train.iloc[:split]

print('Split X (train, test, validation):', X_train.shape, X_test.shape, X_valid.shape)
print('Split Y (train, test, validation):', Y_train.shape, Y_test.shape, Y_valid.shape)

Split X (train, test, validation): (4213, 5) (744, 5) (875, 5)
Split Y (train, test, validation): (4213, 7) (744, 7) (875, 7)


In [6]:
def get_batch(data, labels, batch_size):
    x_batch = data.sample(frac=batch_size / len(data.index))
    return x_batch, labels.loc[x_batch.index]

## Models

We designed several models:

rank | name | layers | score
--- | --- | --- | ---
5 | model_8 | (64, Tanh, Drop=.2) (128, Tanh, Drop=.3) (256, Tanh, Drop=.4) (512, Tanh, Drop=.5) (64, Tanh) | 0.93714285
4 | model_7 | (64, ReLU) (128, ReLU) (256, ReLU) (512, ReLU, Drop=.3) (64, ReLU) | 0.94057140
3 | model_6 | (200, Tanh) (300, Tanh) (600, Tanh) | 0.97828573
1 | model_5_2 | (600, Tanh, Drop=.3) (300, Tanh, Drop=.3) (200, Tanh, Drop=.3) | 0.98742855
2 | model_5 | (600, Tanh) (300, Tanh) (200, Tanh) | 0.97942860
6 | model_4 | (128, Tanh) (64, Tanh) (32, Tanh) | 0.87771430
7 | model_3 | (12, ReLU) (24, ReLU) (48, ReLU, Drop=.1) (96, ReLU) | 0.73028570
8 | model_2 | (128, ReLU) (64, ReLU) (32, ReLU) | 0.82400000
9 | model_1 | (128, Sigmoid) | 0.66628570

Dropout has a positive effect on the score as can be seen in the table. We also found that the tanh activation function performed well. 

- Model 1: BLUE
- Model 2: RED
- Model 3: LIGHT BLUE
- Model 4: PINK
- Model 5: GREEN
- Model 6: GRAY
- Model 7: ORANGE
- Model 8: ORANGE

### Batch Accuracy

![Batch Accuracy](acc.png)

### Batch Loss

![Batch Loss](loss.png)

In [7]:
def model_1(x, output_shape):
    """
    Single hidden layer with 128 neurons and Sigmoid activation function.
    """
    l_1 = tf.layers.dense(x, units=128, activation=tf.nn.sigmoid)
    return tf.layers.dense(l_1, units=output_shape, activation=None)

In [8]:
def model_2(x, output_shape):
    """
    Three hidden layers with different amounts of neurons and relu activation functions.
    """
    l_1 = tf.layers.dense(x, units=128, activation=tf.nn.relu)
    l_2 = tf.layers.dense(l_1, units=64, activation=tf.nn.relu)
    l_3 = tf.layers.dense(l_2, units=32, activation=tf.nn.relu)
    return tf.layers.dense(l_3, units=output_shape, activation=None)

In [9]:
def model_3(x, output_shape):
    """
    Six hidden layers with different amounts of neurons and 
    relu activation functions and 2 dropout layers.
    """
    l_1 = tf.layers.dense(x, units=12, activation=tf.nn.relu)
    l_2 = tf.layers.dense(l_1, units=24, activation=tf.nn.relu)
    l_3 = tf.layers.dense(l_2, units=48, activation=tf.nn.relu)
    d_3 = tf.layers.dropout(l_3, rate=.1)
    l_4 = tf.layers.dense(d_3, units=96, activation=tf.nn.relu)
    return tf.layers.dense(l_4, units=output_shape, activation=None)

In [10]:
def model_4(x, output_shape):
    """
    Three hidden layers with different amounts of neurons and relu activation functions.
    """
    l_1 = tf.layers.dense(x, units=128, activation=tf.nn.tanh)
    l_2 = tf.layers.dense(l_1, units=64, activation=tf.nn.tanh)
    l_3 = tf.layers.dense(l_2, units=32, activation=tf.nn.tanh)
    return tf.layers.dense(l_3, units=output_shape, activation=None)

In [11]:
def model_5(x, output_shape):
    """
    High number of neurons in layers, decreasing per layer
    """
    l_1 = tf.layers.dense(x, units=600, activation=tf.nn.tanh)
    l_2 = tf.layers.dense(l_1, units=300, activation=tf.nn.tanh)
    l_3 = tf.layers.dense(l_2, units=200, activation=tf.nn.tanh)
    return tf.layers.dense(l_3, units=output_shape, activation=None)

In [12]:
def model_5_2(x, output_shape):
    """
    High number of neurons in layers, decreasing per layer
    """
    l_1 = tf.layers.dense(x, units=600, activation=tf.nn.tanh)
    d_1 = tf.layers.dropout(l_1, rate=.3)
    l_2 = tf.layers.dense(d_1, units=300, activation=tf.nn.tanh)
    d_2 = tf.layers.dropout(l_2, rate=.3)
    l_3 = tf.layers.dense(d_2, units=200, activation=tf.nn.tanh)
    d_3 = tf.layers.dropout(l_3, rate=.3)
    return tf.layers.dense(d_3, units=output_shape, activation=None)

In [13]:
def model_6(x, output_shape):
    """
    High number of neurons in layers, increasing per layer
    """
    l_1 = tf.layers.dense(x, units=200, activation=tf.nn.tanh)
    l_2 = tf.layers.dense(l_1, units=300, activation=tf.nn.tanh)
    l_3 = tf.layers.dense(l_2, units=600, activation=tf.nn.tanh)
    return tf.layers.dense(l_3, units=output_shape, activation=None)

In [14]:
def model_7(x, output_shape):
    """
    """
    l_1 = tf.layers.dense(x, units=64, activation=tf.nn.relu)
    l_2 = tf.layers.dense(l_1, units=128, activation=tf.nn.relu)
    l_3 = tf.layers.dense(l_2, units=256, activation=tf.nn.relu)
    l_4 = tf.layers.dense(l_3, units=512, activation=tf.nn.relu)
    d_4 = tf.layers.dropout(l_4, rate=.3)
    l_5 = tf.layers.dense(d_4, units=64, activation=tf.nn.relu)
    return tf.layers.dense(l_5, units=output_shape, activation=None)

In [15]:
def model_8(x, output_shape):
    """
    """
    l_1 = tf.layers.dense(x, units=64, activation=tf.nn.tanh)
    d_1 = tf.layers.dropout(l_1, rate=.2)
    l_2 = tf.layers.dense(d_1, units=128, activation=tf.nn.tanh)
    d_2 = tf.layers.dropout(l_2, rate=.3)
    l_3 = tf.layers.dense(d_2, units=256, activation=tf.nn.tanh)
    d_3 = tf.layers.dropout(l_3, rate=.4)
    l_4 = tf.layers.dense(d_3, units=512, activation=tf.nn.tanh)
    d_4 = tf.layers.dropout(l_4, rate=.5)
    l_5 = tf.layers.dense(d_4, units=64, activation=tf.nn.tanh)
    return tf.layers.dense(l_5, units=output_shape, activation=None)

We start with the placeholder for our 5-dice input and 7-class output and choose a model:

In [16]:
x = tf.placeholder(tf.float32, shape=[None, X.shape[1]], name='x')
y = tf.placeholder(tf.float32, shape=[None, Y.shape[1]], name='y')

model_fn = model_1
y_pred = model_fn(x, Y.shape[1])

## Training

We choose an optimizer, a loss functon and metrics:

In [17]:
# Loss function
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=y_pred)
loss_fn = tf.reduce_mean(cross_entropy)

# Optimizer minimizes the loss
optimizer = tf.train.AdamOptimizer(learning_rate=.001).minimize(loss_fn)

# Accuracy metric
#   checks if the indices of the highest values in the real 
#   and predicted arrays are equal
prediction = tf.equal(tf.argmax(y, axis=1), tf.argmax(y_pred, axis=1))
accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))

We train the model using a certain batch size and for a number of iterations while posting scalars to TensorBoard:

In [18]:
iters = 3000
train_batch_size = 200

session = tf.Session()
with session:
    session.run(tf.global_variables_initializer())

    sum_loss_train = tf.summary.scalar('loss_train', loss_fn)
    sum_loss_test = tf.summary.scalar('loss_test', loss_fn)
    sum_acc_train = tf.summary.scalar('acc_train', accuracy)
    sum_acc_test = tf.summary.scalar('acc_test', accuracy)
    tf.summary.merge_all()
    writer = tf.summary.FileWriter(os.path.join(__TENSOR_LOG_DIR, model_fn.__name__), session.graph)

    for i in range(iters):
        x_batch, y_batch = get_batch(X_train, Y_train, train_batch_size)
        loss_val, _, acc_val, sum_1, sum_2 = session.run([loss_fn, optimizer, accuracy, 
                                                          sum_loss_train, sum_acc_train], 
                                                         feed_dict={x: x_batch, y: y_batch})

        writer.add_summary(sum_1, global_step=i)
        writer.add_summary(sum_2, global_step=i)
    #     print('Training - i:', i+1, 'Loss:', loss_val, 'Accuracy:', acc_val)

        # Validate every 50 iterations
        if i % 50 == 0:
            acc_val, sum_1, sum_2 = session.run([accuracy, sum_loss_test, sum_acc_test], 
                                                feed_dict={x: X_test, y: Y_test})

            writer.add_summary(sum_1, global_step=i)
            writer.add_summary(sum_2, global_step=i)
            print('Validation - i:', i+1, ' Accuracy:', acc_val)
    

    # Validate the model with unseen data
    acc_val = session.run([accuracy], feed_dict={x: X_valid, y: Y_valid})

    # Print test metrics
    print('Accuracy:', acc_val)

Validation - i: 1  Accuracy: 0.18817204
Validation - i: 51  Accuracy: 0.61962366
Validation - i: 101  Accuracy: 0.61962366
Validation - i: 151  Accuracy: 0.61962366
Validation - i: 201  Accuracy: 0.61962366
Validation - i: 251  Accuracy: 0.61962366
Validation - i: 301  Accuracy: 0.61962366
Validation - i: 351  Accuracy: 0.61962366
Validation - i: 401  Accuracy: 0.61962366
Validation - i: 451  Accuracy: 0.61962366
Validation - i: 501  Accuracy: 0.61962366
Validation - i: 551  Accuracy: 0.61962366
Validation - i: 601  Accuracy: 0.61962366
Validation - i: 651  Accuracy: 0.61962366
Validation - i: 701  Accuracy: 0.61962366
Validation - i: 751  Accuracy: 0.61962366
Validation - i: 801  Accuracy: 0.61962366
Validation - i: 851  Accuracy: 0.61962366
Validation - i: 901  Accuracy: 0.61962366
Validation - i: 951  Accuracy: 0.61962366
Validation - i: 1001  Accuracy: 0.61962366
Validation - i: 1051  Accuracy: 0.61962366
Validation - i: 1101  Accuracy: 0.61962366
Validation - i: 1151  Accuracy: 0.

## Validation

We validate the model with the data it has not seen yet:

In [19]:
# with session:
#     # Validate the model with unseen data
#     acc_val = session.run([accuracy], feed_dict={x: X_valid, y: Y_valid})

#     # Print test metrics
#     print('Accuracy:', acc_val)

## Exporting & Importing

In [20]:
save_path = '{}.ckpt'.format(os.path.join(__MODEL_PATH, model_fn.__name__, model_fn.__name__))

model_to_load = model_5_2

load_path = '{}.ckpt'.format(os.path.join(__MODEL_PATH, model_to_load.__name__, model_to_load.__name__))

We save the model that worked best:

In [21]:
# with session:
#     tf.train.Saver().save(session, save_path)

We load the model that worked best:

In [22]:
with tf.Session() as saved_session:
    tf.train.Saver().restore(saved_session, load_path)

    # Validate the model with unseen data
    acc_val = saved_session.run([accuracy], feed_dict={x: X_valid, y: Y_valid})

    # Print test metrics
    print('Accuracy:', acc_val)

INFO:tensorflow:Restoring parameters from models/model_5_2/model_5_2.ckpt


InvalidArgumentError: Restoring from checkpoint failed. This is most likely due to a mismatch between the current graph and the graph from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Assign requires shapes of both tensors to match. lhs shape= [128,7] rhs shape= [600,300]
	 [[node save/Assign_13 (defined at <ipython-input-22-cc336575bd0f>:2)  = Assign[T=DT_FLOAT, _class=["loc:@dense_1/kernel"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](dense_1/kernel/Adam_1, save/RestoreV2:13)]]

Caused by op 'save/Assign_13', defined at:
  File "/anaconda3/envs/MLBD/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/anaconda3/envs/MLBD/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/anaconda3/envs/MLBD/lib/python3.6/asyncio/base_events.py", line 427, in run_forever
    self._run_once()
  File "/anaconda3/envs/MLBD/lib/python3.6/asyncio/base_events.py", line 1440, in _run_once
    handle._run()
  File "/anaconda3/envs/MLBD/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tornado/gen.py", line 1233, in inner
    self.run()
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 370, in dispatch_queue
    yield self.process_one()
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tornado/gen.py", line 346, in wrapper
    runner = Runner(result, future, yielded)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tornado/gen.py", line 1080, in __init__
    self.run()
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3185, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-22-cc336575bd0f>", line 2, in <module>
    tf.train.Saver().restore(saved_session, load_path)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1102, in __init__
    self.build()
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1114, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1151, in _build
    build_save=build_save, build_restore=build_restore)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 795, in _build_internal
    restore_sequentially, reshape)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 428, in _AddRestoreOps
    assign_ops.append(saveable.restore(saveable_tensors, shapes))
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 119, in restore
    self.op.get_shape().is_fully_defined())
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tensorflow/python/ops/state_ops.py", line 221, in assign
    validate_shape=validate_shape)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tensorflow/python/ops/gen_state_ops.py", line 61, in assign
    use_locking=use_locking, name=name)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
    op_def=op_def)
  File "/anaconda3/envs/MLBD/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): Restoring from checkpoint failed. This is most likely due to a mismatch between the current graph and the graph from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Assign requires shapes of both tensors to match. lhs shape= [128,7] rhs shape= [600,300]
	 [[node save/Assign_13 (defined at <ipython-input-22-cc336575bd0f>:2)  = Assign[T=DT_FLOAT, _class=["loc:@dense_1/kernel"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](dense_1/kernel/Adam_1, save/RestoreV2:13)]]


## Conclusion