# FFNet (Hartford et al 2016)

## Session

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from __future__ import division
sess = tf.InteractiveSession()

## Data

In [2]:
import_csv = pd.read_csv('gamesmxn.csv')
inputs_csv = np.zeros((import_csv.shape[0],18))
target_csv = np.zeros((import_csv.shape[0],3))
for i in range(import_csv.shape[0]):
    if import_csv['shape'][i] == '3 3' and import_csv['symmetric'][i] == 1:
        Ur = np.matrix(import_csv['matrixrow'][i])
        Ur = (Ur-np.mean(Ur))/np.std(Ur)
        Ur_vector = np.array(Ur).flatten()
        Uc = np.transpose(Ur)
        Uc_vector = np.array(Uc).flatten()
        inputs_csv[i] = np.concatenate((Ur_vector, Uc_vector), axis=0)
        ar = np.matrix(import_csv['choicerow'][i])
        if ar.shape[1] == 2:
            ar = ar[:,0]/ar[:,1]
        ar_vector = ar.flatten()
        target_csv[i] = ar_vector
inputs_set = inputs_csv[~np.all(inputs_csv == 0, axis=1)]
target_set = target_csv[~np.all(inputs_csv == 0, axis=1)]

### Data augmentation

In [3]:
p = [[[1,0,0],[0,1,0],[0,0,1]],[[1,0,0],[0,0,1],[0,1,0]],
     [[0,1,0],[1,0,0],[0,0,1]],[[0,1,0],[0,0,1],[1,0,0]],
     [[0,0,1],[1,0,0],[0,1,0]],[[0,0,1],[0,1,0],[1,0,0]]]

In [4]:
inputs_aug = np.zeros((inputs_set.shape[0]*6*6,18))
target_aug = np.zeros((inputs_set.shape[0]*6*6,3))
for k in range(6):
    for i in range(inputs_set.shape[0]*6)[::6]:
        Ur = p[k]*np.matrix(inputs_set[i//6,0:9]).reshape(3,3)
        Ur = (Ur-np.mean(Ur))/np.std(Ur)
        for j in range(6):
            Ur = Ur*p[j]
            Ur_vector = np.array(Ur).flatten()
            Uc = np.transpose(Ur)
            Uc = Uc*p[j]
            Uc_vector = np.array(Uc).flatten()
            inputs_aug[k*inputs_set.shape[0]*6+i+j] = np.concatenate((Ur_vector, Uc_vector), axis=0)
            ar = np.matrix(target_set[i//6]).reshape(3,1)
            ar = p[k]*ar
            ar_vector = ar.flatten()
            target_aug[k*inputs_set.shape[0]*6+i+j] = ar_vector

### Training and test sets

In [5]:
inputs_train = inputs_aug
target_train = target_aug
inputs_tests = inputs_set
target_tests = target_set

## Model 

### Inputs and target

In [6]:
x = tf.placeholder(tf.float32, shape=[None, 18])
y_ = tf.placeholder(tf.float32, shape=[None, 3])

### Weights and biases

In [7]:
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

### Fully-connected layers

In [8]:
hi = 50

In [9]:
W_1 = weight_variable([18, hi])
b_1 = bias_variable([hi])
h_1 = tf.nn.relu(tf.matmul(x, W_1) + b_1)

In [10]:
W_2 = weight_variable([hi, hi])
b_2 = bias_variable([hi])
h_2 = tf.nn.relu(tf.matmul(h_1, W_2) + b_2)

### Dropout
- Drop probability = 0.2

In [11]:
keep_prob = tf.placeholder(tf.float32)
h_2_drop = tf.nn.dropout(h_2, keep_prob)

### Readout

In [12]:
W_3 = weight_variable([hi, 3])
b_3 = bias_variable([3])
y = tf.nn.softmax(tf.matmul(h_2_drop, W_3) + b_3)

### Cost function

#### $L_2$ regularization 
- $ \beta = 0.01 $

In [13]:
beta = 0.01
regularizer = tf.nn.l2_loss(W_1) + tf.nn.l2_loss(W_2) + tf.nn.l2_loss(W_3)

#### Cross-entropy

In [14]:
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y) + beta * regularizer, reduction_indices=[1]))

### Optimization

#### Adam
- initial learning rate = $ 0.0002 $
- $ \beta_1 = 0.9 $
- $ \beta_2 = 0.999 $
- $ \epsilon = 1e-8 $

In [15]:
train_step = tf.train.AdamOptimizer(0.0002,0.9,0.999,1e-8).minimize(cross_entropy)

## Train

In [16]:
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sess.run(tf.global_variables_initializer())
index_shuffle = np.random.permutation(range(inputs_train.shape[0]))
for i in range(2500):
    if i%6 == 0:
        index_shuffle = np.random.permutation(range(inputs_train.shape[0]))
    inputs_train_batch = inputs_train[index_shuffle[index_shuffle.shape[0]//6*(i%6):index_shuffle.shape[0]//6*(i%6+1)]]
    target_train_batch = target_train[index_shuffle[index_shuffle.shape[0]//6*(i%6):index_shuffle.shape[0]//6*(i%6+1)]]
    if i%100 == 0:
        train_accuracy = accuracy.eval(feed_dict={x: inputs_train_batch, y_: target_train_batch, keep_prob: 1.0})
        train_NLL = cross_entropy.eval(feed_dict={x: inputs_train_batch, y_: target_train_batch, keep_prob: 1.0})
        print("step %d, train accuracy %g, train NLL %g"%(i, train_accuracy, train_NLL))
    train_step.run(feed_dict={x: inputs_train_batch, y_: target_train_batch, keep_prob: 0.8})

step 0, train accuracy 0.265432, train NLL 0.699599
step 100, train accuracy 0.523148, train NLL 0.499286
step 200, train accuracy 0.625, train NLL 0.203482
step 300, train accuracy 0.736111, train NLL -0.16775
step 400, train accuracy 0.768519, train NLL -0.590908
step 500, train accuracy 0.746914, train NLL -1.01584
step 600, train accuracy 0.762346, train NLL -1.49931
step 700, train accuracy 0.799383, train NLL -2.07745
step 800, train accuracy 0.759259, train NLL -2.70878
step 900, train accuracy 0.773148, train NLL -3.44219
step 1000, train accuracy 0.774691, train NLL -4.26238
step 1100, train accuracy 0.782407, train NLL -5.1589
step 1200, train accuracy 0.75463, train NLL -6.12547
step 1300, train accuracy 0.734568, train NLL -7.20263
step 1400, train accuracy 0.785494, train NLL -8.35143
step 1500, train accuracy 0.74537, train NLL -9.57695
step 1600, train accuracy 0.734568, train NLL -10.8969
step 1700, train accuracy 0.768519, train NLL -12.2762
step 1800, train accuracy 0

## Test

In [17]:
test_accuracy = accuracy.eval(feed_dict={x: inputs_tests, y_: target_tests, keep_prob: 1.0})
test_NLL = cross_entropy.eval(feed_dict={x: inputs_tests, y_: target_tests, keep_prob: 1.0})
print("test accuracy %g, test NLL %g"%(test_accuracy, test_NLL))

test accuracy 0.759259, test NLL -25.8901


In [18]:
pd.set_option('display.max_rows', None)
compare = pd.DataFrame(np.concatenate((y.eval(feed_dict={x: inputs_tests, y_: target_tests, keep_prob: 1.0}),
                            y_.eval(feed_dict={x: inputs_tests, y_: target_tests, keep_prob: 1.0})),axis=1))
compare.columns = ["y1","y2","y3","y_1","y_2","y_3"]
compare

Unnamed: 0,y1,y2,y3,y_1,y_2,y_3
0,0.231543,0.205917,0.56254,0.275,0.0,0.725
1,0.392368,0.269764,0.337868,0.65,0.175,0.175
2,0.443386,0.515899,0.040715,0.35,0.65,0.0
3,0.029966,0.591738,0.378297,0.0,0.675,0.325
4,0.456095,0.181239,0.362666,0.45,0.0,0.55
5,0.242409,0.592754,0.164837,0.1,0.875,0.025
6,0.043063,0.40622,0.550717,0.15,0.775,0.075
7,0.266536,0.318728,0.414736,0.2,0.325,0.475
8,0.686134,0.156928,0.156938,0.65,0.0,0.35
9,0.481345,0.007312,0.511343,0.125,0.0,0.875
