# FFNet (Hartford et al 2016)

## Start session

In [147]:
import tensorflow as tf
import numpy as np
import pandas as pd
# An InteractiveSession installs itself as the default session on creation;
# later cells rely on that when they call `train_step.run(...)` and
# `accuracy.eval(...)` without passing a session explicitly.
sess = tf.InteractiveSession()

## Import data

In [277]:
# Load the games table and split it into model inputs and targets.
import_csv = pd.read_csv("games3x3ffnet.csv")
# Select whole columns by name instead of copying one cell at a time in a
# Python loop. Casting to float64 keeps the dtype the np.zeros buffers had.
matrix_columns = ["matrix{}".format(k) for k in range(1, 10)]
choice_columns = ["choice{}".format(k) for k in range(1, 4)]
# inputs_set: one row per CSV row, columns matrix1..matrix9 -> shape (n_rows, 9)
inputs_set = import_csv[matrix_columns].values.astype(np.float64)
# target_set: one row per CSV row, columns choice1..choice3 -> shape (n_rows, 3)
target_set = import_csv[choice_columns].values.astype(np.float64)

## Define train and test sets

In [278]:
# Random ordering of all row indices; passing the row count directly makes
# NumPy shuffle arange(n), the same array the explicit range() produced.
shuffle_index_set = np.random.permutation(inputs_set.shape[0])

In [292]:
# Split the shuffled row indices: the first 55 rows train the model, the
# next 13 (positions 55..67) are held out for testing.
# The permutation is stored in `shuffle_index_set`; the previous name
# `shuffle_index` is not defined anywhere and raised a NameError.
train_index_set = shuffle_index_set[0:55]
tests_index_set = shuffle_index_set[55:68]
# Fancy-index inputs and targets with the same indices so rows stay paired.
train_inputs_set = inputs_set[train_index_set]
train_target_set = target_set[train_index_set]
tests_inputs_set = inputs_set[tests_index_set]
tests_target_set = target_set[tests_index_set]

## Define input and target (placeholders)

In [143]:
# Graph inputs, fed at run time; the leading None leaves the batch size open.
# x  : 9 values per row (the matrix1..matrix9 columns)
# y_ : 3 values per row (the choice1..choice3 columns)
x = tf.placeholder(dtype=tf.float32, shape=(None, 9))
y_ = tf.placeholder(dtype=tf.float32, shape=(None, 3))

## Define weight and bias (variables)

In [284]:
# Trainable parameters of the linear layer, both starting at zero:
# W maps the 9 inputs to 3 outputs, b is the per-output offset.
W = tf.Variable(initial_value=tf.zeros(shape=(9, 3)))
b = tf.Variable(initial_value=tf.zeros(shape=(3,)))
# Variables hold no value until their initializer has been run in a session.
sess.run(tf.global_variables_initializer())

## Define model and loss function

In [145]:
# Linear model: y holds the unnormalised scores (logits), one row per example.
y = tf.matmul(x, W) + b
# Mean softmax cross-entropy over the batch. The op applies softmax itself,
# so it must be given the raw logits. Arguments are passed by keyword because
# TensorFlow >= 1.0 rejects positional arguments here (the parameter order
# changed between releases); keywords are also accepted by 0.12.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))

## Train model

In [285]:
# Plain gradient descent on the cross-entropy loss with step size 0.5.
train_step = tf.train.GradientDescentOptimizer(learning_rate=0.5).minimize(cross_entropy)
for i in range(100):
    # Truncating a fresh permutation of the training rows yields a minibatch
    # of (up to) 10 distinct rows, sampled without replacement.
    shuffle_index_batch = np.random.permutation(train_inputs_set.shape[0])[:10]
    train_inputs_batch, train_target_batch = (
        train_inputs_set[shuffle_index_batch],
        train_target_set[shuffle_index_batch],
    )
    # Operation.run() executes in the default session.
    train_step.run(feed_dict={x: train_inputs_batch, y_: train_target_batch})

## Evaluate performance

In [293]:
# A row counts as correct when the largest model score and the largest
# target value sit in the same column.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
# Casting the booleans to 0.0/1.0 and averaging gives the hit rate.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))
# Evaluate on the held-out rows in the default session.
print(accuracy.eval(feed_dict={
    x: tests_inputs_set,
    y_: tests_target_set,
}))

0.615385


# GameNet (Hartford et al 2016)

## Build model

### Input

The input is $ 2 $ matrices  
where one is the row player payoff matrix  
where the other is the column player payoff matrix  

### Hidden layer 1

The first hidden layer is $ 50 $ matrices in the form  

$$ H_{1,k} = relu(w_i U_i + b) $$  
where $ i $ ranges over the set {1, ... 6}  
where $ k $ ranges over the set {1, ... 50}  
where $ U_1 $ is the row player payoff matrix  
where $ U_2 $ is the row player payoff matrix after column-wise pooling  
where $ U_3 $ is the row player payoff matrix after row-wise pooling  
where $ U_4 $ is the column player payoff matrix  
where $ U_5 $ is the column player payoff matrix after column-wise pooling  
where $ U_6 $ is the column player payoff matrix after row-wise pooling  

### Hidden layer 2

The second hidden feature layer is $ 50 $ matrices in the form  

$$ H_{2,k} = relu(w_i H_i + b) $$  
where $ i $ ranges over the set {1, ... 150}  
where $ k $ ranges over the set {1, ... 50}  
where $ H_1 $ is $ H_{1,1} $  
where $ H_2 $ is $ H_{1,1} $ after column-wise pooling  
where $ H_3 $ is $ H_{1,1} $ after row-wise pooling  
where $ H_4 $ is $ H_{1,2} $    
...  
where $ H_{150} $ is $ H_{1,50} $ after row-wise pooling

### Action response layer 0

The zeroth action response layer is a mixture of distributions in the form  

$$ R_0 = w_i f_i $$  
where $ i $ ranges over the set {1, ... 50}  
where $ f_i = softmax(H_i z) $  
where $ H_1 $ is $ H_{2,1} $  
...  
where $ H_{50} $ is $ H_{2,50} $  
where $ z $ is a column vector of ones

### Action response layer 1

The first action response layer is a mixture of distributions in the form  

$$ R_1 = softmax(\lambda(v_j ((w_i H_i) \cdot R^c_0))) $$  
where $ i $ ranges over the set {1, ... 50}  
where $ j $ ranges over the set {0, ... 0}  
where $ H_1 $ is $ H_{2,1} $  
...  
where $ H_{50} $ is $ H_{2,50} $  
where $ R^c_0 $ is the column player zeroth action response layer  

### Output

The output is a mixture of distributions in the form  

$$ y = w_i R_i $$  
where $ i $ ranges over the set {0, ... 1}  

In [None]:
# NOTE(review): the 784/10 shapes below do not correspond to the two-matrix
# input described in the text above; this looks like a stand-in model that
# still has to be replaced by the GameNet layers -- confirm.
# Input: float32 matrix with both dimensions left unspecified.
x = tf.placeholder(dtype=tf.float32, shape=[None, None])
# Parameters of a single linear layer, zero-initialised.
W = tf.Variable(initial_value=tf.zeros(shape=[784, 10]))
b = tf.Variable(initial_value=tf.zeros(shape=[10]))
# Model output: softmax over the linear scores.
y = tf.nn.softmax(tf.matmul(x, W) + b)

## Add optimizer

- Adam
    - initial learning rate = $ 2e-4 $
    - $ \beta_1 = 0.9 $
    - $ \beta_2 = 0.999 $
    - $ \epsilon = 1e-8 $
- Dropout
    - drop probability = $ 0.2 $
    - $ L_1 $ regularization parameter = $ 0.01 $
- $ 25000 $ epochs
- Projected gradient algorithm (modified SGD)

In [26]:
# dropout
# keep_prob is a float32 placeholder with no fixed shape; it is passed to
# tf.nn.dropout as the keep probability.
# NOTE(review): `h_fc1` is not defined in any cell visible in this notebook,
# `h_fc1_drop` is not used by the loss below, and `keep_prob` is not fed in
# the training loop -- confirm the intended wiring.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)



# cost function (here defined by cross-entropy)
# y_ holds the targets (10 columns per row); the loss is -sum(y_ * log(y))
# over axis 1, averaged over the batch.
# NOTE(review): tf.log(y) is taken of the model output directly; if y can
# reach exactly 0 this produces -inf/NaN -- confirm whether that can happen.
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
# optimization algorithm
# Positional arguments are learning_rate=2e-4, beta1=0.9, beta2=0.999,
# epsilon=1e-8, i.e. the Adam settings listed in the text above.
train_step = tf.train.AdamOptimizer(2e-4, 0.9, 0.999, 1e-8).minimize(cross_entropy)
# initialize variables
# Created after minimize() so that the variables the optimizer adds to the
# graph are covered by the initializer as well.
init = tf.global_variables_initializer()
# launch model in session
# This rebinds `sess` to a new, plain tf.Session.
sess = tf.Session()
sess.run(init)
# run training step x times
# 1000 steps on batches of 100.
# NOTE(review): the text above specifies 25000 epochs, and `mnist` is not
# defined in any visible cell -- confirm the data source and schedule.
for i in range(1000):
  batch_xs, batch_ys = mnist.train.next_batch(100)
  sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## Evaluate performance

- 10-fold-cross-validation

In [None]:
# A row is a hit when the model's top-scoring column matches the target's.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
# Mean of the 0/1 hits = fraction of rows predicted correctly.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))
# accuracy?
# NOTE(review): `mnist` is not defined in any visible cell -- confirm source.
print(sess.run(accuracy, feed_dict={
    x: mnist.test.images,
    y_: mnist.test.labels,
}))