# FFNet, permuted (Hartford et al 2016)

## Start session

In [201]:
import tensorflow as tf
import numpy as np
import pandas as pd
# Open an interactive session. Later cells call `sess.run(...)` and
# `accuracy.eval(...)`; the latter relies on a default session being installed.
sess = tf.InteractiveSession()

## Import data and augment data

In [202]:
# Load the games and augment them with every relabelling of the three actions.
#
# Each CSV row provides a payoff matrix ('matrix') and a choice vector
# ('choice'), both handed to np.matrix for parsing.
# NOTE(review): assumes 'matrix' parses to 3x3 and 'choice' to 1x3 -- confirm
# against games3x3.csv.
import_csv = pd.read_csv("games3x3.csv")

# The six 3x3 permutation matrices; p[0] is the identity.
p = [[[1,0,0],[0,1,0],[0,0,1]],[[1,0,0],[0,0,1],[0,1,0]],
     [[0,1,0],[1,0,0],[0,0,1]],[[0,1,0],[0,0,1],[1,0,0]],
     [[0,0,1],[1,0,0],[0,1,0]],[[0,0,1],[0,1,0],[1,0,0]]]
perms = [np.matrix(perm) for perm in p]

n_games = import_csv.shape[0]
n_perms = len(perms)

# One block of n_games * n_perms rows per row permutation k. Blocks are
# collected in a list and concatenated once at the end instead of growing the
# arrays with np.concatenate on every pass.
input_blocks = []
target_blocks = []
for k in range(n_perms):
    inputs_tmp = np.zeros((n_games * n_perms, 18))
    target_tmp = np.zeros((n_games * n_perms, 3))
    for g in range(n_games):
        # Iterating over games directly avoids the `i/6` index arithmetic,
        # which yields a float (and an indexing error) on Python 3.
        game = np.matrix(import_csv['matrix'][g])
        choice = np.matrix(import_csv['choice'][g])

        # Left-multiplying by perms[k] reorders the rows of the game; the
        # choice vector is reordered with the same permutation. For k == 0
        # (identity) both are unchanged.
        a = perms[k] * game
        d = np.transpose(a)
        permuted_choice = np.transpose(perms[k] * np.transpose(choice))

        for j in range(n_perms):
            # Right-multiplying by perms[j] reorders the columns; the target
            # is left untouched by this inner permutation.
            # NOTE(review): the transposed matrix `d` has its columns permuted
            # by the same perms[j] as `a` -- confirm this is the intended
            # encoding for the second 9 features.
            c = np.array(a * perms[j]).flatten()
            f = np.array(d * perms[j]).flatten()
            row = g * n_perms + j
            inputs_tmp[row] = np.concatenate((c, f), axis=0)
            target_tmp[row] = permuted_choice
    input_blocks.append(inputs_tmp)
    target_blocks.append(target_tmp)

inputs_set = np.concatenate(input_blocks, axis=0)
target_set = np.concatenate(target_blocks, axis=0)

## Define train and test sets

In [203]:
# One random ordering of the row indices of inputs_set.
shuffle_index_set = np.random.permutation(np.arange(len(inputs_set)))

In [204]:
# 80/20 train/test split over the shuffled indices.
# Floor division keeps the cut point an integer on Python 3 as well; with `/`
# the slice bound becomes a float there and slicing raises TypeError.
n_train = inputs_set.shape[0] // 5 * 4
train_index_set = shuffle_index_set[:n_train]
tests_index_set = shuffle_index_set[n_train:]
train_inputs_set = inputs_set[train_index_set]
train_target_set = target_set[train_index_set]
tests_inputs_set = inputs_set[tests_index_set]
tests_target_set = target_set[tests_index_set]

## Define input and target (placeholders)

In [205]:
# Run-time inputs: 18 features and 3 target values per example. The leading
# None leaves the number of examples per feed unspecified.
x = tf.placeholder(dtype=tf.float32, shape=(None, 18))
y_ = tf.placeholder(dtype=tf.float32, shape=(None, 3))

## Define weight and bias (variables)

In [206]:
# Trainable parameters of the single linear layer, both initialised to zero:
# an 18x3 weight matrix and a length-3 bias.
W = tf.Variable(tf.zeros(shape=(18, 3)))
b = tf.Variable(tf.zeros(shape=(3,)))

## Define model and loss function

In [207]:
# Linear model: y holds the unnormalised scores (logits) for the 3 outputs.
y = tf.matmul(x, W) + b
# Mean softmax cross-entropy over the fed examples. The arguments are passed by
# keyword: TensorFlow >= 1.0 rejects positional logits/labels for this op, and
# keywords make it impossible to swap the two silently.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))

## Train model

In [211]:
# Adam optimiser minimising the cross-entropy loss.
train_step = tf.train.AdamOptimizer(
    learning_rate=0.0002, beta1=0.9, beta2=0.999, epsilon=1e-8
).minimize(cross_entropy)
sess.run(tf.global_variables_initializer())

n_train_examples = train_inputs_set.shape[0]
for i in range(1000):
    # Fresh mini-batch each step: the first 100 indices of a new random
    # ordering of the training rows (i.e. sampled without replacement).
    shuffle_index_batch = np.random.permutation(n_train_examples)[0:100]
    train_inputs_batch = train_inputs_set[shuffle_index_batch]
    train_target_batch = train_target_set[shuffle_index_batch]
    # Run one optimisation step and fetch the batch loss in the same call.
    _, loss = sess.run([train_step, cross_entropy],
                       feed_dict={x: train_inputs_batch, y_: train_target_batch})
    # Parenthesised form prints identically on Python 2 and is valid on
    # Python 3, where the bare `print '...'` statement is a SyntaxError.
    print('%s' % loss)

1.10126
1.09371
1.09995
1.09369
1.07379
1.0668
1.06704
1.07076
1.0635
1.05724
1.03837
1.03871
1.04228
1.03848
1.03977
1.02808
1.02514
1.02926
1.01942
1.00697
1.008
1.00377
1.00179
0.982283
0.97992
0.996025
0.978395
0.998739
0.985235
0.975499
0.944244
0.939109
0.981937
0.960682
0.955339
0.954569
0.953129
0.952639
0.945779
0.914002
0.955773
0.95645
0.926338
0.934347
0.950837
0.935618
0.926658
0.939741
0.93833
0.9181
0.899226
0.912282
0.924619
0.903442
0.906803
0.938262
0.909907
0.89272
0.895593
0.918997
0.8669
0.889423
0.89826
0.901968
0.906204
0.917779
0.892425
0.872496
0.909394
0.875749
0.903951
0.892026
0.913966
0.865258
0.853791
0.905345
0.895644
0.878826
0.893254
0.869507
0.868579
0.901104
0.889457
0.846039
0.879296
0.86343
0.885167
0.901845
0.892622
0.879104
0.890792
0.912466
0.882461
0.871492
0.893098
0.812447
0.8915
0.864104
0.908953
0.863084
0.859179
0.875073
0.867726
0.865565
0.834553
0.85315
0.869432
0.889569
0.907622
0.879745
0.859532
0.848479
0.881553
0.839738
0.889658
0.891

## Evaluate performance

In [212]:
# A row counts as correct when the index of the largest model output equals
# the index of the largest target entry.
predicted_index = tf.argmax(y, 1)
target_index = tf.argmax(y_, 1)
correct_prediction = tf.equal(predicted_index, target_index)
# Fraction of correct rows, evaluated on the held-out test split.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
test_feed = {x: tests_inputs_set, y_: tests_target_set}
print(accuracy.eval(feed_dict=test_feed))


0.731707


# GameNet (Hartford et al 2016)

## Build model

### Input

The input is $ 2 $ matrices  
where one is the row player payoff matrix  
where the other is the column player payoff matrix  

### Hidden layer 1

The first hidden layer is $ 50 $ matrices in the form  

$$ H_{1,k} = relu(w_i U_i + b) $$  
where $ i $ ranges over the set {1, ... 6}  
where $ k $ ranges over the set {1, ... 50}  
where $ U_1 $ is the row player payoff matrix  
where $ U_2 $ is the row player payoff matrix after column-wise pooling  
where $ U_3 $ is the row player payoff matrix after row-wise pooling  
where $ U_4 $ is the column player payoff matrix  
where $ U_5 $ is the column player payoff matrix after column-wise pooling  
where $ U_6 $ is the column player payoff matrix after row-wise pooling  

### Hidden layer 2

The second hidden feature layer is $ 50 $ matrices in the form  

$$ H_{2,k} = relu(w_i H_i + b) $$  
where $ i $ ranges over the set {1, ... 150}  
where $ k $ ranges over the set {1, ... 50}  
where $ H_1 $ is $ H_{1,1} $  
where $ H_2 $ is $ H_{1,1} $ after column-wise pooling  
where $ H_3 $ is $ H_{1,1} $ after row-wise pooling  
where $ H_4 $ is $ H_{1,2} $    
...  
where $ H_{150} $ is $ H_{1,50} $ after row-wise pooling

### Action response layer 0

The zeroth action response layer is a mixture of distributions in the form  

$$ R_0 = w_i f_i $$  
where $ i $ ranges over the set {1, ... 50}  
where $ f_i = softmax(H_i z) $  
where $ H_1 $ is $ H_{2,1} $  
...  
where $ H_{50} $ is $ H_{2,50} $  
where $ z $ is a column vector of ones

### Action response layer 1

The first action response layer is a mixture of distributions in the form  

$$ R_1 = softmax(\lambda(v_j ((w_i H_i) \cdot R^c_0))) $$  
where $ i $ ranges over the set {1, ... 50}  
where $ j $ ranges over the set {0, ... 0}  
where $ H_1 $ is $ H_{2,1} $  
...  
where $ H_{50} $ is $ H_{2,50} $  
where $ R^c_0 $ is the column player zeroth action response layer  

### Output

The output is a mixture of distributions in the form  

$$ y = w_i R_i $$  
where $ i $ ranges over the set {0, ... 1}  

In [None]:
# input
x = tf.placeholder(tf.float32, [None, None])
# weights
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
# model
y = tf.nn.softmax(tf.matmul(x, W) + b)

## Add optimizer

- Adam
    - initial learning rate = $ 2e-4 $
    - $ \beta_1 = 0.9 $
    - $ \beta_2 = 0.999 $
    - $ \epsilon = 1e-8 $
- Dropout
    - drop probability = $ 0.2 $
    - $ L_1 $ regularization parameter = $ 0.01 $
- $ 25000 $ epochs
- Projected gradient algorithm (modified SGD)

In [26]:
# dropout
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)



# cost function (here defined by cross-entropy)
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
# optimization algorithm
train_step = tf.train.AdamOptimizer(2e-4, 0.9, 0.999, 1e-8).minimize(cross_entropy)
# initialize variables
init = tf.global_variables_initializer()
# launch model in session
sess = tf.Session()
sess.run(init)
# run training step x times
for i in range(1000):
  batch_xs, batch_ys = mnist.train.next_batch(100)
  sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

## Evaluate performance

- 10-fold cross-validation

In [None]:
# A row is correct when the arg-max of the model output matches the arg-max of
# the target.
predicted_index = tf.argmax(y, 1)
target_index = tf.argmax(y_, 1)
correct_prediction = tf.equal(predicted_index, target_index)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# NOTE(review): `mnist` is not defined in any visible cell -- presumably the
# held-out games split is what should be fed here; confirm.
eval_feed = {x: mnist.test.images, y_: mnist.test.labels}
print(sess.run(accuracy, feed_dict=eval_feed))