# FFNet, permuted (Hartford et al. 2016)

## Still to do
- Implement L1 regularization
- Implement projected gradient descent
- Achieve satisfactory performance on model

## Session, start

In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
# Interactive TensorFlow session for the notebook: used explicitly through
# sess.run(...) in the training cell, and relied on by the session-less
# accuracy.eval(...) call in the evaluation cell.
sess = tf.InteractiveSession()

## Data, import and augment

In [3]:
# Load the games. The 'matrix' and 'choice' columns are passed straight to
# np.matrix, so they are presumably strings in np.matrix syntax -- TODO confirm
# against the CSV.
import_csv = pd.read_csv("games3x3.csv")

# The six 3x3 permutation matrices; p[0] is the identity.
p = [[[1,0,0],[0,1,0],[0,0,1]],[[1,0,0],[0,0,1],[0,1,0]],
     [[0,1,0],[1,0,0],[0,0,1]],[[0,1,0],[0,0,1],[1,0,0]],
     [[0,0,1],[1,0,0],[0,1,0]],[[0,0,1],[0,1,0],[1,0,0]]]

n_games = import_csv.shape[0]
n_perms = len(p)

# Augment every game with all (row permutation k, column permutation j) pairs.
# Resulting row layout: index = k * (n_games * n_perms) + game * n_perms + j.
# Games are indexed directly, so no `/` division is needed (under Python 3
# `i/6` would produce a float index).
inputs_blocks = []
target_blocks = []
for k in range(n_perms):
    inputs_tmp = np.zeros((n_games * n_perms, 18))
    target_tmp = np.zeros((n_games * n_perms, 3))
    for g in range(n_games):
        # Left-multiplying by p[k] permutes the rows of the game matrix;
        # for k == 0 (identity) the matrix is left unchanged.
        a = p[k] * np.matrix(import_csv['matrix'][g])
        d = np.transpose(a)
        # The choice vector is permuted with the same p[k]; it does not
        # depend on the column permutation j.
        choice = np.transpose(p[k] * np.transpose(np.matrix(import_csv['choice'][g])))
        for j in range(n_perms):
            row = g * n_perms + j
            # Right-multiplying by p[j] permutes the columns of a and of its
            # transpose; both are flattened and concatenated into 18 features.
            c = np.array(a * p[j]).flatten()
            f = np.array(d * p[j]).flatten()
            inputs_tmp[row] = np.concatenate((c, f), axis=0)
            target_tmp[row] = choice
    inputs_blocks.append(inputs_tmp)
    target_blocks.append(target_tmp)

inputs_set = np.concatenate(inputs_blocks, axis=0)
target_set = np.concatenate(target_blocks, axis=0)

## Data, define training and test sets

In [4]:
# Random ordering of all example indices; the train/test split is taken from it.
num_examples = inputs_set.shape[0]
shuffle_index_set = np.random.permutation(np.arange(num_examples))

In [5]:
# Roughly 80/20 train/test split over the shuffled indices. Floor division
# keeps the split point an integer on Python 3, where `/` would yield a float
# and the slices below would raise TypeError.
split_point = inputs_set.shape[0] // 5 * 4
train_index_set = shuffle_index_set[:split_point]
tests_index_set = shuffle_index_set[split_point:]
train_inputs_set = inputs_set[train_index_set]
train_target_set = target_set[train_index_set]
tests_inputs_set = inputs_set[tests_index_set]
tests_target_set = target_set[tests_index_set]

## Model, define input and target layer

In [6]:
# Input: any number of examples, 18 features each (the width of inputs_set).
x = tf.placeholder(dtype=tf.float32, shape=[None, 18])
# Target: any number of examples, 3 values each (the width of target_set).
y_ = tf.placeholder(dtype=tf.float32, shape=[None, 3])

## Model, initialize weights and bias

In [7]:
def weight_variable(shape):
    """Return a tf.Variable of `shape` initialised from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a tf.Variable of `shape` with every entry initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

## Model, define layers

In [8]:
# First hidden layer: 18 inputs -> 12 ReLU units.
W_1 = weight_variable([18, 12])
b_1 = bias_variable([12])
z_1 = tf.matmul(x, W_1) + b_1
h_1 = tf.nn.relu(z_1)

In [10]:
# Second hidden layer: 12 hidden units -> 6 ReLU units.
W_2 = weight_variable([12, 6])
b_2 = bias_variable([6])
z_2 = tf.matmul(h_1, W_2) + b_2
h_2 = tf.nn.relu(z_2)

In [11]:
# Dropout on the second hidden layer. keep_prob is a placeholder without a
# default value, so it has to be supplied through feed_dict whenever a tensor
# that depends on h_2_drop is evaluated.
keep_prob = tf.placeholder(tf.float32)
h_2_drop = tf.nn.dropout(h_2, keep_prob)

In [13]:
# Output layer: 6 (dropped-out) hidden units -> 3 unnormalised scores (logits).
W_3 = weight_variable([6, 3])
b_3 = bias_variable([3])
z_3 = tf.matmul(h_2_drop, W_3)
y = z_3 + b_3

## Model, define cost and optimizer, then train

In [213]:
# Mean softmax cross-entropy between the logits y and the targets y_.
# Keyword arguments are used because TensorFlow 1.x rejects positional
# arguments for this op; the names are also valid on earlier releases.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
train_step = tf.train.AdamOptimizer(
    learning_rate=0.0002, beta1=0.9, beta2=0.999, epsilon=1e-8).minimize(cross_entropy)
sess.run(tf.global_variables_initializer())
for i in range(1000):
    # Fresh random mini-batch of up to 100 training examples per step.
    shuffle_index_batch = np.random.permutation(train_inputs_set.shape[0])[0:100]
    train_inputs_batch = train_inputs_set[shuffle_index_batch]
    train_target_batch = train_target_set[shuffle_index_batch]
    # keep_prob must be fed because y depends on the dropout layer; without it
    # the run fails on the unfed placeholder. 0.8 matches the drop probability
    # of 0.2 listed in this notebook's optimizer notes -- adjust if a
    # different rate is intended for this model.
    [_, loss] = sess.run([train_step, cross_entropy],
                         feed_dict={x: train_inputs_batch,
                                    y_: train_target_batch,
                                    keep_prob: 0.8})
# Display the loss of the final mini-batch as the cell output.
loss

0.85262561

## Model, evaluate performance

In [214]:
# Accuracy: fraction of test examples whose highest-scoring output index
# equals the arg-max of the target vector.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# keep_prob has to be fed because y depends on the dropout layer; 1.0 keeps
# every unit, i.e. dropout is switched off for evaluation.
print(accuracy.eval(feed_dict={x: tests_inputs_set,
                               y_: tests_target_set,
                               keep_prob: 1.0}))

0.731707


# GameNet (Hartford et al. 2016)

## Still to do
- Write model after FFNet performance satisfactory

## Model, define layers

### Input

The input is $ 2 $ matrices  
where one is the row player payoff matrix  
where the other is the column player payoff matrix  

### Hidden layer 1

The first hidden layer is $ 50 $ matrices in the form  

$$ H_{1,k} = \mathrm{relu}(w_i U_i + b) $$  
where $ i $ ranges over the set {1, ... 6}  
where $ k $ ranges over the set {1, ... 50}  
where $ U_1 $ is the row player payoff matrix  
where $ U_2 $ is the row player payoff matrix after column-wise pooling  
where $ U_3 $ is the row player payoff matrix after row-wise pooling  
where $ U_4 $ is the column player payoff matrix  
where $ U_5 $ is the column player payoff matrix after column-wise pooling  
where $ U_6 $ is the column player payoff matrix after row-wise pooling  

### Hidden layer 2

The second hidden feature layer is $ 50 $ matrices in the form  

$$ H_{2,k} = \mathrm{relu}(w_i H_i + b) $$  
where $ i $ ranges over the set {1, ... 150}  
where $ k $ ranges over the set {1, ... 50}  
where $ H_1 $ is $ H_{1,1} $  
where $ H_2 $ is $ H_{1,1} $ after column-wise pooling  
where $ H_3 $ is $ H_{1,1} $ after row-wise pooling  
where $ H_4 $ is $ H_{1,2} $    
...  
where $ H_{150} $ is $ H_{1,50} $ after row-wise pooling

### Action response layer 0

The zeroth action response layer is a mixture of distributions in the form  

$$ R_0 = w_i f_i $$  
where $ i $ ranges over the set {1, ... 50}  
where $ f_i = \mathrm{softmax}(H_i z) $  
where $ H_1 $ is $ H_{2,1} $  
...  
where $ H_{50} $ is $ H_{2,50} $  
where $ z $ is a column vector of ones

### Action response layer 1

The first action response layer is a mixture of distributions in the form  

$$ R_1 = \mathrm{softmax}(\lambda(v_j ((w_i H_i) \cdot R^c_0))) $$  
where $ i $ ranges over the set {1, ... 50}  
where $ j $ ranges over the set {0, ... 0}  
where $ H_1 $ is $ H_{2,1} $  
...  
where $ H_{50} $ is $ H_{2,50} $  
where $ R^c_0 $ is the column player zeroth action response layer  

### Output

The output is a mixture of distributions in the form  

$$ y = w_i R_i $$  
where $ i $ ranges over the set {0, ... 1}  

## Model, define optimizer

- Adam optimizer
    - initial learning rate = $ 0.0002 $
    - $ \beta_1 = 0.9 $
    - $ \beta_2 = 0.999 $
    - $ \epsilon = 1e-8 $
- Dropout
    - drop probability = $ 0.2 $
- Other regularization
    - $ L_1 $ regularization parameter = $ 0.01 $
- Training steps
    - $ 25000 $ epochs
- Gradient
    - Projected gradient descent

## Model, evaluate performance

- 10-fold cross-validation