In [1]:
import os
import json

import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

In [14]:
# Report how many GPUs TensorFlow can see before any model is built.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  1


In [15]:
class NeuralNetwork():
    """Two-headed (policy + value) convolutional network over 12x8x8 inputs.

    The model takes a ``(12, 8, 8)`` tensor per position and produces:

    * ``pi`` -- a softmax over ``64 * 64`` outputs,
    * ``v``  -- a single ``tanh`` scalar.

    Attributes:
        name: Sub-directory of ``models/`` used by the save/load helpers.
        optimizer: SGD optimizer the model is compiled with.
        loss: Legacy attribute kept for backward compatibility; the model is
            compiled with the per-head losses in ``_compile``, not this value.
        model: The compiled ``tf.keras.Model``.
        results: Free-form text written out by ``save_results``.
    """

    def __init__(self, name="default"):
        """Build and compile a freshly initialised network.

        Args:
            name: Directory name under ``models/`` for saved artefacts. The
                save/load helpers read ``self.name``, so it must always be set.
        """
        self.name = name
        self.optimizer = tf.optimizers.SGD(
            learning_rate=0.001, momentum=0.8
        )
        self.loss = 'mean_absolute_error'
        self.model = None
        self.results = ""

        self.define()

    @staticmethod
    def _conv_block(tensor, num_channels, padding):
        """Conv2D(3x3, no bias) -> BatchNorm over channels -> ReLU."""
        tensor = tf.keras.layers.Conv2D(
            num_channels, 3, padding=padding, use_bias=False
        )(tensor)
        tensor = tf.keras.layers.BatchNormalization(axis=3)(tensor)
        return tf.keras.layers.Activation('relu')(tensor)

    @staticmethod
    def _dense_block(tensor, units, dropout):
        """Dense(no bias) -> BatchNorm -> ReLU -> Dropout."""
        tensor = tf.keras.layers.Dense(units, use_bias=False)(tensor)
        tensor = tf.keras.layers.BatchNormalization(axis=1)(tensor)
        tensor = tf.keras.layers.Activation('relu')(tensor)
        return tf.keras.layers.Dropout(dropout)(tensor)

    def _compile(self):
        """Compile ``self.model`` with one loss per head (pi, then v)."""
        self.model.compile(
            loss=['categorical_crossentropy', 'mean_squared_error'],
            optimizer=self.optimizer,
        )

    def _model_dir(self):
        """Return ``models/<name>``, creating it (and ``models/``) if needed."""
        path = f"models/{self.name}"
        # makedirs also creates the parent and tolerates an existing directory.
        os.makedirs(path, exist_ok=True)
        return path

    def define(self):
        """Create the network graph, store it in ``self.model`` and compile it."""
        dropout = 0.3
        num_channels = 512

        input_layer = tf.keras.Input(shape=(12, 8, 8))
        # Move the 12 planes to the last axis so Conv2D sees a 4-D
        # channels-last tensor and BatchNormalization(axis=3) really is the
        # channel axis. The previous Reshape to (12, 8, 8, 1) produced a 5-D
        # tensor, for which axis 3 is a board axis.
        # NOTE(review): assumes the input layout is (planes, rows, cols) -- confirm.
        x_image = tf.keras.layers.Permute((2, 3, 1))(input_layer)          # batch x 8 x 8 x 12
        h_conv1 = self._conv_block(x_image, num_channels, 'same')           # batch x 8 x 8 x num_channels
        h_conv2 = self._conv_block(h_conv1, num_channels, 'same')           # batch x 8 x 8 x num_channels
        h_conv3 = self._conv_block(h_conv2, num_channels, 'valid')          # batch x 6 x 6 x num_channels
        h_conv4 = self._conv_block(h_conv3, num_channels, 'valid')          # batch x 4 x 4 x num_channels
        h_conv4_flat = tf.keras.layers.Flatten()(h_conv4)
        s_fc1 = self._dense_block(h_conv4_flat, 1024, dropout)              # batch x 1024
        s_fc2 = self._dense_block(s_fc1, 512, dropout)                      # batch x 512
        self.pi = tf.keras.layers.Dense(64 * 64, activation='softmax', name='pi')(s_fc2)  # batch x 4096
        self.v = tf.keras.layers.Dense(1, activation='tanh', name='v')(s_fc2)             # batch x 1

        self.model = tf.keras.Model(inputs=input_layer, outputs=[self.pi, self.v])
        self._compile()

    def load_or_create(self):
        """Load the saved architecture for ``self.name`` or build a new model.

        Only the architecture (layer config) is restored; weights are freshly
        initialised. The loaded model is compiled so it can be trained directly.
        """
        path = f"models/{self.name}/architecutre.json"
        if not os.path.exists(path):
            self.define()
            return

        with open(path) as file:
            config = json.load(file)
        self.model = tf.keras.Model.from_config(config)
        self.pi, self.v = self.model.outputs
        self._compile()

    def save_architecutre(self):
        """Write the model config (JSON) and a text summary to ``models/<name>``."""
        path = self._model_dir()

        with open(f"{path}/architecutre.json", 'w') as file:
            json.dump(self.model.get_config(), file, indent=4)

        with open(f"{path}/summary.txt", 'w') as file:
            def write_line(line, **_ignored):
                # Extra keyword arguments from the caller are accepted and ignored.
                print(line, file=file)
            self.model.summary(print_fn=write_line)

    # Correctly spelled alias; the original name is kept for existing callers.
    save_architecture = save_architecutre

    def save_results(self):
        """Write ``self.results`` to ``models/<name>/results.txt``."""
        path = self._model_dir()

        with open(f"{path}/results.txt", 'w') as file:
            file.write(self.results)

    def predict(self, board):
        """Evaluate a single position.

        Args:
            board: Array-like of shape ``(12, 8, 8)``.

        Returns:
            ``(pi, v)`` for that position: the first row of each model output.
        """
        batch = np.asarray(board)[np.newaxis, ...]   # add the batch dimension
        pi, v = self.model.predict(batch, verbose=0)

        return pi[0], v[0]

    def train(self, examples):
        """Fit the model on ``(board, pi, v)`` examples.

        Args:
            examples: Iterable of ``(board, target_pi, target_v)`` triples.

        Returns:
            The object returned by ``self.model.fit``.

        Raises:
            ValueError: If ``examples`` is empty.
        """
        examples = list(examples)
        if not examples:
            raise ValueError("train() needs at least one (board, pi, v) example")

        input_boards, target_pis, target_vs = zip(*examples)
        input_boards = np.asarray(input_boards)
        target_pis = np.asarray(target_pis)
        # Shape the value targets as (N, 1) to match the `v` head; the float
        # cast also fails early if a reward was left as None.
        target_vs = np.asarray(target_vs, dtype=np.float32).reshape(-1, 1)
        return self.model.fit(
            x=input_boards, y=[target_pis, target_vs], batch_size=64, epochs=10
        )


In [None]:
from game import MCTS

def policyIterSP(game, num_iters=10, num_episodes=10):
    """Run self-play policy iteration and return the trained network.

    Each iteration plays ``num_episodes`` self-play games with the current
    network, appends their examples to one growing list, and retrains the
    network on everything collected so far.

    Args:
        game: Game object passed through to ``executeEpisode``.
        num_iters: Number of collect-then-train iterations.
        num_episodes: Self-play games per iteration.

    Returns:
        The ``NeuralNetwork`` instance, trained in place.
    """
    nnet = NeuralNetwork()                                  # initialise random neural network
    examples = []
    for _ in range(num_iters):
        for _ in range(num_episodes):
            examples += executeEpisode(game, nnet)          # collect examples from this game
        # train() updates nnet in place, so its return value is not a new
        # network and is deliberately not bound to a name.
        nnet.train(examples)
        # TODO: pit the retrained net against a frozen copy of the previous
        # one and keep it only if its win fraction beats a threshold.
    return nnet

def assignRewards(examples, reward):
    """Fill in the value target of every example from the final game reward.

    The sign alternates move by move, walking backwards from the end of the
    episode: the last example gets ``-reward``, the one before it ``+reward``,
    and so on.

    NOTE(review): this assumes the two players strictly alternate moves and
    that ``reward`` is expressed from the viewpoint of the player to move in
    the terminal state (i.e. the player who did NOT make the last move).
    Confirm against ``game.gameReward`` and flip the sign if it differs.

    Args:
        examples: List of ``[state, pi, None]`` entries in play order.
        reward: Final game reward.

    Returns:
        A new list of ``[state, pi, value]`` entries; ``examples`` itself is
        left unmodified.
    """
    last = len(examples) - 1
    labelled = []
    for idx, (state, pi, _) in enumerate(examples):
        plies_from_end = last - idx
        # An even distance from the end means the same mover as the final move.
        value = -reward if plies_from_end % 2 == 0 else reward
        labelled.append([state, pi, value])
    return labelled

def executeEpisode(game, nnet, num_sims=10):
    """Play one self-play game and return its training examples.

    For every position, ``num_sims`` MCTS searches are run, the resulting
    policy is recorded, and the next action is sampled from that same policy.
    When the game ends, the examples are passed to ``assignRewards`` together
    with ``game.gameReward`` of the final state.

    Args:
        game: Object providing ``startState``, ``nextState``, ``gameEnded``
            and ``gameReward``.
        nnet: Network handed to ``mcts.search``.
        num_sims: MCTS searches to run per move.

    Returns:
        Whatever ``assignRewards`` returns for the collected examples.
    """
    examples = []
    s = game.startState()
    mcts = MCTS()                                           # initialise search tree

    while True:
        for _ in range(num_sims):
            mcts.search(s, game, nnet)
        # Query the policy once so the stored target and the distribution the
        # action is sampled from are guaranteed to be the same object.
        pi = mcts.pi(s)
        examples.append([s, pi, None])                      # rewards can not be determined yet
        a = np.random.choice(len(pi), p=pi)                 # sample action from improved policy
        s = game.nextState(s, a)
        if game.gameEnded(s):
            return assignRewards(examples, game.gameReward(s))
