# Deep Reinforcement Learning using AlphaZero methodology

Please see https://applied-data.science/blog/how-to-build-your-own-alphazero-ai-using-python-and-keras/ for further notes on the codebase

## 1. First load the core libraries

In [20]:
# -*- coding: utf-8 -*-
# %matplotlib inline
%load_ext autoreload
%autoreload 2

import numpy as np
np.set_printoptions(suppress=True)


from game import Game, GameState
from memory import Memory
from model import Residual_CNN
from funcs import playMatches, playMatchesBetweenVersions
from agent import Agent

import initialise
import pickle
import config

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 2. Now run this block to start the learning process

This block loops forever, continually learning from new game data.

The current best model and memories are saved in the run folder so you can kill the process and restart from the last checkpoint.

In [14]:

# Create the game environment that supplies the grid shape, action size and name used below.
env = Game()

# Two networks built with the same hyper-parameters and input shape
# ((2,) prepended to the board's grid shape); they differ only in the final
# positional argument (0 vs 1).
current_NN = Residual_CNN(
    config.REG_CONST,
    config.LEARNING_RATE,
    (2,) + env.grid_shape,
    env.action_size,
    config.HIDDEN_CNN_LAYERS,
    0,
)
best_NN = Residual_CNN(
    config.REG_CONST,
    config.LEARNING_RATE,
    (2,) + env.grid_shape,
    env.action_size,
    config.HIDDEN_CNN_LAYERS,
    1,
)

# Read the model version/run configured in `initialise` and copy its weights
# into both networks so they start out identical.
best_player_version = initialise.INITIAL_MODEL_VERSION
print('LOADING MODEL VERSION {}...'.format(initialise.INITIAL_MODEL_VERSION))
m_tmp = best_NN.read(
    env.name,
    initialise.INITIAL_RUN_NUMBER,
    best_player_version,
)
current_NN.model.set_weights(m_tmp.get_weights())
best_NN.model.set_weights(m_tmp.get_weights())



LOADING MODEL VERSION None...


## The following panels are not involved in the learning process

### Play matches between versions (use -1 for human player)

In [21]:
# Wrap the best network in an Agent so it can be queried in the cells below.
best_player = Agent(
    'best_player',
    env.state_size,
    env.action_size,
    config.MCTS_SIMS,
    config.CPUCT,
    best_NN,
)

In [26]:
# Hand-built 9-cell position, laid out as a 3x3 grid with a single -1 in the
# centre cell and every other cell empty (0).
board = np.array([
    0,  0, 0,
    0, -1, 0,
    0,  0, 0,
])

# NOTE(review): the second argument (1) is presumably the player to move — confirm against GameState.
gs = GameState(board, 1)

# Ask the agent for its predictions on this state and show them.
preds = best_player.get_preds(gs)
print(preds)

[[[[0 0 0]
   [0 0 0]
   [0 0 0]]

  [[0 0 0]
   [0 1 0]
   [0 0 0]]]]
(array([-0.6298428], dtype=float32), array([0.13412867, 0.11138948, 0.15018022, 0.10972662, 0.        ,
       0.11218587, 0.13815972, 0.10855111, 0.13567828], dtype=float32), [0, 1, 2, 3, 5, 6, 7, 8])


### Pass a particular game state through the neural network (set up below for Connect4)

### See the layers of the current neural network

In [7]:
# Display the layers of the current network.
# `current_player` is not created by any earlier cell in this notebook, so on a
# fresh kernel this cell would fail with NameError. Build it the same way as
# `best_player`, but around `current_NN`, unless it already exists in the session.
if 'current_player' not in globals():
    current_player = Agent(
        'current_player',
        env.state_size,
        env.action_size,
        config.MCTS_SIMS,
        config.CPUCT,
        current_NN,
    )

current_player.model.viewLayers()

LAYER 0


<Figure size 216x0 with 0 Axes>

LAYER 1


KeyboardInterrupt: 

### Output a diagram of the neural network architecture

In [6]:
from keras.utils import plot_model

# Write an architecture diagram (with tensor shapes) of the current network to disk.
# NOTE(review): `run_folder` is not defined or imported in any cell shown in
# this notebook — confirm where it comes from before running this cell.
diagram_path = run_folder + 'models/model.png'
plot_model(
    current_NN.model,
    to_file=diagram_path,
    show_shapes=True,
)