In [None]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

In [None]:
# Upper bound on a player's move counter; Alak.check_game ends the game once it is reached.
MAX_ROUNDS = 69
# Opening position: five 'x' pieces, four empty cells ('_'), five 'o' pieces.
BASE_BOARD_S = 'x' * 5 + '_' * 4 + 'o' * 5

class Alak:
  """One player's view of a game played on a one-dimensional board string.

  A board is a string made of `my_token`, `opp_token` and '_' (empty cell),
  starting from BASE_BOARD_S.  Internally a board is "embedded" as an integer
  array seen from this player's side: 1 = my piece, 0 = empty, -1 = anything else.

  A contiguous run of one side's pieces that is bracketed on both ends by the
  other side's pieces (no empty cell in between) is removed from the board; the
  side that just moved gets its captures resolved first (see `eval_board`).
  """

  def __init__(self, my_token, opp_token, home_team=True, verbose=False, file_name=None, pred_func=lambda x : np.full((x.shape[0],1), .5)):
    """Create a player.

    Args:
      my_token: character that marks this player's pieces on the board string.
      opp_token: character that marks the opponent's pieces.
      home_team: stored on the instance; not read anywhere in this class.
      verbose: when True, `make_move` prints every move it makes.
      file_name: when not None, every position reached by `make_move` is
        recorded and `dump` writes the recordings to `<file_name>` /
        `<file_name>_y` via `np.save`.
      pred_func: callable taking an (n_boards, board_len) array of embedded
        candidate boards and returning one score per board (shape (n, 1) or
        (n,)).  The default scores every board 0.5.
    """
    self.player = Alak.Player(my_token, opp_token)
    self.home_team = home_team
    self.verbose = verbose
    self.board_s = BASE_BOARD_S
    self.round_ct = 0
    self.pred_func = pred_func

    self.record_results = file_name is not None
    self.file_name = file_name
    # One row per recorded position / one score per recorded position.
    self.round_data = np.empty((0, len(BASE_BOARD_S)), dtype=np.int8)
    self.round_scores = np.empty((0), dtype=np.float32)

    # Per-match tallies used by `dump` for the win-rate printout.
    self.games_played = 0
    self.games_won = 0


  # handle switching from string to embedded np.array(ints)
  class Player:
    """Converts between board strings and embedded integer arrays."""

    def __init__(self, my_token, opp_token):
      self.my_token = my_token
      self.opp_token = opp_token

    def embed(self, board_in):
      """Map a board string to an array: my_token -> 1, '_' -> 0, anything else -> -1."""
      return np.array([1 if c == self.my_token else 0 if c == '_' else -1 for c in board_in])

    def unembed(self, board_in):
      """Map an embedded array back to a string: 1 -> my_token, 0 -> '_', anything else -> opp_token."""
      return ''.join(
        self.my_token if c == 1 else '_' if c == 0 else self.opp_token
        for c in board_in
      )

  # resolve captures on an embedded board, mover first
  def _resolve_kills(self, board, attacker_is_me):
    """Return a copy of `board` (embedded, my side = 1) with captures removed.

    The side that just moved captures first, then the other side.
    """
    # `eval` always removes pieces on behalf of the side encoded as 1, so flip
    # the sign until the capturing side is the one encoded as 1.
    work = np.array(board) if attacker_is_me else np.array(board) * -1
    self.eval(work)    # mover's captures
    work = work * -1
    self.eval(work)    # other side's captures
    # After the two passes the board is in the non-mover's perspective;
    # flip once more when that is not mine.
    return work * -1 if attacker_is_me else work

  # check for kills, attacker first
  def eval_board(self, board_in, is_my_move):
    """Resolve captures on a board string and return the cleaned board string.

    Args:
      board_in: board string after a move has been applied.
      is_my_move: True when this player made the move (and therefore captures first).
    """
    embedded = self.player.embed(board_in)
    return self.player.unembed(self._resolve_kills(embedded, is_my_move))

  # single left-to-right scan for kills (mutates `board` in place)
  def eval(self, board):
    """Zero out every run of non-empty, non-1 cells bracketed by two 1s.

    `board` is an embedded array and is modified in place.
    """
    kill_start = -1    # index of the most recent 1 with no empty cell since, or -1
    for i in range(len(board)):
      if board[i] == 1:
        # At least one cell lies strictly between the two bracketing 1s.
        if kill_start >= 0 and kill_start <= i - 2:
          for j in range(kill_start + 1, i):
            board[j] = 0
        kill_start = i
      elif board[i] == 0:
        # An empty cell breaks any bracket in progress.
        kill_start = -1

  # apply a move like [ 0, 11 ] to the board state
  def move_s(self, move_tuple):
    """Return the current board string with the piece at move_tuple[0] moved to move_tuple[1].

    Does not modify `self.board_s` and does not resolve captures.
    """
    board = list(self.board_s)
    t = board[move_tuple[0]]
    board[move_tuple[0]] = '_'
    board[move_tuple[1]] = t
    return ''.join(board)

  # aggregate all possible moves player can make from current board state
  def legal_moves(self):
    """Return an (n_moves, 2) array of [from_index, to_index] pairs.

    Every one of my pieces is paired with every empty cell.
    """
    board = self.player.embed(self.board_s)
    ones = np.argwhere(board == 1)                                         # (n_pieces, 1)
    spaces = np.repeat(np.argwhere(board == 0).T, ones.shape[0], axis=0)   # (n_pieces, n_empty)
    ones = np.repeat(ones, spaces.shape[1])                                # each piece repeated n_empty times
    return np.column_stack((ones, spaces.ravel()))

  # aggregate and eval kills on all possible boards using all legal moves
  def legal_boards(self, move_tuples):
    """Return the embedded board that results from each move in `move_tuples`.

    Captures are resolved on every candidate (this player moving), so the
    boards handed to `pred_func` look like the post-capture positions that
    `make_move` records as training data.

    Args:
      move_tuples: (n_moves, 2) array as returned by `legal_moves`.
    Returns:
      (n_moves, board_len) array of embedded boards.
    """
    boards = np.tile(self.player.embed(self.board_s), (move_tuples.shape[0], 1))
    for i in range(move_tuples.shape[0]):
      t = boards[i, move_tuples[i, 0]]
      boards[i, move_tuples[i, 0]] = 0
      boards[i, move_tuples[i, 1]] = t
      boards[i] = self._resolve_kills(boards[i], attacker_is_me=True)
    return boards

  # calc highest winrate board from all legal boards
  def choose_move(self, boards):
    """Return the row index of the best-scoring board, ties broken uniformly at random."""
    # Flatten first: np.argwhere on an (n, 1) score array yields (row, col)
    # pairs, and ravelling those would mix the column index 0 into the
    # candidate list, heavily biasing the choice toward move 0.
    wrs = np.asarray(self.pred_func(boards)).reshape(-1)
    best = np.flatnonzero(wrs == wrs.max())
    return np.random.choice(best)

  # verbose move print helper
  def encode_move(self, move):
    """Format a [from, to] move with both indices written in hexadecimal."""
    a = '{:x}'.format(int(move[0]))
    b = '{:x}'.format(int(move[1]))
    return '[ {:s} to {:s} ]'.format(a, b)

  # calc moves, boards, win probabilities, make a move, clean board, check if game over
  def make_move(self):
    """Pick and play this player's move.

    Returns:
      The new board string while the game continues, otherwise the message
      returned by `game_over` ('You won\\n<board>' or 'Opponent won\\n<board>').
    """
    moves = self.legal_moves()
    boards = self.legal_boards(moves)
    move = self.choose_move(boards)
    old_board = self.board_s
    self.board_s = self.eval_board(self.move_s(moves[move]), is_my_move=True)
    if self.verbose:
      print('{:s} round {:<3d}:: move {:s}\n  {:s}  ->  {:s}\n'.format(self.player.my_token, self.round_ct, self.encode_move(moves[move]), old_board, self.board_s))
    if self.record_results:
      # NOTE: np.vstack copies the whole history on every recorded move.
      self.round_data = np.vstack((self.round_data, self.player.embed(self.board_s)))
    # The round counter is bumped after the check so that, when the game ends
    # here, round_ct equals the number of positions recorded for this game.
    over = self.check_game()
    self.round_ct += 1
    if over >= 0:
      return self.game_over(over)
    else:
      return self.board_s

  # run the end-of-game check shared by both opps_move_* entry points
  def _finish_if_over(self):
    """Call `game_over` if `check_game` reports a finished game; return its message or None."""
    over = self.check_game()
    if over >= 0:
      return self.game_over(over)
    return None

  # clean and apply board string input
  def opps_move_s(self, board_in):
    """Adopt the board string produced by the opponent's move.

    Returns the `game_over` message if this ended the game, otherwise None.
    """
    self.board_s = self.eval_board(board_in, is_my_move=False)
    return self._finish_if_over()

  # clean and apply move pair input
  def opps_move_a(self, move):
    """Apply the opponent's move given as a [from, to] pair.

    Returns the `game_over` message if this ended the game, otherwise None.
    """
    # move_s returns the moved board without storing it, so its result has to
    # be fed into eval_board (it used to be thrown away, leaving the board unchanged).
    self.board_s = self.eval_board(self.move_s(move), is_my_move=False)
    return self._finish_if_over()

  # check for win con or draw
  def check_game(self):
    """Report the game state.

    Returns:
      0 when the round counter has reached MAX_ROUNDS or I have fewer than two
      pieces, 1 when the opponent has fewer than two pieces, -1 otherwise.
      Reaching the cap also clamps `round_ct` to MAX_ROUNDS.
    """
    if self.round_ct >= MAX_ROUNDS:
      self.round_ct = MAX_ROUNDS
      return 0
    elif self.board_s.count(self.player.my_token) < 2:
      return 0
    elif self.board_s.count(self.player.opp_token) < 2:
      return 1
    return -1

  # end game, update dataset
  def game_over(self, winner):
    """Finish the current game and reset for the next one.

    Args:
      winner: 1 if this player won, 0 otherwise; negative values are ignored.
    Returns:
      'You won\\n<board>' or 'Opponent won\\n<board>', or None for a negative `winner`.
    """
    if winner < 0:
      return None
    self.games_played += 1
    if winner == 1:
      self.games_won += 1
      s = 'You won\n{:s}'.format(self.board_s)
    else:
      s = 'Opponent won\n{:s}'.format(self.board_s)
    if self.record_results:
      # Every position of this game gets the same label: the length-discounted
      # score for a win, 0 otherwise.
      if winner > 0:
        self.round_scores = np.hstack((self.round_scores, np.full((self.round_ct), score_curve(self.round_ct), dtype=np.float32)))
      else:
        self.round_scores = np.hstack((self.round_scores, np.zeros((self.round_ct), dtype=np.float32)))
    self.reset()
    return s

  # reset state
  def reset(self):
    """Restore the opening board and zero the round counter (recorded data is kept)."""
    self.board_s = BASE_BOARD_S
    self.round_ct = 0

  # write datasets to files
  def dump(self):
    """Print this player's match win rate and save the recorded data.

    Does nothing unless a `file_name` was given at construction.
    """
    if not self.record_results:
      return
    win_rate = self.games_won / self.games_played if self.games_played else 0.0
    print('{:s} won {:.2%} of matches'.format(self.player.my_token, win_rate))
    np.save(self.file_name, self.round_data)
    # Labels are stored as a column vector, one row per recorded position.
    np.save('{:s}_y'.format(self.file_name), np.atleast_2d(self.round_scores).T)

# adjust score data based on how long the match took (longer -> lower score)
def score_curve(round_ct, max_rounds=None, grace_rounds=5):
  """Score for a won game that lasted `round_ct` rounds.

  Games of at most `grace_rounds` rounds score 1.0; after that the score falls
  linearly, reaching 0.0 at `max_rounds`, and never goes below 0.0.

  Args:
    round_ct: number of rounds the game took.
    max_rounds: round count at which the score hits 0; defaults to MAX_ROUNDS.
    grace_rounds: rounds that still earn the full score.
  Returns:
    A float in [0.0, 1.0].
  """
  if max_rounds is None:
    max_rounds = MAX_ROUNDS
  if round_ct <= grace_rounds:
    return 1.0
  span = max_rounds - grace_rounds
  if span <= 0:
    # No room for a linear ramp; anything past the grace period scores nothing.
    return 0.0
  return max(0.0, 1.0 - (round_ct - grace_rounds) / span)

In [None]:
class DataGen:
  """Plays an 'x' Alak player against an 'o' Alak player to generate training data."""

  def __init__(self, x_fn='alak_x', o_fn='alak_o', verbose=False, x_pred_func=lambda x: np.full((x.shape[0],1),.5), o_pred_func=lambda x: np.full((x.shape[0],1),.5)):
    """Create the two players.

    Args:
      x_fn: file name the 'x' player records its data under.
      o_fn: file name the 'o' player records its data under.
      verbose: forwarded to both players.
      x_pred_func: board-scoring function for the 'x' player.
      o_pred_func: board-scoring function for the 'o' player.
    """
    self.alak_x = Alak('x', 'o', home_team=True, file_name=x_fn, verbose=verbose, pred_func=x_pred_func)
    self.alak_o = Alak('o', 'x', home_team=False, file_name=o_fn, verbose=verbose, pred_func=o_pred_func)
    self.verbose = verbose

  def gen(self, reps):
    """Play `reps` games, alternating which player moves first, then dump both datasets."""
    for r in range(reps):
      print('rep #', r)
      self.play_to_bust(r%2==0)
    print()
    self.alak_x.dump()
    self.alak_o.dump()

  def play_to_bust(self, even):
    """Play one game to completion.

    Args:
      even: when True 'x' moves first, otherwise 'o' moves first.
    """
    alaks = [self.alak_x, self.alak_o] if even else [self.alak_o, self.alak_x]
    alaks[0].reset()
    alaks[1].reset()
    i = 0
    while True:
      mover = alaks[i % 2]
      receiver = alaks[(i + 1) % 2]
      s = mover.make_move()
      i += 1
      if "\n" in s:
        # The mover ended the game: s is '<message>\n<final board>'.  Hand the
        # final board to the other player so it closes out its own game too.
        receiver.opps_move_s(s.split("\n")[1])
        return
      rounds_before = receiver.round_ct
      receiver.opps_move_s(s)
      if rounds_before > 0 and receiver.round_ct == 0:
        # The receiver's counter dropped back to 0, i.e. it ended the game
        # itself (round cap) and reset.  Close out the mover's game as well
        # instead of letting play continue from a reset, out-of-sync board.
        mover.game_over(0)
        return

In [None]:
# Baseline dataset: both players use DataGen's default predictor, which scores
# every candidate board the same, and write to the default output files.
g_full_random = DataGen()
g_full_random.gen(reps=1000)


rep # 0, rep # 100, rep # 200, rep # 300, rep # 400, 
rep # 500, rep # 600, rep # 700, rep # 800, rep # 900, x won %0.581559 of matches
o won %0.349181 of matches


In [None]:
# Training hyperparameters shared by the cells below.
learning_rate = 1e-3    # step size handed to the Adam optimizer in create_model
layer_height = 32       # units in each hidden Dense layer
training_epochs = 64    # epochs passed to model.fit
batch_size = 32         # batch size used when building the tf.data datasets

In [None]:
# X_stack = np.concatenate((np.load('alak_x.npy'), np.load('alak_o.npy')), axis=0, dtype=np.float32)
# y_stack = np.concatenate((np.load('alak_x_y.npy'), np.load('alak_o_y.npy')), axis=0, dtype=np.float32)

# X_stack = np.concatenate((np.load('smart_x.npy'), np.load('random_o.npy')), axis=0, dtype=np.float32)
# y_stack = np.concatenate((np.load('smart_x_y.npy'), np.load('random_o_y.npy')), axis=0, dtype=np.float32)

# X_stack = np.concatenate((np.load('duo_x.npy'), np.load('random_o.npy'), np.load('smart_x2.npy'), np.load('random_o2.npy')), axis=0, dtype=np.float32)
# y_stack = np.concatenate((np.load('smart_x_y.npy'), np.load('random_o_y.npy'), np.load('smart_x2_y.npy'), np.load('random_o2_y.npy')), axis=0, dtype=np.float32)

# X_stack = np.concatenate((np.load('duo_x.npy'), np.load('duo_o.npy')), axis=0, dtype=np.float32)
# y_stack = np.concatenate((np.load('duo_x_y.npy'), np.load('duo_o_y.npy')), axis=0, dtype=np.float32)
######################################
# X_stack = np.concatenate((np.load('alak_x.npy'), np.load('alak_o.npy')), axis=0, dtype=np.float32)
# y_stack = np.concatenate((np.load('alak_x_y.npy'), np.load('alak_o_y.npy')), axis=0, dtype=np.float32)

# Active dataset: positions and labels recorded for 'random_x' and 'smart_o',
# stacked row-wise (x rows first, then o rows) and cast to float32.
feature_files = ('random_x.npy', 'smart_o.npy')
label_files = ('random_x_y.npy', 'smart_o_y.npy')
X_stack = np.concatenate([np.load(path) for path in feature_files], axis=0, dtype=np.float32)
y_stack = np.concatenate([np.load(path) for path in label_files], axis=0, dtype=np.float32)


In [None]:
# Split rows 70/30 in their stored order: the first 70% train the model and the
# remaining 30% are held out.  (The test slices used to be `[:brk]` as well,
# which made the "test" set an exact copy of the training set.)
# NOTE(review): X_stack is not shuffled before the split, so the held-out tail
# comes from whatever was concatenated last -- confirm that is intended.
brk = int(len(X_stack)*.7)
X_train, y_train = X_stack[:brk], y_stack[:brk]
X_test, y_test = X_stack[brk:], y_stack[brk:]

train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(buffer_size=1000).batch(batch_size)
test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size)

In [None]:
def create_model():
    """Build and compile the board-scoring network.

    Two ReLU hidden layers of `layer_height` units on a 14-value input, followed
    by a single linear output unit; trained with mean squared error and Adam at
    `learning_rate`.
    """
    dense = tf.keras.layers.Dense
    net = tf.keras.Sequential([
        dense(layer_height, input_shape=[14], activation='relu'),
        dense(layer_height, input_shape=[layer_height], activation='relu'),
        dense(1, input_shape=[layer_height]),
    ])
    optimizer = tf.keras.optimizers.Adam(learning_rate)
    net.compile(loss='mean_squared_error', optimizer=optimizer)
    return net

model = create_model()
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 32)                480       
                                                                 
 dense_1 (Dense)             (None, 32)                1056      
                                                                 
 dense_2 (Dense)             (None, 1)                 33        
                                                                 
Total params: 1,569
Trainable params: 1,569
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Restore previously saved weights into the freshly built model.
# Alternative checkpoint: model.load_weights('final_o')
weights_path = 'drive/MyDrive/alak/weights'
model.load_weights(weights_path)

In [None]:
# Train on the training split.  batch_size is passed explicitly so that the
# hyperparameter defined above actually controls training instead of Keras'
# built-in default.
history = model.fit(X_train, y_train, epochs=training_epochs, batch_size=batch_size)

In [None]:
# Persist the trained weights under the 'final_x' checkpoint name.
trained_weights_path = 'final_x'
model.save_weights(trained_weights_path)

In [None]:
# Spot check: score only the first test row and show the scalar prediction.
first_row = X_test[:1]
y_pred = model.predict(first_row)
print(y_pred[0][0])

0.47229603


In [None]:
# Absolute error on every test row.  Predictions are computed for the whole
# test set here; subtracting y_test from the single-row `y_pred` of the spot
# check above would broadcast that one prediction against all labels.
y_pred_all = model.predict(X_test)
pred_dif = np.abs(y_pred_all - y_test)
print(pred_dif)

[[0.34020397]
 [0.34020397]
 [0.34020397]
 ...
 [0.527704  ]
 [0.527704  ]
 [0.527704  ]]


In [None]:
# wr alak_x alak_o -> smart_x random_o :: x .88 o .12
# wr smart_x random_o -> smart_x random_o :: x .71 o .29
# wr smart_x random_o stack -> smart_x random_o :: x .87 o .13
# wr smart_x random_o stack -> duo :: x .75 o .25
#####################
# wr alak_x alak_o -> random_x smart_o :: x .51 o .49
# wr random_x smart_o -> random_x2 smart_o2 :: x .51 o .49

In [None]:
# Next dataset: 'x' keeps DataGen's default predictor while 'o' ranks its
# candidate boards with the trained network.
def _score_with_model(boards):
    return model.predict(boards)

g_nn_training = DataGen(
    x_fn='random_x2',
    o_fn='smart_o2',
    o_pred_func=_score_with_model,
)
g_nn_training.gen(reps=1000)

In [None]:
# Manual check: let a model-driven 'x' player make two moves in a row
# (no opponent reply in between) and show the resulting board.
g = Alak('x', 'o', verbose=True, pred_func=lambda boards: model.predict(boards))
# g.board_s = '_xxxx_x_o_oooo'
for _ in range(2):
    g.make_move()
print(g.board_s)

x round 0  :: move [ 1 to 0 ]
  _xxxx_x_o_oooo  ->  x_xxx_x_o_oooo

x_xxx_x_o_oooo
