In [1]:
from mock5 import Mock5
from mock5.analysis import Analysis as M5Analysis
import mock5.agent_random as m5rand
import mock5.agent_analysis_based as m5aa
import mock5.agent_ad as m5ad
import mock5.agent_pt as m5pt
import mock5.agent_df as m5df

import matplotlib.pyplot as plt

import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import random

import os
import time

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Device: {}".format(device))

torch.backends.cudnn.benchmark = True
torch.autograd.set_detect_anomaly(False)

print("cudnn: {} (det {}; bench {})".format(
    torch.backends.cudnn.enabled,
    torch.backends.cudnn.deterministic,
    torch.backends.cudnn.benchmark))
print("OpenMP: {}".format(torch.backends.openmp.is_available()))

Device: cuda
cudnn: True (det False; bench True)
OpenMP: True


In [2]:
kibolist = torch.load("21_kibo.1")

In [3]:
kibolist

[[tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

In [2]:
loss_list = []

In [3]:
# Board Size
W = 15
H = W

In [4]:
def fn_name(fn):
  if hasattr(fn, 'name'): return fn.name
  else: return repr(fn)
  
def agent(pi, epsilon=0):
  # pi must return array of non-negative values
  def c(game):
    w, h = game.width, game.height
    m, p = np.ones(h * w), np.array(pi(game))
    for i in range(h * w):
      if game.board[i] != 0: m[i], p[i] = 0, 0
    s = p.sum()
    if np.random.uniform() < epsilon or s == 0:
      s = m.sum()
      if s == 0: return None, None # Cannot do anything
      else: idx = np.random.choice(h * w, p=(m / s))
    else: idx = np.random.choice(h * w, p=(p / s))
    return idx // w, idx % w
  c.name = 'stochastic({})'.format(fn_name(pi))
  return c

def softmax(arr, tau=1.0):
  arr = np.array(arr, dtype=np.float64)
  arr /= tau
  m = max(arr)
  z = np.exp(arr - m)
  return z / z.sum()

def pt_softmax(policy, tau=1.0):
  def p(game):
    p = policy(game)
    return softmax(p, tau=tau)
  p.name = 'pt_softmax({},tau={})'.format(fn_name(policy), tau)
  return p

def pt_norm(policy):
  def p(game):
    p = policy(game)
    return p / p.max()
  p.name = 'pt_norm({})'.format(fn_name(policy))
  return p

def policy_uniform(game):
  return np.ones(game.height * game.width)
policy_uniform.name = 'uniform'

def agent_mixed(game):
  a = np.random.uniform()
  if a < 0.4: return m5aa.agent(game)
  elif a < 0.8: return m5ad.agent(game)
  elif a < 0.9: return m5pt.agent(game)
  else: return m5df.agent(game)
agent_mixed.name = 'agent-mixed-analysis-based'

In [5]:
class Flatten(nn.Module):
  def forward(self, x):
    if len(x.shape) == 3: return x.view(-1)
    else: return x.flatten(1, -1)

class Block2(nn.Module):
  def __init__(self, ch, int_ch, ker):
    super().__init__()
    self.seq = nn.Sequential(
      nn.Conv2d(ch, int_ch, ker, padding='same'),
      nn.BatchNorm2d(int_ch),
      nn.Sigmoid(),
      nn.Conv2d(int_ch, ch, ker, padding='same'),
      nn.BatchNorm2d(ch))
  def forward(self, x):
    y_0 = self.seq(x)
    inp = x + y_0
    return torch.sigmoid(inp)

class Block1(nn.Module):
  def __init__(self, ch, int_ch, ker):
    super().__init__()
    self.seq = nn.Sequential(
      nn.Conv2d(ch, int_ch, ker, padding='same'),
      nn.GELU(),
      nn.Conv2d(int_ch, ch, ker, padding='same'))
  def forward(self, x):
    y_0 = self.seq(x)
    return nn.functional.gelu(x + y_0)

class Value(nn.Module):
  def __init__(self):
    super().__init__()
    self.seq = nn.Sequential(
      nn.Conv2d(2, 128, 3, padding='same'),
      nn.BatchNorm2d(128),
      nn.Sigmoid(),
      Block2(128, 128, 3),
      Block2(128, 128, 3),
      Block2(128, 128, 3),
      nn.Conv2d(128, 1, 1, padding='same'),
      nn.BatchNorm2d(1),
      nn.Sigmoid(),
      # Flatten
      Flatten(),
      # Winrate
      nn.Linear(H*W,1))
      
  def forward(self, x):
    return self.seq(x)

class Policy(nn.Module):
  def __init__(self):
    super().__init__()
    self.seq = nn.Sequential(
      nn.Conv2d(2, 128, 3, padding='same'),
      nn.GELU(),
      Block1(128, 128, 3),
      Block1(128, 128, 3),
      Block1(128, 128, 3),
      nn.Conv2d(128, 1, 5, padding='same'),
      nn.GELU(),
      # Flatten
      Flatten(),
      # Softmax
      nn.LogSoftmax(dim=-1))

  def forward(self, x):
    return self.seq(x)

In [6]:
def game_to_tensor(game):
  t = torch.zeros(2, game.height * game.width, dtype=torch.float)
  for i in range(game.height * game.width):
    b = game.board[i]
    if b == 1: t[0][i] = 1.0
    elif b == 2: t[1][i] = 1.0
  return t.view(2, game.height, game.width)

In [7]:
def policy_model(net):
  def c(game):
    X = game_to_tensor(game).to(device)
    with torch.no_grad():
      p = torch.exp(net(X))
    arr = p.squeeze().to('cpu').numpy()
    return arr
  c.name = 'model({:x})'.format(id(net))
  return c

In [8]:
def append_game_replay(Xs, Vs, nsample, game, result):
  # Append to Batch
  exch = game.exchanged
  if result == 1:
    v = 1 # 흑 승리시 1
  elif result == 2:
    v = 0 # 백 승리시 0
  else: v = 0.5 # 무승부
  if exch:
    v = 1-v
  
  X = []
  V = []
  for f in range(2):
    for r in range(4):
      g = game.replay(angle=r, flip=f)
      while len(g.history) > 0:
        g.undo()
        X.append(game_to_tensor(g))
        V.append(v)
  if len(X) < nsample:
    Xs += X
    Vs += V
  else:
    Xs += random.sample(X, nsample)
    Vs += random.sample(V, nsample)

In [9]:
def gen_episode_by_play(
    agent1,
    agent2
):
  def generator(Xs, Vs, nsample):
    # Run Game
    game = Mock5(H, W)
    result = game.play(agent1, agent2,
      print_intermediate_state=False, print_messages=False)
    # Make reward
    append_game_replay(Xs, Vs, nsample, game, result)
    return 1
  return generator

In [10]:
def tensor_to_game(X):
  board = []
  for i in range(H):
    for j in range(W):
      if(X[0][i][j] == 1):
        board.append(1)
      elif(X[1][i][j] == 1):
        board.append(2)
      else:
        board.append(0)
  return Mock5(H,W,board=board)

In [11]:
import threading
class Epi_generator(threading.Thread):
    def __init__(self, func, Xs, Vs, gamma):
        super().__init__()
        self.func = func
        self.Xs = Xs
        self.Vs = Vs
        self.gamma = gamma
        self.cnt == 0

    def run(self):
        while len(self.Xs) < 50 and cnt < 100000:
            self.func(self.Xs,self.Vs,self.gamma)
            cnt += 1

In [12]:
def learn(
    opt,
    n_step,
    gen_episode, #: take (Xs, Vs, nsample)
    n_epoch,
    nsample,
    batch_size,
    interval_stat,
    filename
):
  Xs, Vs = [], []
  step = 0
  global loss_list, loss_10
  loss_10 = 0
  cnt = 0
  while step < n_step:
    # Generate episode
    gen_episode(Xs, Vs, nsample)
    # If batch is full enough, perform gradient ascent
    # Print status and evaluate
    if step % interval_stat == 0:
      def save(Xs, Vs, f):
        if not os.path.exists(f):
          torch.save([Xs,Vs],f)
        while True:
          try:
            [Xo,Vo] = torch.load(f)
            break
          except:
            pass
        torch.save([Xo+Xs,Vo+Vs],f)
        Xs, Vs = [], []
      save(Xs, Vs, filename)
      print("Step #", step)
  
  plt.plot(100 * np.arange(0,len(loss_list)),np.array(loss_list))
  plt.show()

In [13]:
import os

global policy

def run():
  filename = "21_kibo.1"
  global value, policy

  policy = Policy().to(device)
  f = torch.load('19_weight_10hr')["network state dict"]
  policy.load_state_dict(f)
                    
  opt = None

  agent1 = agent(policy_model(policy))
  agent2 = agent(policy_model(policy))
  
  
  learn(
      opt = opt,
      n_step = 10000,
      gen_episode = gen_episode_by_play(agent1, agent2),
      n_epoch = 1,
      nsample = 4,
      batch_size = 256,
      interval_stat = 16,
      filename = filename)
run()

Step # 0
Step # 0
Step # 0
Step # 0
Step # 0
Step # 0
Step # 0
Step # 0
Step # 0
Step # 0
Step # 0
Step # 0
Step # 0
Step # 0
Step # 0
Step # 0


In [None]:
plt.plot(100 * np.arange(0,len(loss_list)),np.array(loss_list))
plt.show()