In [1]:
import numpy as np
import theano
import theano.tensor as T

import lasagne
# Theano's configured float type name; reused by later cells as the dtype of
# the arrays handed to the compiled training function.
floatX = theano.config.floatX
# Bare expression so the notebook echoes the configured value.
floatX

Using gpu device 0: GeForce GTX 965M (CNMeM is disabled)


'float32'

In [2]:
from IPython.display import HTML, display

In [3]:
%load_ext Cython

In [4]:
%%cython
# cython: infer_types=True, annotation_typing=True
## cython: infer_types.verbose=True 
from IPython.display import HTML, display
import numpy as np

floatX = np.float32

# binary6[k] holds the six low bits of k as floats, least-significant bit first,
# for every k in 0..63.
binary6 = ((np.arange(2**6)[:, None] >> np.arange(6)) & 1).astype(floatX)

# height[2**r] == r for r in 0..5; every other entry (including height[64])
# stays at -1.
height = np.full(65, -1, dtype=np.int32)
for __i in range(6):
    height[1 << __i] = __i

cdef class Connect4:
    # Connect Four position stored as two integer bitboards.
    #
    # Cell (col, row) lives at bit col*7 + row, so each of the 7 columns owns
    # seven consecutive bits. move() only ever fills the lowest six bits of a
    # column (rows 0..5); it refuses a column whose six low bits are all set.
    # None of the methods validate that col / row are in range.
    cdef public:
        # Incremented by every non-test move(); turn % 2 is the index into
        # `data` of the side that the next move() writes to.
        long turn
        # data[0] and data[1]: one bitboard per side (get_col_row reports them
        # as 1 and 2 respectively).
        long long[2] data
    cpdef long get_col_row(self, col: long, row: long):
        # Content of one cell: 2 if its bit is set in data[1], otherwise 1 if
        # it is set in data[0], otherwise 0.
        pos = col * 7 + row
        mask = (<long long>1) << pos 
        if self.data[1] & mask:
            return 2
        return bool(self.data[0] & mask)
    
    cpdef long is_end(self):
        # Return True (1) if the side at data[1 - turn % 2] -- the side that is
        # NOT about to move -- has four cells in a line, else False (0).
        # A full board without a line is not detected here; callers check
        # `turn < 42` separately.
        cdef long long mask
        bitboard = self.data[1-self.turn%2]
        # Exclusive limit for the sliding mask: bit 48 = 6*7 + 6 is the highest
        # of the 49 = 7*(6+1) bits and is the row-6 bit of the last column.
        bound = (<long long>1)<<48
        # horizontal: 0x204081 = 1|(1<<7)|(1<<14)|(1<<21)
        # vertical: 0xf = 1|(1<<1)|(1<<2)|(1<<3)
        # up-right: 0x1010101 = 1|(1<<8)|(1<<16)|(1<<24)
        # down-right: 0x208208 = (1<<3)|(1<<9)|(1<<15)|(1<<21)
        # Each pattern is slid one bit at a time over the whole board. A
        # placement that wraps from one column into the next always contains a
        # row-6 bit, so it cannot match as long as row 6 of every column stays
        # empty (move() never sets it).
        for mask in [0x204081, 0xf,  0x1010101, 0x208208]:
            while mask < bound:
                # Python precedence: this is (mask & bitboard) == mask.
                if mask & bitboard == mask:
                    return True
                mask <<= 1
        return False
    
    cpdef set_col_row(self, col:long, row:long, value:long):
        # Overwrite one cell directly. value 1 or 2 sets the cell for
        # data[value-1] and clears it for the other side; any other value
        # clears the cell on both bitboards. `turn` is not touched.
        # assert value in [0,1,2]   (not enforced)
        pos = col * 7 + row
        mask = (<long long>1) << pos
        neg_mask = ~mask       
        if value == 1 or value ==2:
            self.data[value-1] |= mask
            self.data[2-value] &= neg_mask
        else:
            self.data[0] &= neg_mask
            self.data[1] &= neg_mask
            
    def __init__(self, data=None, turn=0):
        # data: optional two-element sequence of bitboards, copied with [:] so
        # the caller's object is not aliased; an empty board when omitted.
        # turn: initial value of the move counter.
        if data is not None:
            self.data = data[:]
        else:
            self.data = [0, 0]
        self.turn = turn
        
    cpdef remove(self, col:long):
        # Clear the topmost occupied cell of column `col` on both bitboards;
        # does nothing when the column is empty. `turn` is left unchanged.
        shift = col*7
        # For a column filled contiguously from row 0 to height h, the occupied
        # bits are 2**h - 1; adding 1 gives 2**h and shifting right by one
        # gives the bit of the top piece (or 0 when h == 0).
        mask = (((self.data[0]|self.data[1]) >> shift) &0x3f) +1
        mask = (mask >> 1) << shift
        # print(shift, hex(mask), hex(self.data[0]), hex(self.data[1]))
        neg_mask = ~mask
        self.data[0] &= neg_mask
        self.data[1] &= neg_mask
    
    def _np_branch(self):
        # Enumerate the positions reachable in one move.
        # Returns (moves, boards): `moves` is the list of playable column
        # indices and `boards` a float array of shape (len(moves), 2, 7, 6),
        # one successor position per playable column. Plane 0 holds the cells
        # of the side to move (including the newly dropped piece), plane 1 the
        # other side's cells. The position itself is not modified.
        c = self.turn%2 # index into self.data of the side whose turn it is
        base = np.zeros((2,7,6), dtype=floatX)
        pos = []
        moves = []
        red, yellow = self.data
        for i in range(7):
            # Low six bits of the current column plus one: a power of two
            # 2**h for a column of height h (64 when the column is full).
            mask = ((red|yellow) &0x3f) + 1
            # height[] maps 2**h -> h for h in 0..5 and everything else to -1.
            p = height[mask]
            if p != -1:
                moves.append(i)
                pos.append(height[mask])
            # data[0] goes to plane c and data[1] to plane 1-c, which puts the
            # side to move in plane 0 whichever side that is.
            base[c, i] = binary6[red&0x3f]
            base[1-c, i] = binary6[yellow&0x3f]
            # Advance both bitboards to the next column (7 bits per column).
            red >>= 7
            yellow >>= 7
        boards = np.zeros( (len(moves), 2, 7, 6), dtype=floatX)
        for i in range(len(moves)):
            m = moves[i]
            p = pos[i]
            boards[i]=base
            # Drop the mover's piece on the first free row of column m.
            boards[i, 0, m, p] = 1
        return moves, boards
            
        
    cpdef move(self, col:long, test=False):
        # Drop a piece for the side to move into column `col`.
        # Returns None when the column's six low bits are already all set,
        # otherwise returns self. With test=True the legality check is done
        # but neither the board nor `turn` is changed.
        # assert 0<= col <7   (not enforced)
        shift = col*7
        # Column-relative bit just above the current stack (see remove()).
        mask = (((self.data[0]|self.data[1]) >> shift) &0x3f) +1
        # print("mask=", mask)
        if mask >= 64:
            return None
        if not test:
            self.data[self.turn%2] |= (mask<<shift)
            self.turn += 1
        return self
    
    def board_data(self):
        # Generator over the non-empty cells of rows 0..5, yielding
        # (col, row, value) with value as returned by get_col_row (1 or 2).
        for i in range(7):
            for j in range(6):
                c = self.get_col_row(i,j)
                if c!=0:
                    yield i,j,c
                    
    def _repr_html_(self):
        # Build an HTML fragment: a blue <div> containing absolutely
        # positioned images from img/ -- 'empty' for each of the 42 cells
        # (z-index 0), then 'red_coin' for value 1 and 'yellow_coin' for any
        # other non-empty value (z-index 2). Row j is drawn at top=pos(5-j),
        # so row 0 ends up at the bottom.
        def pos(i):
            # Pixel offset of grid index i.
            return int(7+(220-6.5)*i/8)
        imgstr = "<img src='img/%s.png' width='23px' height='23px' style='position: absolute; top: %spx; left: %spx;margin-top: 0;z-index: %d' />"
        header = """<div style="width: 200px; height:180px;position: relative;background: blue">"""
        header += "\n".join(imgstr%('empty', pos(5-j), pos(i), 0) for i in range(7) for j in range(6))
        return header +"\n".join(imgstr%('red_coin' if c==1 else 'yellow_coin', pos(5-j), pos(i), 2) for (i,j,c) in self.board_data()) +"</div>"
    
    def display(self):
        # Render the HTML board through IPython's display().
        display(HTML(self._repr_html_()))
    
    def __repr__(self):
        # Plain-text board: '.', 'o', 'x' for cell values 0, 1, 2; six lines,
        # row 5 first and row 0 last.
        row_str = lambda j: "".join(".ox"[self.get_col_row(i,j)] for i in range(7))
        return "\n".join(row_str(j) for j in range(5,-1,-1))

from random import randint
def random_play(init_data=None, init_turn=0, display=False):
    """Play random moves from a position until the game stops.

    A Connect4 is built from ``init_data`` / ``init_turn``. Columns are drawn
    with ``randint(0, 6)`` and redrawn whenever ``move`` rejects the column,
    until ``is_end()`` is true or ``turn`` reaches 42.

    If ``display`` is truthy the final board is displayed.

    Returns the final ``turn`` value when ``is_end()`` is true, else 0.
    """
    board = Connect4(init_data, init_turn)
    while not (board.turn >= 42 or board.is_end()):
        # Rejection sampling: keep drawing until a column accepts the piece.
        while board.move(randint(0, 6)) is None:
            pass
    if display:
        board.display()
    if not board.is_end():
        return 0
    return board.turn

  warn("get_ipython_cache_dir has moved to the IPython.paths module")


In [5]:
def MC_agent(_game, N=500):
    """Choose a column for the side to move using random rollouts.

    Each column is tried on a copy of ``_game``. A column whose move makes
    ``is_end()`` true is returned immediately. Otherwise ``N`` games are played
    out with ``random_play`` and the column is scored by the mean of
    ``+0.95**k`` for rollouts won by the side to move in ``_game`` and
    ``-0.95**k`` for rollouts won by the other side, where ``k`` is the number
    of moves played after the candidate move; drawn rollouts add nothing.
    Columns that reject the move keep the initial score ``-N``.

    Returns the column with the highest score; equal scores resolve to the
    highest column index because (score, column) pairs are compared.
    """
    start_turn = _game.turn
    mover = start_turn % 2
    scores = [-1.0*N]*7
    for col in range(7):
        trial = Connect4(_game.data, _game.turn)
        if not trial.move(col):
            continue
        if trial.is_end():
            return col
        total = 0
        for _ in range(N):
            final_turn = random_play(trial.data, trial.turn)
            if final_turn == 0:
                # Draw: contributes nothing to the score.
                continue
            weight = 0.95 ** (final_turn - start_turn - 1)
            # The winner is the side that played move number final_turn.
            if (final_turn - 1) % 2 == mover:
                total += weight
            else:
                total -= weight
        scores[col] = total/N
    return max(zip(scores, range(7)))[1]

In [6]:
def random_vs_MC(init_data=None, init_turn=0, display=False):
    """Play a random mover (even turns) against MC_agent (odd turns).

    The game starts from ``init_data`` / ``init_turn`` and runs until
    ``is_end()`` is true or ``turn`` reaches 42. With ``display == 'all'`` the
    board is shown after every move; any truthy ``display`` shows the final
    board once more after the loop.

    Returns the final ``turn`` value when ``is_end()`` is true, else 0.
    """
    game = Connect4(init_data, init_turn)
    while not (game.turn >= 42 or game.is_end()):
        if game.turn % 2:
            # Odd turn: Monte-Carlo agent picks the column.
            game.move(MC_agent(game))
        else:
            # Even turn: redraw random columns until one is accepted.
            while game.move(randint(0, 6)) is None:
                pass
        if display == 'all':
            game.display()
    if display:
        game.display()
    if not game.is_end():
        return 0
    return game.turn

In [7]:
# Symbolic inputs: a 4-D batch of boards and a vector of target values.
input_var = T.tensor4('inputs')
target_var = T.vector('targets')
# Input shape (batch, 2, 7, 6) matches the arrays built by Connect4._np_branch.
l_in = lasagne.layers.InputLayer(shape=(None, 2, 7, 6), input_var=input_var)
#l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2)
# One hidden dense layer of 400 tanh units.
l_hidden = lasagne.layers.DenseLayer(l_in, num_units=400, nonlinearity=lasagne.nonlinearities.tanh, W=lasagne.init.GlorotUniform())
#l_hidden_drop = lasagne.layers.DropoutLayer(l_hidden, p=0.5)
# Single tanh output unit: one value per board.
l_out = lasagne.layers.DenseLayer(l_hidden, num_units=1, nonlinearity=lasagne.nonlinearities.tanh, W=lasagne.init.GlorotUniform())

# Flatten the (batch, 1) output to a vector so it lines up with target_var.
prediction = lasagne.layers.get_output(l_out).flatten()
# V(boards) -> vector of predicted values, one per board; used for move selection.
V = theano.function([input_var], prediction)
#loss = lasagne.objectives.binary_crossentropy(prediction, target_var)
# Element-wise squared error, reduced to its mean over the batch below.
loss = lasagne.objectives.squared_error(prediction, target_var)

loss = loss.mean()

params = lasagne.layers.get_all_params(l_out, trainable=True)
#updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)
# Adam with Lasagne's default hyper-parameters.
updates = lasagne.updates.adam(loss, params)
# train_fn(boards, targets) applies one update step and returns the mean loss.
train_fn = theano.function([input_var, target_var], loss, updates=updates)
# Output requested with deterministic=True; with the dropout layers above
# commented out this presumably equals `prediction` -- TODO confirm.
test_prediction = lasagne.layers.get_output(l_out, deterministic=True).flatten()
# prediction_fn(boards) -> element-wise (test_prediction > 0).
# NOTE(review): not referenced by any other cell visible in this notebook.
prediction_fn = theano.function([input_var], T.gt(test_prediction, 0.))

In [8]:
from random import random, randint


In [9]:
def random_vs_NN(init_data=None, init_turn=0, display=False, random_side=0):
    """Play a random mover against the value network ``V``.

    The side with ``turn % 2 == random_side`` plays random columns (redrawn
    until accepted); the other side evaluates every successor board from
    ``_np_branch`` with ``V`` and plays the column with the highest value.
    The game runs until ``is_end()`` is true or ``turn`` reaches 42.

    With ``display == 'all'`` the board is shown after every move; any truthy
    ``display`` shows the final board once more after the loop.

    Returns the final ``turn`` value when ``is_end()`` is true, else 0.
    """
    game = Connect4(init_data, init_turn)
    while not (game.turn >= 42 or game.is_end()):
        if game.turn % 2 != random_side:
            # Network side: greedy over the values of the successor boards.
            moves, boards = game._np_branch()
            best = np.argmax(V(boards))
            game.move(moves[best])
        else:
            # Random side: redraw columns until one is accepted.
            while game.move(randint(0, 6)) is None:
                pass
        if display == 'all':
            game.display()
    if display:
        game.display()
    if not game.is_end():
        return 0
    return game.turn

In [12]:
def test_random(random_side, ngames=1000):
    """Tally the outcomes of ``ngames`` games of random_vs_NN.

    ``random_side`` is forwarded to ``random_vs_NN``.

    Returns a three-element list: index 0 counts games that returned 0,
    index 1 counts games that ended on an odd move number (won by the side
    that moves on even turns), index 2 counts games that ended on an even
    move number.
    """
    tally = [0, 0, 0]
    for _ in range(ngames):
        final_turn = random_vs_NN(random_side=random_side)
        slot = 0 if final_turn == 0 else 1 + (final_turn - 1) % 2
        tally[slot] += 1
    return tally

    

In [11]:
%%timeit -n 1 -r 1
def run_game(V, verbose = False, epsilon=0.1, lmbd=0.95):
    """Play one epsilon-greedy self-play game and train on its positions.

    At every move the successor boards come from ``_np_branch``. With
    probability ``epsilon`` a successor is picked at random, otherwise the one
    with the highest ``V`` value is played. The chosen successor board is
    recorded for every move.

    After the game the last recorded board gets target 1.0 if ``is_end()`` is
    true (0.0 otherwise), and each earlier board gets the following target
    multiplied by ``-lmbd``. One ``train_fn`` step is run on all recorded
    boards and its loss is returned.

    ``verbose`` is accepted but not used.
    """
    game = Connect4()
    visited = []
    while not (game.turn >= 42 or game.is_end()):
        moves, boards = game._np_branch()
        if random() < epsilon:
            # Exploration: uniform choice among the playable columns.
            choice = randint(0, len(moves)-1)
        else:
            choice = np.argmax(V(boards))
        game.move(moves[choice])
        visited.append(boards[choice])

    outcome = 1. if game.is_end() else 0.  # 0. means the board filled up (tie)

    # Walk backwards from the final position, flipping the sign and
    # discounting by lmbd at every step.
    targets = np.zeros(len(visited), dtype=floatX)
    value = outcome
    for step in reversed(range(len(visited))):
        targets[step] = value
        value *= -lmbd
    return train_fn(np.array(visited, dtype=floatX), targets)
# Self-play training: 60000 games, with a progress report (average loss and
# two 1000-game evaluations against the random player) every N games.
N = 3000
total_loss = 0
for i in range(60000):
    total_loss += run_game(V)
    games_done = i + 1
    if games_done % N:
        continue
    report = (games_done, total_loss/N, test_random(0), test_random(1))
    print("game #%d avgloss=%f, rand vs nn: %s, nn vs rand %s"%report )
    total_loss = 0
# Final, larger evaluation: 10000 games for each side assignment.
print("final result: rand vs nn: %s, nn vs rand %s"%(test_random(0, 10000), test_random(1, 10000)) )

game #3000 avgloss=0.473511, rand vs nn: [1, 179, 820], nn vs rand [0, 911, 89]
game #6000 avgloss=0.447365, rand vs nn: [0, 139, 861], nn vs rand [0, 906, 94]
game #9000 avgloss=0.430961, rand vs nn: [0, 178, 822], nn vs rand [1, 875, 124]
game #12000 avgloss=0.413101, rand vs nn: [1, 168, 831], nn vs rand [0, 884, 116]
game #15000 avgloss=0.415886, rand vs nn: [0, 140, 860], nn vs rand [0, 927, 73]
game #18000 avgloss=0.400588, rand vs nn: [0, 193, 807], nn vs rand [0, 879, 121]
game #21000 avgloss=0.355718, rand vs nn: [0, 163, 837], nn vs rand [0, 924, 76]
game #24000 avgloss=0.413263, rand vs nn: [0, 140, 860], nn vs rand [0, 923, 77]
game #27000 avgloss=0.394454, rand vs nn: [0, 200, 800], nn vs rand [0, 902, 98]
game #30000 avgloss=0.387902, rand vs nn: [0, 134, 866], nn vs rand [0, 923, 77]
game #33000 avgloss=0.407715, rand vs nn: [0, 158, 842], nn vs rand [1, 935, 64]
game #36000 avgloss=0.388903, rand vs nn: [0, 177, 823], nn vs rand [0, 867, 133]
game #39000 avgloss=0.40679

In [13]:
# Stand-alone 10000-game evaluation for each side assignment; this is the same
# statement as the last line of the training cell.
print("final result: rand vs nn: %s, nn vs rand %s"%(test_random(0, 10000), test_random(1, 10000)) )

final result: rand vs nn: [1, 1233, 8766], nn vs rand [0, 9173, 827]
