In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from __future__ import print_function, absolute_import, division

In [3]:
import numpy as np
from wgomoku import GomokuBoard, GomokuTools as gt, NH9x9, Heuristics

In [4]:
def num_offensive(o, d):
    s, l, offset = gt.mask2(o, d)
    m2o_bits = gt.as_bit_array(s)[:l]
    max_count = 0
    for w in [2,1,0]:
        i = 0
        while i <= len(m2o_bits) - 2 - w:
            count = sum(m2o_bits[i:i+w+2])
            count = 3*count - (w+2)
            if count > max_count:
                max_count = count
            i+=1
    if m2o_bits[0] == 0:
        max_count += 1.5
    if m2o_bits[-1] == 0:
        max_count += 1.5

    return max_count        


In [5]:
def line_score(xo):
    """Return the heuristic score of a single line given as an x/o string.

    The string is converted with ``gt.line_for_xo`` into the ``(o, d)`` pair
    that ``gt.mask2`` and ``num_offensive`` work on.

    Args:
        xo: line pattern such as ``'...xx.x.'``.

    Returns:
        ``num_offensive(o, d) - 2`` when the length reported by ``gt.mask2``
        is at least 4 and the bit array of its packed mask has at least one
        set bit; otherwise ``0``.
    """
    o, d = gt.line_for_xo(xo)
    # Only mask2 is needed; the former gt.mask(o, d) call was never used.
    packed, length, _ = gt.mask2(o, d)
    if length >= 4 and sum(gt.as_bit_array(packed)) >= 1:
        return num_offensive(o, d) - 2
    return 0

In [6]:
# Sample line patterns; each one is printed next to its line_score.
sample_lines = (
    '...ox...',
    '...x..xo',
    '.....x..',
    '...x.xo.',
    '...x..x.',
    '...xxo..',
    'xx......',
    '...x.x..',
    '..oxx.x.',
    '...xx...',
    '.xx.....',
    '...xxxo.',
    '...xx.x.',
    '...xxx..',
    '.oxxxx..',
    '..xxxx..',
)
for pattern in sample_lines:
    print(pattern, line_score(pattern))

...ox... 0.5
...x..xo 1.5
.....x.. 2.0
...x.xo. 2.5
...x..x. 3.0
...xxo.. 3.5
xx...... 3.5
...x.x.. 4.0
..oxx.x. 4.5
...xx... 5.0
.xx..... 5.0
...xxxo. 5.5
...xx.x. 6.0
...xxx.. 7.0
.oxxxx.. 7.5
..xxxx.. 9.0


In [7]:
# Exponent of the power norm used to blend the two strongest line scores.
kappa = 3.0


def nhcombine(l):
    """Combine several line scores into one score for a position.

    Only the two largest entries of ``l`` matter.

    Args:
        l: iterable of line scores.

    Returns:
        * ``8`` if the largest score exceeds 7;
        * ``7`` if the largest score equals 7, or if it is one of
          4.5, 5.5, 6, 6.5 and the second largest is at least 4;
        * ``6.9`` if both of the two largest scores are 4 or 5;
        * otherwise the kappa-norm of the two largest scores,
          ``(a**kappa + b**kappa)**(1/kappa)``.

    Raises:
        IndexError: if ``l`` is empty, or has a single entry that does not
            trigger one of the first two early returns.
    """
    ordered = sorted(l)
    top = ordered[-1]

    if top > 7:
        return 8  # Done
    if top == 7:
        return 7  # truly strong

    # The runner-up is looked up only after the single-value early exits.
    runner_up = ordered[-2]

    if top in (4.5, 5.5, 6, 6.5) and runner_up >= 4:
        return 7  # truly strong

    if top in (4, 5) and runner_up in (4, 5):
        return 6.9  # can only be countered by strong counter-attack

    return (top**kappa + runner_up**kappa)**(1/kappa)

In [8]:
# Grid of the 20 scores 0.0, 0.5, ..., 9.5 (steps of one half).
values = 0.5 * np.arange(20)
values

array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. ,
       6.5, 7. , 7.5, 8. , 8.5, 9. , 9.5])

In [9]:
# Lookup table holding nhcombine() for every 4-tuple drawn from `values`.
# With scores in steps of 0.5, twice a score is a base-20 digit, so the slot
# 2*(8000*e + 400*ne + 20*n + nw) is a 4-digit base-20 number: 20**4 = 160000.
# NOTE(review): e/ne/n/nw presumably name the four line directions -- confirm.
precomputed = np.zeros([160000])
for e in values:
    for ne in values:
        for n in values:
            # Part of the index that stays fixed in the innermost loop.
            base = 8000*e + 400*ne + 20*n
            for nw in values:
                v = nhcombine([e, ne, n, nw])
                precomputed[int(2*(base + nw))] = v

In [10]:
# Random demo input: a 5x5 grid with 4 line scores per cell.
# Each score is 9 - floor(sqrt(u))/2 with u uniform in [0, 18**2), i.e. a
# multiple of 0.5 in [0.5, 9.0]; the square root makes low scores more likely.
# A dedicated, seeded generator keeps the sample identical across
# "Restart & Run All" and leaves the global NumPy random state untouched.
SEED = 42
rng = np.random.RandomState(SEED)
line_counts = 9 - np.sqrt(rng.randint(0, 18**2, [5, 5, 4])).astype(int) / 2

In [11]:
# Bring the last axis (the 4 scores per cell) to the front and split it into
# one 2-D array per score.
e, ne, n, nw = line_counts.transpose(2, 0, 1)
# Same index formula that was used to fill `precomputed`, applied elementwise.
indices = (2 * (8000 * e + 400 * ne + 20 * n + nw)).astype(int)
# Fancy indexing looks up the combined score of every cell at once.
scores = precomputed[indices]

In [12]:
# Show each cell's four line scores next to its looked-up combined score.
for i, j in np.ndindex(5, 5):
    print(line_counts[i, j], scores[i, j])

[1.5 1.  3.5 2. ] 3.7053975277103093
[2. 4. 2. 3.] 4.497941445275415
[4.5 0.5 4.5 6.5] 7.0
[3.  4.5 8.  3.5] 8.0
[8.  1.  2.5 1.5] 8.0
[1.5 2.5 4.5 6.5] 7.0
[1.  2.5 0.5 3.5] 3.881968038328153
[1.  0.5 1.  5.5] 5.510997279969543
[2.5 2.  0.5 3. ] 3.4931840139090524
[1.5 1.5 3.  4.5] 4.906599465282884
[2.  1.  1.  1.5] 2.2489707226377074
[4.5 4.5 0.5 1. ] 7.0
[1.  2.5 1.  0.5] 2.5522343610007314
[7.  2.5 3.  1. ] 7.0
[4.  3.5 0.5 5.5] 7.0
[5.  4.  6.5 2. ] 7.0
[8.  6.5 2.5 2. ] 8.0
[1.  3.  0.5 4. ] 4.497941445275415
[4.  3.5 0.5 3.5] 4.745609979014665
[2.  6.  2.5 1.5] 6.141321179758671
[2.5 3.5 7.  2.5] 7.0
[0.5 3.  3.5 0.5] 4.118830692140462
[1.5 3.5 1.  0.5] 3.589527176034159
[3.  1.  1.5 4.5] 4.906599465282884
[1.5 4.  7.5 6. ] 8.0


In [13]:
# Spot check: call nhcombine directly on one hand-picked score quadruple.
nhcombine([6.5, 0.0, 2.5, 3.5])

6.822044948812934

Now, a classical strategy could be: (A:attack, D:defend)
- Win with level A7+ position
- if not possible, ALWAYS defend single level D7+ position
- Give up if more than one D7+ position exists.

- ALWAYS attack at level A7 position
- if not possible, ALWAYS defend level D7 position

- ALWAYS tree search level A6.9 for a strong defense
    - if not a strong defense, attack at that position
    - if strong defense possible, consider deeper tree search or alternatives
- ALWAYS tree search level D6.9 for a strong counter-attack
    - if no strong counter-attack possible, defend that position
    - if strong counter-attack possible, consider deeper tree search or alternatives
- Choose greedy (when fighting) or epsilon-greedy (when learning)
    - allow policies with a more aggressive or a more defensive style
