In [452]:
from __future__ import absolute_import, division, print_function
from minimax.connect_4 import Connect4
from minimax.connect_4 import YELLOW_PLAYER, RED_PLAYER
import minimax.min_max as min_max
from operator import itemgetter
import random

# Module-level aliases for the two colour constants imported from
# minimax.connect_4.
# NOTE(review): the two names differ in case (P1 vs p2), and `p2` is rebound to
# an NNEtPlayer instance by the breeding sanity-check loop further down.
P1 = YELLOW_PLAYER
p2 = RED_PLAYER


def game(p1, p2):
    """Play one Connect4 game between two player objects and return the board.

    ``p1`` takes the even-numbered turns (so it moves first) and ``p2`` the odd
    ones. Each player must provide ``pick_move(board)``, whose result is
    unpacked into the arguments of ``board.play`` after the player's
    ``colour``. Play stops once ``board.get_winner()`` is no longer None or
    ``board.available_moves()`` is empty.
    """
    board = Connect4()
    seats = (p1, p2)
    turn = 0
    while True:
        # The winner is checked first; available moves are only queried when
        # nobody has won yet.
        if board.get_winner() is not None:
            break
        if len(board.available_moves()) == 0:
            break
        current = seats[turn % 2]
        chosen = current.pick_move(board)
        board.play(current.colour, *chosen)
        turn += 1
    return board
    
class MinMaxPlayer():
    """Player that delegates move selection to ``min_max.pick_move``."""

    def __init__(self, colour, depth=2):
        # colour: identifies this player's pieces when a move is played.
        # depth: forwarded unchanged to min_max.pick_move on every turn.
        self.colour = colour
        self.depth = depth

    def pick_move(self, board):
        """Return the first element of the pair given by ``min_max.pick_move``."""
        chosen, _unused = min_max.pick_move(board, self.colour, self.depth)
        return chosen
    

    

In [266]:
import numpy as np

def sigmoid(x):
    """Logistic function 1 / (1 + e^-x), evaluated through ``np.exp``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

class NNEtPlayer():
    """Connect4 player driven by a network with one hidden layer.

    A flattened 6x7 board (42 inputs plus a bias term) feeds 64 sigmoid hidden
    units (plus a bias term), which feed 7 linear outputs. The index of the
    largest output is returned as the move.
    """

    def __init__(self, colour, w1=None, w2=None):
        self.colour = colour
        self.n_features = 6*7
        self.n_h1 = 64
        self.classes = 7

        # Any weight matrix not supplied is drawn uniformly from [-1, 1); the
        # extra row in each matrix holds the bias weights.
        if w1 is None:
            w1 = (np.random.rand(self.n_features + 1, self.n_h1) - 0.5) * 2
        if w2 is None:
            w2 = (np.random.rand(self.n_h1 + 1, self.classes) - 0.5) * 2
        self.w1 = w1
        self.w2 = w2

    def board_to_vector(self, board):
        """Encode ``board.board`` as a (1, 42) float32 row vector.

        Cells equal to 1 become 1.0, cells equal to 0 become -1.0 and every
        other cell stays 0.0.
        NOTE(review): presumably 1 and 0 are the two colour constants; confirm
        against minimax.connect_4.
        """
        cells = np.array(board.board).reshape([1, self.n_features])
        encoded = np.zeros([1, self.n_features], np.float32)
        encoded[np.nonzero(cells == 1)] = 1
        encoded[np.nonzero(cells == 0)] = -1
        return encoded

    @staticmethod
    def add_1s(vec):
        """Prepend a column of ones (the bias input) to ``vec``."""
        bias_column = np.ones([vec.shape[0], 1])
        return np.c_[bias_column, vec]

    def apply_net(self, vec):
        """Run the forward pass and return the raw (non-squashed) outputs."""
        hidden_in = np.matmul(NNEtPlayer.add_1s(vec), self.w1)
        hidden_out = sigmoid(hidden_in)
        return np.matmul(NNEtPlayer.add_1s(hidden_out), self.w2)

    def pick_move(self, board):
        """Return a 1-tuple holding the index of the largest network output."""
        outputs = self.apply_net(self.board_to_vector(board))
        return (np.argmax(outputs),)



# Smoke test: play one game between a depth-3 MinMaxPlayer (yellow, moves
# first) and an NNEtPlayer with randomly initialised weights (red), then print
# the final board.
print(game(MinMaxPlayer(YELLOW_PLAYER,3), NNEtPlayer(RED_PLAYER)))

| | | | |R| | |
| | | | |R| | |
| | | | |Y| | |
| | | | |R| | |
| | | | |R|Y| |
|Y|Y|Y|Y|R|Y|Y|


array([[-0.77237489, -0.44707348, -0.9141783 , -0.15623991],
       [-0.58733523,  0.81122396,  0.73571893,  0.75727924],
       [-0.55072809, -0.21502542,  0.05261472, -0.06413061]])

In [461]:
def full_mutate(p1):
    """Return a new player whose every weight is randomly rescaled.

    Each weight of ``p1`` is multiplied by its own factor drawn uniformly from
    [0.5, 1.5). The new player keeps ``p1.colour``; ``p1`` is left unchanged.
    """
    rescaled = []
    for weights in (p1.w1, p1.w2):
        factors = 0.5 + np.random.rand(*weights.shape)
        rescaled.append(weights * factors)
    return NNEtPlayer(p1.colour, w1=rescaled[0], w2=rescaled[1])

def small_mut(p, n=1):
    """Return a new player equal to ``p`` except for ``n`` re-drawn weights.

    One of the two weight matrices is chosen with equal probability. A copy of
    it has ``n`` randomly indexed entries (indices may repeat) overwritten with
    fresh values drawn uniformly from [-1, 1). The other matrix is passed to
    the new player as-is, so it is shared with ``p`` rather than copied.

    Args:
        p: player providing ``w1``, ``w2`` and ``colour``.
        n: number of single-weight overwrites to perform.

    Returns:
        A new NNEtPlayer with the same colour as ``p``.
    """
    mutate_first_layer = np.random.rand() > 0.5
    source = p.w1 if mutate_first_layer else p.w2
    new_w = np.copy(source)
    rows, cols = source.shape
    for _ in range(n):
        index = (np.random.randint(rows), np.random.randint(cols))
        # The bounds must differ: uniform(-1, -1) can only ever return -1,
        # which would turn every mutation into the same constant write.
        new_w[index] = np.random.uniform(-1, 1)

    # Keep the parent's colour, as full_mutate does, instead of forcing red.
    if mutate_first_layer:
        return NNEtPlayer(p.colour, w1=new_w, w2=p.w2)
    return NNEtPlayer(p.colour, w1=p.w1, w2=new_w)


def mutate(p):
    """Return a mutated copy of ``p``.

    Roughly one call in ten rescales every weight via ``full_mutate``; the
    rest overwrite between 1 and 9 individual weights via ``small_mut``.
    """
    if np.random.rand() >= 0.9:
        return full_mutate(p)
    return small_mut(p, np.random.randint(1,10))


# Sanity check: mutation must never change the shape of either weight matrix.
for _ in range(500):
    p = NNEtPlayer(RED_PLAYER)
    m = mutate(p)
    assert m.w1.shape == p.w1.shape
    assert m.w2.shape == p.w2.shape

# For the last pair only: how many weights are identical before and after.
print(np.count_nonzero(p.w1 == m.w1))
print(np.count_nonzero(p.w2 == m.w2))




2743
455


In [29]:
def score_game(player, opponent_depth=2):
    """Play ``player`` (as red, moving second) against a MinMaxPlayer and score it.

    Side effect: ``player.colour`` is overwritten with ``RED_PLAYER``.

    Args:
        player: object with ``pick_move(board)`` and a writable ``colour``.
        opponent_depth: depth handed to the yellow MinMaxPlayer opponent.

    Returns:
        500 if ``player`` won; otherwise the number of non-None cells on the
        final board (42 minus the count of None), so longer games score higher.
    """
    player.colour = RED_PLAYER
    opponent = MinMaxPlayer(YELLOW_PLAYER, opponent_depth)
    board = game(opponent, player)
    # get_winner has to be called: a bound method never compares equal to a
    # colour, which would make the winning branch unreachable.
    if board.get_winner() == player.colour:
        return 500
    cells = np.array(board.board).flatten().tolist()
    return 6*7 - cells.count(None)

def score_n_games(player, opponent_depth=2, n_games=100):
    """Return the mean ``score_game`` result over ``n_games`` games."""
    total = 0
    for _ in range(n_games):
        total = total + score_game(player, opponent_depth)
    return total / n_games
    
    
# Baseline: mean score of one randomly initialised network over the default
# 100 games against the default depth-2 opponent.
score_n_games(NNEtPlayer(RED_PLAYER))

9.61

In [30]:
# Population for the random-search experiment: each entry pairs a player
# ('p') with its most recent score ('s'), initially 0.
n_players = 20
players = [{'p':NNEtPlayer(RED_PLAYER),'s':0} for _ in range(n_players)]

def keep_top(players,n=5):
    """Keep the ``n`` best-scoring entries and refill with fresh random players.

    ``players`` is a list of ``{'p': player, 's': score}`` dicts. The result
    starts with the top ``n`` entries in descending score order, followed by
    ``n_players - n`` new entries (module-level ``n_players``) scored 0.
    """
    ranked = sorted(players, key=lambda entry: entry['s'], reverse=True)
    survivors = ranked[:n]
    newcomers = []
    for _ in range(n_players - n):
        newcomers.append({'p':NNEtPlayer(RED_PLAYER),'s':0})
    return survivors + newcomers
    

# Random search with elitism: every generation re-scores all players over 10
# games each, prints the sorted scores, then keeps the best ones and replaces
# the rest with fresh random players (no mutation or crossover here).
for i in range(100):
    for player in players:
        # Survivors from the previous generation are re-scored as well.
        player['s'] = score_n_games(player['p'], opponent_depth=2, n_games=10)

    print ('\n\nScores: %s' % sorted(p['s'] for p in players))
    players = keep_top(players)

    




Scores: [9.4, 9.7, 9.8, 9.8, 9.9, 10.1, 10.4, 10.4, 10.7, 11.0, 11.1, 11.2, 11.3, 11.9, 11.9, 12.2, 12.3, 12.5, 13.8, 13.8]


Scores: [9.9, 10.0, 10.0, 10.2, 10.5, 11.1, 11.1, 11.2, 11.9, 11.9, 12.0, 12.1, 12.1, 12.6, 13.4, 13.6, 13.6, 13.7, 13.9, 14.0]


Scores: [9.4, 9.5, 9.8, 10.3, 10.4, 11.4, 11.7, 11.9, 12.3, 12.5, 12.8, 12.9, 12.9, 13.2, 13.6, 13.7, 14.5, 14.6, 14.7, 14.9]


Scores: [8.7, 9.6, 10.0, 10.2, 10.3, 10.6, 10.9, 10.9, 10.9, 11.8, 12.2, 12.2, 12.7, 13.3, 13.3, 13.5, 13.6, 14.0, 14.8, 15.6]


Scores: [9.2, 9.5, 10.0, 10.2, 10.4, 10.7, 11.4, 11.4, 12.2, 12.3, 12.4, 12.5, 12.6, 12.7, 12.7, 13.1, 13.2, 13.9, 14.2, 14.2]


Scores: [10.0, 10.1, 10.8, 10.9, 10.9, 11.0, 11.2, 11.7, 11.7, 11.9, 12.0, 12.1, 12.3, 12.8, 13.5, 13.7, 13.9, 13.9, 14.1, 15.5]


Scores: [9.2, 9.2, 9.6, 9.6, 10.1, 10.2, 10.3, 10.4, 10.9, 10.9, 11.0, 11.2, 11.2, 11.9, 12.8, 13.3, 13.7, 14.0, 14.5, 15.8]


Scores: [9.1, 10.0, 10.2, 10.3, 10.5, 10.5, 10.7, 10.8, 10.9, 10.9, 11.2, 11.4, 11.4, 12.3, 12.9, 

In [33]:
# First five population entries. This assumes `players` was last produced by
# the dict-based keep_top above, which puts the best scorers first.
top_p = players[:5]
top_p

[{'p': <__main__.NNEtPlayer instance at 0x7f7eca74d4d0>, 's': 16.6},
 {'p': <__main__.NNEtPlayer instance at 0x7f7eca74dbd8>, 's': 15.2},
 {'p': <__main__.NNEtPlayer instance at 0x7f7eca74db00>, 's': 13.6},
 {'p': <__main__.NNEtPlayer instance at 0x7f7eca74d1b8>, 's': 13.1},
 {'p': <__main__.NNEtPlayer instance at 0x7f7eca74d5a8>, 's': 12.7}]

In [467]:
def breed(p1, p2):
    """Cross two players and return the resulting pair of children.

    NOTE(review): the first condition is hard-coded to ``True``, so every call
    currently goes through ``swap_one_w`` and the ``swap_one_n_w`` /
    ``swap_one_layer`` branches are unreachable. ``swap_one_layer`` is not
    defined in the visible part of this notebook; it must exist before the
    original ``x < 0.5`` test is restored.
    """
    # Drawn on every call even though the value is currently unused.
    x = np.random.random()
    if True:# x < 0.5:
        return swap_one_w(p1, p2)
    elif x < 0.9:
        return swap_one_n_w(p1, p2)
    else:
        return swap_one_layer(p1, p2)
    

def swap_one_w(p1, p2):
    """Exchange one randomly chosen weight between two players.

    A layer (``w1`` or ``w2``) is picked with equal probability, then a single
    random position inside it. Each child is a copy of one parent's matrix for
    that layer with the value at that position taken from the other parent.
    The untouched layer is passed through from the corresponding parent.

    Args:
        p1, p2: parents providing ``w1`` and ``w2`` of matching shapes.

    Returns:
        A 2-tuple of new NNEtPlayer objects (created with RED_PLAYER); the
        first derives from ``p1``, the second from ``p2``.
    """
    use_first_layer = np.random.random() > 0.5
    if use_first_layer:
        p1_w, p2_w = p1.w1, p2.w1
    else:
        p1_w, p2_w = p1.w2, p2.w2

    index = (np.random.randint(p1_w.shape[0]), np.random.randint(p1_w.shape[1]))
    p1_w_new = np.copy(p1_w)
    p2_w_new = np.copy(p2_w)
    p1_w_new[index] = p2_w[index]
    p2_w_new[index] = p1_w[index]

    if use_first_layer:
        # w2 must be passed explicitly: when it is omitted the constructor
        # draws a brand-new random second layer, discarding the parent's.
        return (NNEtPlayer(RED_PLAYER, w1=p1_w_new, w2=p1.w2),
                NNEtPlayer(RED_PLAYER, w1=p2_w_new, w2=p2.w2))
    return (NNEtPlayer(RED_PLAYER, w1=p1.w1, w2=p1_w_new),
            NNEtPlayer(RED_PLAYER, w1=p2.w1, w2=p2_w_new))
    

    
def swap_one_n_w(p1, p2):
    """Exchange one whole weight column between two players.

    A layer (``w1`` or ``w2``) is picked with equal probability, then a random
    column index in it. Each child is a copy of one parent's matrix for that
    layer with that column taken from the other parent; the other layer is
    passed through from the corresponding parent. Returns a 2-tuple of new
    NNEtPlayer objects, the first derived from ``p1`` and the second from
    ``p2``.
    """
    first_layer = np.random.random() > 0.5
    src_a, src_b = (p1.w1, p2.w1) if first_layer else (p1.w2, p2.w2)

    col = np.random.randint(src_a.shape[1])
    child_a = np.copy(src_a)
    child_b = np.copy(src_b)
    child_a[:, col] = src_b[:, col]
    child_b[:, col] = src_a[:, col]

    if first_layer:
        return (NNEtPlayer(RED_PLAYER, w1=child_a, w2=p1.w2),
                NNEtPlayer(RED_PLAYER, w1=child_b, w2=p2.w2))
    return (NNEtPlayer(RED_PLAYER, w1=p1.w1, w2=child_a),
            NNEtPlayer(RED_PLAYER, w1=p2.w1, w2=child_b))

# Cross the first two entries of top_p by swapping one weight column.
dad, mum = top_p[1]['p'], top_p[0]['p']
c1, c2 = swap_one_n_w(dad, mum)

# Count the weights the second child still has in common with `mum`, the
# parent it was copied from.
print(np.count_nonzero(c2.w1 == mum.w1))
print(np.count_nonzero(c2.w2 == mum.w2))

# Sanity check: breeding must preserve the shape of both weight matrices.
# NOTE: `p2` here rebinds the module-level `p2` colour alias.
for _ in range(500):
    p = NNEtPlayer(RED_PLAYER)
    p2 = NNEtPlayer(RED_PLAYER)
    c1, c2 = breed(p, p2)
    assert c1.w1.shape == p.w1.shape
    assert c1.w2.shape == p.w2.shape
    assert c2.w1.shape == p.w1.shape
    assert c2.w2.shape == p.w2.shape

2709
455


In [None]:
def breed_group(parents, times = 30):
    """Build the next generation from ``parents``.

    Repeats ``times`` times: pick two parents at random (with replacement),
    mutate each, then cross them with ``breed``. All children from every
    crossing are collected into one flat list.
    """
    offspring = []
    for _ in range(times):
        first = mutate(random.choice(parents))
        second = mutate(random.choice(parents))
        offspring.extend(breed(first, second))
    return offspring


# Starting population for the evolutionary run. Unlike the earlier experiment,
# the list holds bare NNEtPlayer objects rather than {'p', 's'} dicts.
n_players = 30
players = [NNEtPlayer(RED_PLAYER) for _ in range(n_players)]

In [None]:
def keep_top(players,n=10):
    """Report generation statistics and return the ``n`` best players.

    Args:
        players: list of ``{'p': player, 's': score}`` dicts.
        n: how many of the best-scoring players to return.

    Returns:
        The ``'p'`` values of the top ``n`` entries, best first. An empty
        input returns an empty list and prints nothing.
    """
    if not players:
        # Nothing to rank or average.
        return []
    top = sorted(players, key=itemgetter('s'), reverse=True)
    scores = [p['s'] for p in top]
    # Average over however many of the top five actually exist, so a
    # population smaller than five is not divided by a fixed 5.
    best = scores[:5]
    print('Average: %s. Ave top 5: %s, top: %s' %(sum(scores)/len(scores), sum(best)/len(best), scores[0]))
    return [p['p'] for p in top[:n]]
    

# Evolutionary loop: score every player over 10 games, keep the best ones
# (keep_top also prints the generation statistics), and replace the whole
# population with the mutated and crossed children of those survivors.
for i in range(1000):
    scores = []
    for player in players:
        scores.append({'s':score_n_games(player, opponent_depth=2, n_games=10), 'p':player})
    
    players = breed_group(keep_top(scores))
    
    

Average: 13.605. Ave top 5: 16.88, top: 17.5
Average: 13.2833333333. Ave top 5: 17.38, top: 17.9
Average: 13.2916666667. Ave top 5: 16.7, top: 17.8
Average: 14.085. Ave top 5: 17.58, top: 18.0
Average: 12.7683333333. Ave top 5: 17.32, top: 18.4
Average: 13.4516666667. Ave top 5: 17.62, top: 18.3
Average: 13.6583333333. Ave top 5: 17.34, top: 18.3
Average: 13.855. Ave top 5: 17.36, top: 17.7
Average: 13.675. Ave top 5: 17.66, top: 19.1
Average: 14.1133333333. Ave top 5: 18.0, top: 18.9
Average: 13.9416666667. Ave top 5: 18.38, top: 19.1
Average: 13.7566666667. Ave top 5: 17.58, top: 18.6
Average: 13.49. Ave top 5: 17.32, top: 18.1
Average: 13.43. Ave top 5: 17.22, top: 17.5
Average: 14.3783333333. Ave top 5: 18.36, top: 19.8
Average: 14.1616666667. Ave top 5: 17.86, top: 18.6
Average: 13.405. Ave top 5: 16.84, top: 17.7
Average: 14.11. Ave top 5: 18.22, top: 18.7
Average: 13.0166666667. Ave top 5: 17.2, top: 18.3
Average: 13.6. Ave top 5: 17.78, top: 18.7
Average: 13.65. Ave top 5: 18.2