# Часть 2

In [None]:
import json
import random
import numpy as np

**Snake**

In [None]:

# Playing-field size in pixels and the side length of one grid cell.
width, height = 400, 400
grid = 16
count = 0  # not read by any code visible in this notebook

# Initial game state. `cells` is the list of (x, y) body segments (head first)
# and `maxCells` the current length limit of the snake.
snake = {'x': 160, 'y': 160, 'dx': grid, 'dy': 0, 'cells': [], 'maxCells': 4}
apple = {'x': 320, 'y': 320}

In [None]:
def restart():
    """Build a brand-new game state.

    Returns:
        tuple: ``(snake, apple)`` - two freshly created dicts holding the
        starting position, heading and length of the snake and the starting
        position of the apple.
    """
    fresh_snake = {'x': 160, 'y': 160,
                   'dx': grid, 'dy': 0,
                   'cells': [], 'maxCells': 4}
    fresh_apple = {'x': 320, 'y': 320}
    return fresh_snake, fresh_apple


def generate_apple(apple):
    """Move the apple to a random grid-aligned cell inside the field.

    The dict is updated in place; nothing is returned.

    Args:
        apple: dict with ``'x'`` and ``'y'`` keys (pixel coordinates).
    """
    # random.randint() includes BOTH endpoints, so the largest admissible cell
    # index is (cells - 1). The previous upper bound of 25 could put the apple
    # at x or y == 400, i.e. outside the 400px field, where the wrapped snake
    # can never reach it.
    apple['x'] = random.randint(0, width // grid - 1) * grid
    apple['y'] = random.randint(0, height // grid - 1) * grid


def loop(snake, apple):
    """Advance the snake game by one step, mutating ``snake`` and ``apple``.

    The head moves by (dx, dy) and wraps around the field edges, the body list
    is updated, apples are eaten and self-collisions are detected.

    Returns:
        int: ``-1`` if two body segments occupy the same cell (game over),
        otherwise ``0``.
    """
    head_x = snake['x'] + snake['dx']
    head_y = snake['y'] + snake['dy']

    # Leaving the field on one side re-enters on the opposite side.
    if head_x < 0:
        head_x = width - grid
    elif head_x >= width:
        head_x = 0

    if head_y < 0:
        head_y = height - grid
    elif head_y >= height:
        head_y = 0

    snake['x'], snake['y'] = head_x, head_y

    # The new head goes to the front; drop the tail once the limit is exceeded.
    body = [(head_x, head_y)]
    body.extend(snake['cells'])
    if len(body) > snake['maxCells']:
        del body[-1]
    snake['cells'] = body

    for position, cell in enumerate(body):
        cell_x, cell_y = cell[0], cell[1]

        # Any segment lying on the apple counts as eating it.
        if cell_x == apple['x'] and cell_y == apple['y']:
            snake['maxCells'] += 1
            generate_apple(apple)

        # The snake occupies the same space as a later body part: game over.
        for other in body[position + 1:]:
            if cell_x == other[0] and cell_y == other[1]:
                return -1

    return 0

In [None]:

# Action id -> key code. Judging by apply_action below:
# 37 = left, 38 = up, 39 = right, 40 = down.
actionMap = {0: 37, 1: 38, 2: 39, 3: 40}


def apply_action(snake, actionId):
    """Turn the snake according to ``actionId`` (mutates ``snake`` in place).

    A turn is accepted only when the snake is not already moving along the
    requested axis, so it can neither reverse onto itself nor "re-press" its
    current direction.

    Args:
        snake: game-state dict with ``'dx'`` and ``'dy'`` entries.
        actionId: key of ``actionMap``.
    """
    key = actionMap[actionId]

    # key -> (velocity component that must currently be 0, new dx, new dy)
    turns = {
        37: ('dx', -grid, 0),
        38: ('dy', 0, -grid),
        39: ('dx', grid, 0),
        40: ('dy', 0, grid),
    }
    if key not in turns:
        return

    axis_must_be_idle, new_dx, new_dy = turns[key]
    if snake[axis_must_be_idle] == 0:
        snake['dx'] = new_dx
        snake['dy'] = new_dy

In [None]:

def get_features(snake, apple):
    """Compute the 16 sensor values the snake agent decides on.

    Layout of the returned list:
        0-1   sign of the current velocity (dx, dy)
        2-3   head-minus-tail offset, scaled by field size (0 when no cells)
        4-5   head shares the apple's x / y coordinate
        6-9   sign indicators of the head-minus-apple offset (x>0, x<0, y>0, y<0)
        10-15 head shares a column / row with a body segment, combined with
              the direction of travel along the other axis (==0, >0, <0)
    """
    head_x, head_y = snake['x'], snake['y']
    dx, dy = snake['dx'], snake['dy']
    cells = snake['cells']

    if len(cells):
        tail_offset_x = (head_x - cells[-1][0])/width
        tail_offset_y = (head_y - cells[-1][1])/height
    else:
        tail_offset_x = tail_offset_y = 0

    apple_offset_x = (head_x - apple['x'])/width
    apple_offset_y = (head_y - apple['y'])/height

    # cells[0] is the head itself, so only the rest of the body is scanned.
    body = cells[1:]
    shares_column = any(head_x == cell[0] for cell in body)
    shares_row = any(head_y == cell[1] for cell in body)

    return [
        np.sign(dx),
        np.sign(dy),
        tail_offset_x,
        tail_offset_y,
        head_x == apple['x'],
        head_y == apple['y'],
        apple_offset_x > 0,
        apple_offset_x < 0,
        apple_offset_y > 0,
        apple_offset_y < 0,
        shares_column and dy == 0,
        shares_row and dx == 0,
        shares_column and dy > 0,
        shares_row and dx > 0,
        shares_column and dy < 0,
        shares_row and dx < 0,
    ]

%timeit get_features(snake, apple)

5.06 µs ± 78.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [None]:
def get_one():
    """Sample a random individual: a linear policy for the snake.

    Returns:
        tuple: ``(W, b)`` with ``W`` of shape (16, 4) (features x actions) and
        ``b`` of shape (4,), both drawn from a standard normal distribution.
    """
    weights = np.random.normal(0.0, 1.0, (16, 4))
    bias = np.random.normal(0.0, 1.0, (4,))
    return weights, bias


def getAction(snake, apple, W, b):
    """Pick the action with the highest linear score ``W.T @ features + b``.

    Returns:
        Index (0..3) of the best-scoring action for the current game state.
    """
    features = get_features(snake, apple)
    action_scores = W.T.dot(features) + b
    return action_scores.argmax()

getAction(snake, apple, *get_one())

1

In [None]:

def get_score(W, b, patience=100):
    """Play one snake game with policy ``(W, b)`` and return its fitness.

    The game runs until the snake bites itself or until it fails to grow for
    more than ``patience`` consecutive steps. In the latter case the result is
    halved as a penalty for stalling.

    Returns:
        The final ``maxCells`` of the snake (halved, as a float, on a stall).
    """
    snake, apple = restart()
    steps_left = patience
    longest_seen = snake['maxCells']

    while True:
        if loop(snake, apple) == -1:
            break  # self-collision

        apply_action(snake, getAction(snake, apple, W, b))

        # Growth refills the stall budget.
        if snake['maxCells'] > longest_seen:
            longest_seen = snake['maxCells']
            steps_left = patience

        steps_left -= 1
        if steps_left < 0:
            snake['maxCells'] = snake['maxCells']/2
            break

    return snake['maxCells']

In [None]:

def mutate(W, b, mutation_rate=0.02):
    """Return a mutated copy of the individual ``(W, b)``.

    Gaussian noise scaled by ``mutation_rate`` is added to a random subset of
    the entries; the subset is chosen by the sign of a second, independent
    Gaussian sample, so about half of the entries change.
    """
    noise_W, noise_b = get_one()
    gate_W, gate_b = get_one()

    mutated_W = W + noise_W * (gate_W > 0) * mutation_rate
    mutated_b = b + noise_b * (gate_b > 0) * mutation_rate
    return mutated_W, mutated_b


def crossover(W1, b1, W2, b2):
    """Uniform crossover of two individuals.

    Every entry of the child is taken from the first parent with probability
    0.5 and from the second parent otherwise; weights and biases use
    independent masks.

    Returns:
        tuple: ``(W_child, b_child)``.
    """
    from_first_W = np.random.random(W1.shape) < 0.5
    from_first_b = np.random.random(b1.shape) < 0.5

    child_W = W1 * from_first_W + W2 * np.logical_not(from_first_W)
    child_b = b1 * from_first_b + b2 * np.logical_not(from_first_b)
    return child_W, child_b
     

In [None]:
def generate_random(population, size):
    """Create ``size`` exploratory individuals.

    Each one is, with equal probability, either a completely random individual
    or a mutated copy of ``population[0]``.
    """
    def newcomer():
        if np.random.random() < 0.5:
            return get_one()
        return mutate(*population[0])

    return [newcomer() for _ in range(size)]


In [None]:
def selection(population, scores, topK=2):
    """Pick ``topK`` survivors: the best individuals plus roulette-wheel picks.

    Args:
        population: list of individuals, each a tuple of numpy arrays.
        scores: one non-negative fitness value per individual (not all zero).
        topK: number of survivors to return.

    Returns:
        list: exactly ``topK`` individuals (deep copies). The elites come
        first, ordered from best to worst, followed by the roulette picks,
        which are drawn with probability proportional to the score and may
        repeat.
    """
    weights = np.array(scores)*1.
    weights /= weights.sum()

    # For an odd topK the spare slot goes to elitism. Using topK//2 for both
    # halves returned only topK-1 survivors, which made the population built
    # by get_new_population() shrink by one every generation.
    n_roulette = topK//2
    n_elite = topK - n_roulette

    elite_idx = np.argsort(weights)[::-1][:n_elite]
    roulette_idx = np.random.choice(len(weights),
                                    p=weights,
                                    size=n_roulette)

    def clone(index):
        return tuple(part.copy() for part in population[index])

    return [clone(i) for i in elite_idx] + [clone(i) for i in roulette_idx]


def breed(population, scores, nChilds=10):
    """Produce ``nChilds`` children by crossover followed by mutation.

    Both parents of every child are drawn (with replacement) with probability
    proportional to their score.
    """
    probabilities = np.array(scores)*1.
    probabilities /= probabilities.sum()

    # One row per child: the indices of its two parents.
    parent_pairs = np.random.choice(len(probabilities),
                                    p=probabilities,
                                    size=(nChilds, 2))

    return [
        mutate(*crossover(*population[first], *population[second]))
        for first, second in parent_pairs
    ]

In [None]:
def get_new_population(population, scores, topK=4, randomNum=10):
    """Assemble the next generation.

    The result is the concatenation of (in this order) the selected survivors,
    the bred children and ``randomNum`` exploratory individuals. The number of
    children is whatever is left of ``len(population)`` after reserving room
    for ``topK`` survivors and ``randomNum`` newcomers (never negative).
    """
    survivors = selection(population, scores, topK)
    n_children = max(0, len(population) - randomNum - topK)
    children = breed(population, scores, nChilds=n_children)
    newcomers = generate_random(population, randomNum)
    return survivors + children + newcomers

In [None]:
def get_scores(population, patience=100):
    """Evaluate every ``(W, b)`` individual once; return the list of scores."""
    return [get_score(W, b, patience) for W, b in population]

In [None]:
POPULATION_SIZE = 64
NUM_GENERATIONS = 10
# Why? The game is stochastic (random apple positions), so every individual is
# scored NUM_REPEATS times and the results are averaged.
NUM_REPEATS = 3
NUM_RESTARTS = 5
# Stall budget passed to get_score(): grows by 100 steps every 5 generations.
PATIENCE = lambda x: 100*((x+5)//5)

best_thingey = None
best_score = 0

for n_restart in range(NUM_RESTARTS):
    print('='*50)
    print('Cтарт перезапуска №%d'%(n_restart+1))
    print('Лучшая пока что: %.1f'%best_score)
    print('='*50)
    population = [get_one() for _ in range(POPULATION_SIZE)]

    for generation in range(NUM_GENERATIONS):
        # Float accumulator. Starting from the int 0 produced an int64 array
        # whenever a batch of scores happened to be all integers, and the
        # in-place `+=` with floats / `/=` below then fail with a casting error.
        scores = np.zeros(len(population))
        for _ in range(NUM_REPEATS):
            scores += np.array(get_scores(population, PATIENCE(generation)))
        scores /= NUM_REPEATS
        bscore = max(scores)

        # Why? Selection probabilities are proportional to the scores, so the
        # 4th power strongly favours the best individuals.
        scores **= 4
        population = get_new_population(population, scores, topK=5, randomNum=20)
        if bscore > best_score:
            best_score = bscore
            # population[0] is the top-scoring individual: selection() puts
            # the elites first. The bias is appended as one extra row below W.
            best_thingey = np.concatenate([population[0][0],     # W
                                           [population[0][1]]])  # b
            print('Рестарт: %d\tПоколение: %d\tЗначение: %.1f'%(n_restart+1,
                                                                generation,
                                                                bscore))

Cтарт перезапуска №1
Лучшая пока что: 0.0
Рестарт: 1	Поколение: 0	Значение: 5.8
Рестарт: 1	Поколение: 1	Значение: 19.5
Рестарт: 1	Поколение: 2	Значение: 21.7
Рестарт: 1	Поколение: 3	Значение: 23.0
Рестарт: 1	Поколение: 4	Значение: 30.3
Cтарт перезапуска №2
Лучшая пока что: 30.3
Рестарт: 2	Поколение: 6	Значение: 32.3
Cтарт перезапуска №3
Лучшая пока что: 32.3
Cтарт перезапуска №4
Лучшая пока что: 32.3
Рестарт: 4	Поколение: 8	Значение: 32.5
Cтарт перезапуска №5
Лучшая пока что: 32.5
Рестарт: 5	Поколение: 5	Значение: 35.0


In [None]:
# Export the best snake policy as a JavaScript file. Every weight is truncated
# (towards zero) to three decimal places.
with open('snake_weights.js', 'w') as f:
    truncated_rows = [[int(1e3*value)/1e3 for value in row]
                      for row in best_thingey]
    f.write('var W = %s;\n' % json.dumps(truncated_rows))

**Ping-Pong**

In [None]:
import json
import random
import numpy as np

In [None]:
# Field geometry (pixels) and movement speeds (pixels per step).
width, height = 750, 585
grid = 15
paddleHeight = grid*5
maxPaddleY = height - grid - paddleHeight  # lowest allowed paddle top edge
paddleSpeed = 6
ballSpeed = 5

# Both paddles start vertically centred; the ball starts in the middle of the
# field. `score` on the ball counts paddle hits (see loop()).
leftPaddle = {
    'x': grid*2,
    'y': height/2 - paddleHeight/2,
    'width': grid,
    'height': paddleHeight,
    'dy': 0,
}
rightPaddle = {
    'x': width-grid*3,
    'y': height / 2 - paddleHeight/2,
    'width': grid,
    'height': paddleHeight,
    'dy': 0,
}
ball = {
    'x': width/2,
    'y': height/2,
    'width': grid,
    'height': grid,
    'resetting': False,
    'dx': ballSpeed,
    'dy': -ballSpeed,
    'score': 0,
}

In [None]:

def collides(obj1, obj2):
    """Axis-aligned bounding-box overlap test.

    Both arguments are dicts with ``'x'``, ``'y'``, ``'width'`` and
    ``'height'``. Rectangles that merely touch along an edge do not collide.

    Returns:
        bool: True when the two rectangles overlap.
    """
    if not obj1['x'] < obj2['x'] + obj2['width']:
        return False
    if not obj1['x'] + obj1['width'] > obj2['x']:
        return False
    if not obj1['y'] < obj2['y'] + obj2['height']:
        return False
    return obj1['y'] + obj1['height'] > obj2['y']


def restart(leftPaddle, rightPaddle, ball):
    """Reset positions and the score for a new rally (mutates all three dicts).

    The ball goes back to the centre of the field with a zero score and both
    paddles return to their starting positions. Velocities (``dx``/``dy`` of
    the ball, ``dy`` of the paddles) are left as they are.
    """
    ball.update(resetting=False,
                x=width / 2,
                y=height / 2,
                score=0)

    leftPaddle.update(x=grid*2,
                      y=height/2 - paddleHeight/2)

    rightPaddle.update(x=width - grid*3,
                       y=height / 2 - paddleHeight/2)

In [None]:
def loop(leftPaddle, rightPaddle, ball):
    """Advance the ping-pong simulation by one step (mutates its arguments).

    Paddles move and are clamped to the field, the ball moves and bounces off
    the top/bottom walls and off the paddles. Every paddle hit increments
    ``ball['score']``.

    Returns:
        int: ``-1`` when the ball has left the field horizontally (and is not
        flagged as resetting), otherwise ``0``.
    """
    paddles = (leftPaddle, rightPaddle)

    for paddle in paddles:
        paddle['y'] += paddle['dy']

    # Keep the paddles between the top and bottom walls.
    for paddle in paddles:
        if paddle['y'] < grid:
            paddle['y'] = grid
        elif paddle['y'] > maxPaddleY:
            paddle['y'] = maxPaddleY

    ball['x'] += ball['dx']
    ball['y'] += ball['dy']

    # Bounce off the top or bottom wall.
    if ball['y'] < grid:
        ball['y'] = grid
        ball['dy'] = -ball['dy']
    elif ball['y'] + grid > height - grid:
        ball['y'] = height - grid * 2
        ball['dy'] = -ball['dy']

    out_of_field = ball['x'] < 0 or ball['x'] > width
    if out_of_field and not ball['resetting']:
        return -1

    # Bounce off a paddle: the ball is moved out of the paddle so that the
    # same collision is not detected again on the next step.
    if collides(ball, leftPaddle):
        rebound_x = leftPaddle['x'] + leftPaddle['width']
    elif collides(ball, rightPaddle):
        rebound_x = rightPaddle['x'] - rightPaddle['width']
    else:
        return 0

    ball['dx'] = -ball['dx']
    ball['x'] = rebound_x
    ball['score'] += 1
    return 0
     

In [None]:
actionMap = {0: 38, # right player up
             1: 40, # right player down
             2: 87, # left player up
             3: 83, # left player down
             4: -1, # right player waits
             5: -2} # left player waits


def apply_action(leftPaddle, rightPaddle, actionId):
    """Set the vertical velocity of one paddle according to ``actionId``.

    Exactly one paddle is affected per action; the other keeps its current
    ``dy``. Both dicts may be mutated in place; nothing is returned.
    """
    key = actionMap[actionId]

    # key -> (paddle to steer, its new vertical velocity)
    effects = {
        38: (rightPaddle, -paddleSpeed),
        40: (rightPaddle, +paddleSpeed),
        -1: (rightPaddle, 0),
        87: (leftPaddle, -paddleSpeed),
        83: (leftPaddle, +paddleSpeed),
        -2: (leftPaddle, 0),
    }
    if key in effects:
        paddle, velocity = effects[key]
        paddle['dy'] = velocity

In [None]:
def get_features(leftPaddle, rightPaddle, ball):
    """Compute the 15 sensor values of the ping-pong agent.

    Layout of the returned list:
        0-2   left paddle vs ball: sign of the y offset, |y offset|/height,
              |x offset|/width
        3-5   the same three values for the right paddle
        6-7   left paddle: direction of movement, "is standing still" flag
        8-9   right paddle: direction of movement, "is standing still" flag
        10-11 direction of the ball along x and y
        12-13 which half of the field the ball is in (x and y)
        14    constant 1 - acts as the bias input, since the policy in
              getAction() has no separate bias vector
    """
    def relative_to_ball(paddle):
        offset_y = paddle['y'] - ball['y']
        return [
            np.sign(offset_y),
            np.abs(offset_y) / height,
            np.abs(paddle['x'] - ball['x']) / width,
        ]

    def movement(paddle):
        direction = np.sign(paddle['dy'])
        return [direction, direction == 0]

    ball_state = [
        np.sign(ball['dx']),
        np.sign(ball['dy']),
        np.sign(ball['x'] - width//2),
        np.sign(ball['y'] - height//2),
    ]

    return (relative_to_ball(leftPaddle)
            + relative_to_ball(rightPaddle)
            + movement(leftPaddle)
            + movement(rightPaddle)
            + ball_state
            + [1])

%timeit get_features(leftPaddle, rightPaddle, ball)

13.1 µs ± 738 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:

def get_one():
    """Sample a random individual: a (15, 6) weight matrix (features x actions)
    with standard-normal entries."""
    return np.random.normal(0.0, 1.0, (15, 6))

def getAction(leftPaddle, rightPaddle, ball, W):
    """Return the index of the action with the highest score ``W.T @ features``."""
    features = get_features(leftPaddle, rightPaddle, ball)
    action_scores = W.T.dot(features)
    return action_scores.argmax()

getAction(leftPaddle, rightPaddle, ball, get_one())
     

1

In [None]:
def get_score(W, patience=100):
    """Play one ping-pong rally with policy ``W`` and return the number of
    paddle hits.

    The module-level ``leftPaddle``, ``rightPaddle`` and ``ball`` dicts are
    reset and then used as the game state. The rally ends when the ball leaves
    the field, after 20000 steps, or when the score has not grown for more
    than ``patience`` consecutive steps.
    """
    restart(leftPaddle, rightPaddle, ball)
    steps_left = patience
    best_seen = ball['score']
    action = getAction(leftPaddle, rightPaddle, ball, W)

    for _ in range(int(2e4)):
        if loop(leftPaddle, rightPaddle, ball) == -1:
            break

        # Simulate a delayed reaction: the decision is refreshed on only about
        # half of the steps, otherwise the previous action is repeated.
        if np.random.random() < 0.5:
            action = getAction(leftPaddle, rightPaddle, ball, W)
        apply_action(leftPaddle, rightPaddle, action)

        # A new hit refills the stall budget.
        if ball['score'] > best_seen:
            best_seen = ball['score']
            steps_left = patience

        steps_left -= 1
        if steps_left < 0:
            break

    return ball['score']

In [None]:
def mutate(W, mutation_rate=0.02):
    """Return a mutated copy of ``W``.

    Gaussian noise scaled by ``mutation_rate`` is added to a random subset of
    entries, chosen by the sign of a second independent Gaussian sample.
    """
    noise = get_one()
    touched = get_one() > 0
    return W + noise * touched * mutation_rate


def crossover(W1, W2):
    """Uniform crossover: each entry comes from ``W1`` with probability 0.5,
    otherwise from ``W2``."""
    from_first = np.random.random(W1.shape) < 0.5
    from_second = np.logical_not(from_first)
    return W1 * from_first + W2 * from_second
     

In [None]:

def generate_random(population, size):
    """Create ``size`` exploratory individuals.

    Each one is, with equal probability, either a completely random individual
    or a mutated copy of ``population[0]``.
    """
    def newcomer():
        if np.random.random() < 0.5:
            return get_one()
        return mutate(population[0])

    return [newcomer() for _ in range(size)]


def selection(population, scores, topK=2):
    """Pick ``topK`` survivors: the best individuals plus roulette-wheel picks.

    Args:
        population: list of individuals, each a numpy array.
        scores: one non-negative fitness value per individual (not all zero).
        topK: number of survivors to return.

    Returns:
        list: exactly ``topK`` individuals (copies). The elites come first,
        ordered from best to worst, followed by the roulette picks, which are
        drawn with probability proportional to the score and may repeat.
    """
    weights = np.array(scores)*1.
    weights /= weights.sum()

    # For an odd topK the spare slot goes to elitism. Using topK//2 for both
    # halves returned only topK-1 survivors, which made the population built
    # by get_new_population() shrink by one every generation.
    n_roulette = topK//2
    n_elite = topK - n_roulette

    elite_idx = np.argsort(weights)[::-1][:n_elite]
    roulette_idx = np.random.choice(len(weights),
                                    p=weights,
                                    size=n_roulette)

    return ([population[i].copy() for i in elite_idx] +
            [population[i].copy() for i in roulette_idx])

In [None]:
def breed(population, scores, nChilds=10):
    """Produce ``nChilds`` children by crossover followed by mutation.

    Both parents of every child are drawn (with replacement) with probability
    proportional to their score.
    """
    probabilities = np.array(scores)*1.
    probabilities /= probabilities.sum()

    # One row per child: the indices of its two parents.
    parent_pairs = np.random.choice(len(probabilities),
                                    p=probabilities,
                                    size=(nChilds, 2))

    return [
        mutate(crossover(population[first], population[second]))
        for first, second in parent_pairs
    ]


def get_new_population(population, scores, topK=4, randomNum=10):
    """Assemble the next generation.

    The result is the concatenation of (in this order) the selected survivors,
    the bred children and ``randomNum`` exploratory individuals. The number of
    children is whatever is left of ``len(population)`` after reserving room
    for ``topK`` survivors and ``randomNum`` newcomers (never negative).
    """
    survivors = selection(population, scores, topK)
    n_children = max(0, len(population) - randomNum - topK)
    children = breed(population, scores, nChilds=n_children)
    newcomers = generate_random(population, randomNum)
    return survivors + children + newcomers

In [None]:

def get_scores(population, patience=100):
    """Evaluate every individual once; return the list of scores."""
    return [get_score(W, patience) for W in population]

In [None]:
# Defined here so that this section does not silently depend on the value
# left behind by the Snake section of the notebook.
POPULATION_SIZE = 64
RANDOM_SIZE = 20
ELITE_SIZE = 5
NUM_GENERATIONS = 100
# Why? get_score() is stochastic (random reaction delay), so every individual
# is scored NUM_REPEATS times and the results are averaged.
NUM_REPEATS = 3
NUM_RESTARTS = 5
# Stall budget passed to get_score(): grows by 1000 steps every 2 generations.
PATIENCE = lambda x: 1000*((x+2)//2)

best_thingey = None
best_score = 0

for n_restart in range(NUM_RESTARTS):
    print('='*50)
    print('Cтарт перезапуска №%d'%(n_restart+1))
    print('Лучшая пока что: %.1f'%best_score)
    print('='*50)
    population = [get_one() for _ in range(POPULATION_SIZE)]

    for generation in range(NUM_GENERATIONS):
        # The tiny positive start value keeps the selection probabilities
        # well defined even when every individual scores 0.
        scores = 1e-10
        for _ in range(NUM_REPEATS):
            scores += np.array(get_scores(population, PATIENCE(generation)))
        scores /= NUM_REPEATS
        bscore = max(scores)

        # Why? Selection probabilities are proportional to the scores, so the
        # 4th power strongly favours the best individuals.
        scores **= 4
        population = get_new_population(population, scores,
                                        topK=ELITE_SIZE,
                                        randomNum=RANDOM_SIZE)
        if bscore > best_score:
            best_score = bscore
            # population[0] is the top-scoring individual: selection() puts
            # the elites first.
            best_thingey = np.array(population[0])
            print('Рестарт: %d\tПоколение: %d\tЗначение: %.1f'%(n_restart+1,
                                                                generation,
                                                                bscore))

Cтарт перезапуска №1
Лучшая пока что: 0.0
Рестарт: 1	Поколение: 0	Значение: 2.0
Рестарт: 1	Поколение: 1	Значение: 3.0
Рестарт: 1	Поколение: 3	Значение: 3.3
Рестарт: 1	Поколение: 5	Значение: 4.0
Рестарт: 1	Поколение: 6	Значение: 4.3
Рестарт: 1	Поколение: 48	Значение: 5.0
Cтарт перезапуска №2
Лучшая пока что: 5.0
Cтарт перезапуска №3
Лучшая пока что: 5.0
Cтарт перезапуска №4
Лучшая пока что: 5.0
Рестарт: 4	Поколение: 22	Значение: 5.3
Рестарт: 4	Поколение: 24	Значение: 6.0
Рестарт: 4	Поколение: 33	Значение: 6.3
Cтарт перезапуска №5
Лучшая пока что: 6.3


In [None]:
# Export the best ping-pong policy as a JavaScript file. Every weight is
# truncated (towards zero) to three decimal places.
with open('pingpong_weights.js', 'w') as f:
    truncated_rows = [[int(1e3*value)/1e3 for value in row]
                      for row in best_thingey]
    f.write('var W = %s;\n' % json.dumps(truncated_rows))

**DoodleJump**

In [None]:

import json
import random
import numpy as np

In [None]:
def init_states():
    """Create a fresh DoodleJump game.

    Returns:
        tuple: ``(doodle, platforms, settings)`` where

        * ``settings`` holds the physics/geometry constants plus the mutable
          ``keydown``, ``score`` and platform-spacing values;
        * ``platforms`` is a list of ``{'x', 'y'}`` dicts ordered from the
          bottom of the screen upwards, the first one centred under the player;
        * ``doodle`` is the player, standing on the first platform.
    """
    settings = {
        'width': 375,
        'height': 667,
        'platformWidth': 65,
        'platformHeight': 20,

        'gravity': 0.33,
        'drag': 0.3,
        'bounceVelocity': -12.5,

        'minPlatformSpace': 15,
        'maxPlatformSpace': 20,
        'keydown': False,
        'score': 0,
    }
    settings['platformStart'] = settings['height'] - 50

    field_width = settings['width']
    plat_width = settings['platformWidth']
    start_y = settings['platformStart']

    platforms = [{'x': field_width / 2 - plat_width / 2, 'y': start_y}]

    # Stack platforms upwards until one lies at or above the top of the screen.
    y = start_y
    while y > 0:
        y -= settings['platformHeight'] + \
             np.random.randint(settings['minPlatformSpace'],
                               settings['maxPlatformSpace'])

        # In the lower half of the screen, re-draw x until the platform stays
        # out of the column around the starting platform.
        x = np.random.uniform(25, field_width - 25 - plat_width)
        while (y > settings['height'] / 2 and
               field_width / 2 - plat_width * 1.5 < x
               < field_width / 2 + plat_width / 2):
            x = np.random.uniform(25, field_width - 25 - plat_width)

        platforms.append({'x': x, 'y': y})

    doodle = {
        'width': 40,
        'height': 60,
        'x': field_width / 2 - 20,
        'y': start_y - 60,
        'dx': 0,
        'dy': 0,
        'playerDir': 0,
        'prevDoodleY': start_y - 60,
    }

    return doodle, platforms, settings

doodle, platforms, settings = init_states()

In [None]:

def restart():
    """Start a new game: returns a fresh ``(doodle, platforms, settings)``."""
    fresh_doodle, fresh_platforms, fresh_settings = init_states()
    return (fresh_doodle, fresh_platforms, fresh_settings)


def loop(doodle, platforms, settings):
    """Advance the DoodleJump simulation by one frame.

    All three arguments are mutated in place:

    * ``doodle`` - position, velocity and ``prevDoodleY`` are updated;
    * ``platforms`` - scrolled down while the player climbs above mid-screen,
      extended with new platforms at the top and pruned of platforms that
      dropped below the bottom of the screen;
    * ``settings`` - ``score`` grows by the number of pruned platforms and the
      min/max platform spacing grows by 0.5 for every platform generated.

    Returns:
        int: ``-1`` when the player has fallen below the screen by more than
        its own height (game over), otherwise ``0``.
    """
    doodle['dy'] += settings['gravity']

    if doodle['y'] < settings['height'] / 2 and doodle['dy'] < 0:
        # The player is rising above mid-screen: keep it in place and scroll
        # the world down instead (dy is negative, so y grows).
        for platform in platforms:
            platform['y'] -= doodle['dy']

        # Refill the top of the screen: each new platform sits one random gap
        # above the current top-most platform.
        while platforms[-1]['y'] > 0:
            new_x = np.random.uniform(25,
                                      settings['width'] - 25
                                      - settings['platformWidth'])
            gap = settings['platformHeight'] + np.random.uniform(
                settings['minPlatformSpace'],
                settings['maxPlatformSpace'])
            # NOTE: this value used to be wrapped in a one-argument
            # np.random.uniform(low), which draws from [low, 1.0) and therefore
            # placed the platform anywhere between the intended height and
            # y = 1.
            platforms.append(dict(x=new_x, y=platforms[-1]['y'] - gap))

            # The game gets harder with every generated platform.
            settings['minPlatformSpace'] = min(settings['minPlatformSpace']
                                               + 0.5,
                                               settings['height'] / 2 - 0.5)
            settings['maxPlatformSpace'] = min(settings['maxPlatformSpace']
                                               + 0.5,
                                               settings['height'] / 2)
    else:
        doodle['y'] += doodle['dy']

    # With no key held, drag slows the horizontal movement down to a stop.
    if not settings['keydown']:
        if doodle['playerDir'] < 0:
            doodle['dx'] += settings['drag']
            if doodle['dx'] > 0:
                doodle['dx'] = 0
                doodle['playerDir'] = 0
        elif doodle['playerDir'] > 0:
            doodle['dx'] -= settings['drag']
            if doodle['dx'] < 0:
                doodle['dx'] = 0
                doodle['playerDir'] = 0

    doodle['x'] += doodle['dx']

    # Leaving the screen on one side re-enters on the other.
    if doodle['x'] + doodle['width'] < 0:
        doodle['x'] = settings['width']
    elif doodle['x'] > settings['width']:
        doodle['x'] = -doodle['width']

    # Landing: the player must be falling, must have been entirely above the
    # platform on the previous frame and must overlap it now.
    for platform in platforms:
        if (
          (doodle['dy'] > 0) and
          (doodle['prevDoodleY'] + doodle['height'] <= platform['y']) and
          (doodle['x'] < platform['x'] + settings['platformWidth']) and
          (doodle['x'] + doodle['width'] > platform['x']) and
          (doodle['y'] < platform['y'] + settings['platformHeight']) and
          (doodle['y'] + doodle['height'] > platform['y'])
        ):
            doodle['y'] = platform['y'] - doodle['height']
            doodle['dy'] = settings['bounceVelocity']

    doodle['prevDoodleY'] = doodle['y']

    # Drop platforms that scrolled below the screen. The list is edited IN
    # PLACE: rebinding the local name left the caller's list untouched, so the
    # same off-screen platforms were re-counted into the score on every frame.
    platforms_before = len(platforms)
    platforms[:] = [platform for platform in platforms
                    if platform['y'] < settings['height']]
    settings['score'] += platforms_before - len(platforms)

    if doodle['y'] > settings['height'] + doodle['height']:
        return -1
    return 0


In [None]:

actionMap = {0: 37, # move left
             1: 39, # move right
             2: -1} # wait

def apply_action(doodle, platforms, settings, actionId):
    """Apply the agent's action to the game state (mutates the dicts in place).

    Moving left/right marks a key as held, records the direction of travel and
    sets the horizontal speed to 3 px per frame. Waiting releases the key, after
    which ``loop()`` lets drag slow the player down.

    Args:
        doodle: player dict (``'dx'`` and ``'playerDir'`` are written).
        platforms: unused; kept for a uniform call signature.
        settings: game settings dict (``'keydown'`` is written).
        actionId: key of ``actionMap``.
    """
    key = actionMap[actionId]

    if key == 37:
        settings['keydown'] = True
        # The direction is stored on the player because loop() reads
        # doodle['playerDir'] when applying drag. It used to be written to
        # settings['playerDir'], which nothing reads, so drag never engaged.
        doodle['playerDir'] = -1
        doodle['dx'] = -3
    elif key == 39:
        settings['keydown'] = True
        doodle['playerDir'] = 1
        doodle['dx'] = 3
    else:
        settings['keydown'] = False

In [None]:

# The agent sees the world at these pixel offsets relative to the player:
# a regular lattice spanning roughly two thirds of the field size in every
# direction (a point every 50 px horizontally and every 75 px vertically).
# Result: an (n_points, 2) integer array of (x offset, y offset) rows.
sensor_web = np.stack(
    [axis_grid.flatten()
     for axis_grid in np.meshgrid(np.arange(-settings['width']*2//3,
                                            +settings['width']*2//3, 50),
                                  np.arange(-settings['height']*2//3,
                                            +settings['height']*2//3, 75))],
    axis=1)

In [None]:

def get_features(doodle, platforms, settings):
    """Compute the input vector of the DoodleJump agent.

    For every point of ``sensor_web`` (placed relative to the player) the
    feature is 1.0 if the point lies strictly inside some platform and 0.0
    otherwise. Horizontal positions wrap around the field width, vertical ones
    are clipped to the screen. The player's ``dx``, ``dy`` and a constant 1
    are appended at the end.

    Returns:
        numpy.ndarray: 1-D float array of length ``len(sensor_web) + 3``.
    """
    points = np.array([(p['x'], p['y']) for p in platforms])

    probe_x = (sensor_web[:, 0] + doodle['x']) % settings['width']
    probe_y = np.clip(sensor_web[:, 1] + doodle['y'],
                      1, settings['height']-1)

    # Offsets of every probe from the top-left corner of every platform;
    # both arrays have shape (n_probes, n_platforms).
    offset_x = probe_x[:, None] - points[:, 0]
    offset_y = probe_y[:, None] - points[:, 1]

    inside = (
        ((offset_x - settings['platformWidth']) < 0)
        & (offset_x > 0)
        & ((offset_y - settings['platformHeight']) < 0)
        & (offset_y > 0)
    )

    sensors = inside.any(axis=1).astype(float)
    extras = [doodle['dx'], doodle['dy'], 1]
    return np.concatenate([sensors, extras])

%timeit get_features(doodle, platforms, settings)

85.6 µs ± 1.64 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:

# The multilayer perceptron from lab 3 says hello :)
def get_one(h1=5, n_classes=3):
    """Sample a random individual: the weights of a two-layer perceptron.

    Args:
        h1: size of the hidden layer.
        n_classes: number of output actions.

    Returns:
        tuple: ``(W, W2)`` with shapes ``(n_sensors + 3, h1)`` and
        ``(h1, n_classes)``, filled with standard-normal values.
    """
    n_inputs = sensor_web.shape[0]+3
    hidden_weights = np.random.normal(0.0, 1.0, (n_inputs, h1))
    output_weights = np.random.normal(0.0, 1.0, (h1, n_classes))
    return hidden_weights, output_weights

def softmax(x):
    """Numerically stable softmax of a 1-D array (the maximum is subtracted
    before exponentiation)."""
    shifted = x - x.max()
    exponentials = np.exp(shifted)
    return exponentials / exponentials.sum()

def getAction(doodle, platforms, settings, weights):
    """Sample an action from the two-layer policy network.

    The features go through a ReLU hidden layer and a linear output layer;
    the softmax of the outputs is used as the sampling distribution.

    Returns:
        Index of the sampled action.
    """
    hidden_weights, output_weights = weights
    features = get_features(doodle, platforms, settings)
    hidden = np.maximum(hidden_weights.T.dot(features), 0)
    logits = hidden.dot(output_weights)
    # Actions are sampled stochastically rather than chosen deterministically.
    return np.random.choice(np.arange(logits.size), p=softmax(logits))

getAction(doodle, platforms, settings, get_one())

0

In [None]:

def get_score(W, patience=100, return_actions=False):
    """Play one DoodleJump game with policy ``W`` and return its fitness.

    The fitness is the final ``settings['minPlatformSpace']``, which grows
    with every platform generated while the player climbs. The game ends when
    the player falls, after 50000 frames, or when the fitness has not grown
    for more than ``patience`` consecutive frames.

    Args:
        W: individual as accepted by ``getAction`` (pair of weight matrices).
        patience: stall budget in frames.
        return_actions: also return the action and x-coordinate history.

    Returns:
        The fitness, or ``(actions, xcoords, fitness)`` when
        ``return_actions`` is true.
    """
    doodle, platforms, settings = restart()
    steps_left = patience
    best_spacing = settings['minPlatformSpace']
    actions, xcoords = [], []
    action = getAction(doodle, platforms, settings, W)

    for _ in range(int(5e4)):
        if loop(doodle, platforms, settings) == -1:
            break

        # Simulate a delayed reaction: the decision is refreshed on only about
        # a quarter of the frames, otherwise the previous action is repeated.
        if np.random.random() < 0.25:
            action = getAction(doodle, platforms, settings, W)
        actions.append(action)
        xcoords.append(doodle['x'])
        apply_action(doodle, platforms, settings, action)

        # Progress refills the stall budget.
        if settings['minPlatformSpace'] > best_spacing:
            best_spacing = settings['minPlatformSpace']
            steps_left = patience

        steps_left -= 1
        if steps_left < 0:
            break

    fitness = settings['minPlatformSpace']
    if return_actions:
        return actions, xcoords, fitness
    return fitness

In [None]:
def mutate(weights, mutation_rate=0.01):
    """Return a mutated copy of the individual ``weights = (W, W2)``.

    Gaussian noise scaled by ``mutation_rate`` is added to a random subset of
    the entries of each matrix, chosen by the sign of a second independent
    Gaussian sample.
    """
    hidden_weights, output_weights = weights
    noise_hidden, noise_output = get_one()
    gate_hidden, gate_output = get_one()

    new_hidden = hidden_weights + noise_hidden*(gate_hidden>0)*mutation_rate
    new_output = output_weights + noise_output*(gate_output>0)*mutation_rate
    return new_hidden, new_output


def crossover(W1, W2):
    """Uniform crossover of two individuals, layer by layer.

    ``W1`` and ``W2`` are sequences of weight matrices. For every pair of
    matching matrices each entry is taken from the first parent with
    probability 0.5 and from the second otherwise.

    Returns:
        list: the child's weight matrices.
    """
    def mix(first, second):
        from_first = np.random.random(first.shape)<0.5
        return first*from_first + second*np.logical_not(from_first)

    return [mix(first, second) for first, second in zip(W1, W2)]

In [None]:

def generate_random(population, size):
    """Create ``size`` exploratory individuals.

    Each one is, with equal probability, either a completely random individual
    or a mutated copy of ``population[0]``.
    """
    def newcomer():
        if np.random.random()<0.5:
            return get_one()
        return mutate(population[0])

    return [newcomer() for _ in range(size)]


def selection(population, scores, topK=2):
    """Pick ``topK`` survivors: the best individuals plus roulette-wheel picks.

    Args:
        population: list of individuals, each a sequence of numpy arrays.
        scores: one non-negative fitness value per individual (not all zero).
        topK: number of survivors to return.

    Returns:
        list: exactly ``topK`` individuals, each a tuple of copied arrays. The
        elites come first, ordered from best to worst, followed by the roulette
        picks, which are drawn with probability proportional to the score and
        may repeat.
    """
    weights = np.array(scores)*1.
    weights /= weights.sum()

    # For an odd topK the spare slot goes to elitism. Using topK//2 for both
    # halves returned only topK-1 survivors, which would make the population
    # built by get_new_population() shrink by one every generation.
    n_roulette = topK//2
    n_elite = topK - n_roulette

    elite_idx = np.argsort(weights)[::-1][:n_elite]
    roulette_idx = np.random.choice(len(weights),
                                    p=weights,
                                    size=n_roulette)

    def clone(index):
        return tuple(part.copy() for part in population[index])

    return [clone(i) for i in elite_idx] + [clone(i) for i in roulette_idx]


def breed(population, scores, nChilds=10):
    """Produce ``nChilds`` children by crossover followed by mutation.

    Both parents of every child are drawn (with replacement) with probability
    proportional to their score.
    """
    probabilities = np.array(scores)*1.
    probabilities /= probabilities.sum()

    # One row per child: the indices of its two parents.
    parent_pairs = np.random.choice(len(probabilities),
                                    p=probabilities,
                                    size=(nChilds, 2))

    return [
        mutate(crossover(population[first], population[second]))
        for first, second in parent_pairs
    ]

# Why? Every weight is cut down to `factor` decimal places, so the population
# only ever contains values of limited precision.
def factorize(population, factor=3):
    """Truncate (towards zero) every weight to ``factor`` decimal places.

    ``population`` is modified in place - every individual is replaced by a
    tuple of new 2-D arrays - and is also returned.
    """
    scale = 10**factor

    def truncated(matrix):
        return np.array([[int(scale*value)/scale for value in row]
                         for row in matrix])

    for index, individual in enumerate(population):
        population[index] = tuple([truncated(matrix) for matrix in individual])
    return population


def get_new_population(population, scores, topK=4, randomNum=10):
    """Assemble the next generation and truncate its weights.

    The new population is the concatenation of (in this order) the selected
    survivors, the bred children and ``randomNum`` exploratory individuals,
    passed through ``factorize``. The number of children is whatever is left
    of ``len(population)`` after reserving room for ``topK`` survivors and
    ``randomNum`` newcomers (never negative).
    """
    survivors = selection(population, scores, topK)
    n_children = max(0, len(population) - randomNum - topK)
    children = breed(population, scores, nChilds=n_children)
    newcomers = generate_random(population, randomNum)
    return factorize(survivors + children + newcomers)

In [None]:

def get_scores(population, patience=100):
    """Evaluate every individual once; return the list of scores."""
    return [get_score(individual, patience) for individual in population]

In [None]:

# Save a checkpoint of the intelligent agent's "brain".
def save_thingey(best_thingey, score):
    """Write the sensor lattice and both weight matrices to a JavaScript file.

    The file is named ``doodlejump_weights_<score>.js`` (score with one
    decimal). Sensor offsets are stored as integers, weights are truncated
    (towards zero) to two decimal places.

    Args:
        best_thingey: pair ``(W, W2)`` of 2-D weight arrays.
        score: fitness of the individual; only used in the file name.
    """
    def truncated(matrix):
        return [[int(1e2*value)/1e2 for value in row] for row in matrix]

    with open('doodlejump_weights_%.1f.js'%score, 'w') as f:
        sensor_json = json.dumps([[int(value) for value in row]
                                  for row in sensor_web])
        hidden_json = json.dumps(truncated(best_thingey[0]))
        output_json = json.dumps(truncated(best_thingey[1]))
        f.write('var sensorWeb = %s;\n\nvar W = %s;\n\nvar W2 = %s;\n'%
                (sensor_json, hidden_json, output_json))

In [None]:

# Genetic-algorithm training of the DoodleJump agent: several independent
# restarts, each evolving a fresh random population for NUM_GENERATIONS.
POPULATION_SIZE = 64
RANDOM_SIZE = 20
ELITE_SIZE = 4
NUM_GENERATIONS = 100
NUM_REPEATS = 3 # why? fitness is accumulated over this many evaluations of every individual and then averaged
NUM_RESTARTS = 5
# Stall budget passed to get_score(): grows by 100 frames every 2 generations.
PATIENCE = lambda x: 100*((x+2)//2)
# NOTE(review): this population is replaced at the start of every restart
# below before it is used; the assignment still consumes random numbers.
population = [get_one() for _ in range(POPULATION_SIZE)]

best_thingey = None
best_score = 0

for n_restart in range(NUM_RESTARTS):
    print('='*50)
    print('Cтарт перезапуска №%d'%(n_restart+1))
    print('Лучшая пока что: %.1f'%best_score)
    print('='*50)
    population = [get_one() for _ in range(POPULATION_SIZE)]
    for generation in range(NUM_GENERATIONS):
        # Mean of the 4th powers of the scores: selection probabilities are
        # proportional to these values, so high scorers are strongly favoured.
        scores = 0.
        for _ in range(NUM_REPEATS):
            scores += np.array(get_scores(population, PATIENCE(generation)))**4
        scores /= NUM_REPEATS

        population = get_new_population(population, scores,
                                        topK=ELITE_SIZE,
                                        randomNum=RANDOM_SIZE)
        # The 4th root brings the best fitness back to the scale of the score.
        bscore = max(scores)**0.25
        if bscore > best_score:
            best_score = bscore
            # population[0] is the top-scoring individual of the generation
            # just evaluated: selection() puts the elites first.
            best_thingey = [x.copy() for x in population[0]]
            print('Рестарт: %d\tПоколение: %d\tЗначение: %.1f'%(n_restart+1,
                                                                generation,
                                                                bscore))
            # Checkpoint every new record above 100.
            if bscore > 100:
                save_thingey(best_thingey, best_score)
# Final save once all restarts have finished.
save_thingey(best_thingey, best_score)

Cтарт перезапуска №1
Лучшая пока что: 0.0
Рестарт: 1	Поколение: 0	Значение: 44.4
Рестарт: 1	Поколение: 2	Значение: 62.0
Рестарт: 1	Поколение: 4	Значение: 86.3
Рестарт: 1	Поколение: 6	Значение: 86.6
Рестарт: 1	Поколение: 7	Значение: 102.6
Рестарт: 1	Поколение: 9	Значение: 110.4
Рестарт: 1	Поколение: 26	Значение: 110.6
Рестарт: 1	Поколение: 70	Значение: 111.3


KeyboardInterrupt: ignored