In [None]:
# UNCOMMENT FOR INTERACTIVE PLOTTING
# %matplotlib notebook
%matplotlib widget
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import animation, rc, cm
import IPython, io, urllib
import curses
import ipywidgets as widgets
from matplotlib import animation, rc, cm
import time
from snakelib import FastSnake

# Set the matplotlib rc parameter animation.html to "html5" for the whole notebook.
rc("animation", html="html5")

(ML:practical_work:genetic_snake)=
# Reinforcement learning on snake with a genetic neural network

:::{admonition} Required files
:class: important
In order to work properly, this notebook requires the following module in its folder:

* {download}`snakelib.py <snakelib.py>`


Put it in your working directory along with this notebook. 
:::


This notebook is an example of reinforcement learning applied to video games. You will use the legendary game Snake, rewritten in Python for the occasion, and try to develop an automatic game strategy: first by hand, then by using a genetic algorithm to evolve a neural network. Graphical examples will let you see how the game performance evolves.

## Part 1: Try the game

In this first part, you are asked to try the game and check that you understand the rules. Try to imagine what you need to know to win a game.

In [None]:
def _make_button(icon, tooltip, description="", button_style="success"):
    """Build one control button of the manual game.

    Parameters
    ----------
    icon : str
        Font Awesome icon name WITHOUT the ``fa-`` prefix (ipywidgets adds
        the prefix itself, so ``"fa-arrow-left"`` would resolve to a class
        that does not exist and the button would be rendered without icon).
    tooltip : str
        Text shown when hovering over the button.
    description : str, optional
        Label of the button; empty for the icon-only arrow buttons.
    button_style : str, optional
        One of the predefined ipywidgets button styles.

    Returns
    -------
    widgets.Button
        The enabled button, not yet connected to any callback.
    """
    return widgets.Button(
        description=description,
        disabled=False,
        button_style=button_style,
        tooltip=tooltip,
        icon=icon,
    )


# The four arrow buttons have no label, so the icon is the only thing that
# tells them apart: its name must be one that ipywidgets can resolve.
left_widget = _make_button("arrow-left", "Want to go left ?")
right_widget = _make_button("arrow-right", "Want to go right ?")
up_widget = _make_button("arrow-up", "Want to go up ?")
down_widget = _make_button("arrow-down", "Want to go down ?")
reset_widget = _make_button(
    "power-off", "Want to reset ?", description="Reset", button_style="danger"
)

# Module-level placeholder; inside set_direction the parameter of the same name shadows it.
direction = 0


def set_direction(direction):
    """Play one move of the manual game and redraw the board.

    Parameters
    ----------
    direction : int
        Direction code forwarded unchanged to ``snake.play``. The button
        callbacks of this notebook pass 0 (right), 1 (up), 2 (left) or
        3 (down).

    Notes
    -----
    Relies on the module-level ``snake`` and ``update_fig``, which are looked
    up when the function is called, not when it is defined.
    """
    snake.play(direction)
    update_fig()


def reset_game():
    """Reset the manual game and redraw the board.

    Calls ``snake.reset()`` on the module-level ``snake`` and then
    ``update_fig()`` so that the figure shows the state after the reset.
    """
    snake.reset()
    update_fig()


# Connect every button to its action. Direction codes used here:
# 0 = right, 1 = up, 2 = left, 3 = down.
# The single argument received by each callback is ignored.
left_widget.on_click(lambda arg: set_direction(2))
right_widget.on_click(lambda arg: set_direction(0))
up_widget.on_click(lambda arg: set_direction(1))
down_widget.on_click(lambda arg: set_direction(3))
reset_widget.on_click(lambda arg: reset_game())


def update_fig():
    """Refresh the manual-play figure from the current state of ``snake``.

    Copies ``snake.grid`` into the image, rewrites the title with the score
    and a status message, and schedules a redraw of the figure that owns the
    image.

    Returns
    -------
    tuple
        One-element tuple holding the image artist ``im``.

    Notes
    -----
    Reads the module-level ``snake``, ``im`` and ``title``.
    """
    im.set_array(snake.grid)
    status = snake.status
    if status == 0:
        mess = "PLAY"
    elif status == -1:
        mess = "YOU DIED (YOURSELF)"
    elif status == -2:
        mess = "YOU DIED (LAVA)"
    else:
        # Any other status code used to leave `mess` unbound and crash the
        # button callback with an UnboundLocalError.
        mess = f"GAME OVER (status {status})"
    title.set_text(f"Score = {snake.score}, {mess}")
    # Redraw the figure the image belongs to: plt.draw() acts on whichever
    # figure is current, which is a different one as soon as another figure
    # has been created.
    im.figure.canvas.draw_idle()
    return (im,)


# Game instance driven by the buttons (10 x 10 grid).
snake = FastSnake(Nrow=10, Ncol=10)

# Figure of the manual game. `title` and `im` are kept as module-level names
# because update_fig modifies them after every move.
fig1, ax = plt.subplots()
ax.axis("off")
title = plt.title(f"Score = {snake.score}, PLAY ")
im = plt.imshow(snake.grid, interpolation="nearest", animated=True)
# anim = animation.FuncAnimation(fig, updatefig, frames=400, interval=1, blit=True)
plt.show()
# Last expression of the cell: the box holding the five buttons is the cell output.
widgets.Box([left_widget, right_widget, up_widget, down_widget, reset_widget])

In [None]:
# Close the current figure (the manual-play figure when the cells are run in order).
plt.close()

## Automatic playing

First, you are asked to build an agent that plays automatically. At each move it has to decide, from the snake's sensor values, which direction is the best one to take.

In [None]:
def my_agent(sensors):
    """Hand-written playing policy: pick a direction from the sensor values.

    Parameters
    ----------
    sensors : numpy.ndarray
        Six values. The first four describe the neighbouring cell in each of
        the four directions; the code treats 1 as "fruit", 0.0 as "free" and
        anything not greater than -1 as forbidden. The last two are unpacked
        as ``dcol`` and ``drow`` and only their sign is used (presumably the
        column and row offsets to the fruit -- confirm against snakelib).

    Returns
    -------
    int
        Index (0 to 3) of the chosen direction.
    """
    neighbors = sensors[:4]
    dcol, drow = sensors[4:]

    # Rule 1: a fruit in a neighbouring cell is taken immediately.
    for candidate in np.arange(4):
        if neighbors[candidate] == 1:
            return candidate

    # Rule 2: no free neighbouring cell at all, so any move will do.
    if 0.0 not in neighbors:
        return np.random.randint(4)

    # Rule 3: rate each direction from the signs of dcol and drow, discard
    # the forbidden directions, and draw at random among the best rated ones.
    col_bonus = (2, 1, 0, 1) if dcol > 0.0 else (0, 1, 2, 1)
    row_bonus = (1, 0, 1, 2) if drow > 0.0 else (1, 2, 1, 0)
    ratings = np.zeros(4)
    ratings += col_bonus
    ratings += row_bonus
    ratings = ratings * (neighbors > -1.0)
    best_rated = np.where(ratings == ratings.max())[0]
    return np.random.choice(best_rated)


# Ask the hand-written agent for a move on the current state of the manual game
# and show the chosen direction index as the cell output.
my_choice = my_agent(snake.sensors())
my_choice

## Automatic play with graphic output


In [None]:
# Separate 15 x 15 game instance played by my_agent in the animation below.
snake2 = FastSnake(Nrow=15, Ncol=15)


def updatefig(*args):
    """Advance the automatic game by one move and refresh its image.

    Used as the frame callback of ``FuncAnimation``; whatever positional
    arguments it receives are ignored.

    Returns
    -------
    tuple
        One-element tuple holding ``im2``, the image of the animated figure.

    Notes
    -----
    Reads the module-level ``snake2``, ``im2`` and ``my_agent``. The game is
    reset as soon as its status is no longer 0, so the animation never stops
    on a finished game.
    """
    sensors = snake2.sensors()
    my_choice = my_agent(sensors)
    snake2.play(my_choice)
    im2.set_array(snake2.grid)
    if snake2.status != 0:
        snake2.reset()
    # With blit=True only the artists returned here are redrawn, so this has
    # to be the image of this figure (im2) and not the manual-play image (im).
    return (im2,)


# Figure of the automatic game; im2 is the image that updatefig refreshes.
fig2, ax2 = plt.subplots()
ax2.axis("off")
im2 = plt.imshow(snake2.grid, interpolation="nearest", animated=True)
# The animation is bound to `anim` so that the next cell can pause it.
anim = animation.FuncAnimation(fig2, updatefig, frames=40, interval=50, blit=True)
plt.show()

In [None]:
# Stop the automatic-play animation, then close its figure.
anim.pause()
plt.close(fig2)

## Benchmark

In [None]:
import tqdm

# Benchmark of the hand-written agent: Ntests games on a 10 x 10 grid, each
# limited to max_turns moves. Swap `range` for `tqdm.trange` to get a
# progress bar.
Ntests = 200
max_turns = 1000
snake3 = FastSnake(Nrow=10, Ncol=10)
scores = np.zeros(Ntests)
turns = np.zeros(Ntests)
for test in range(Ntests):
    snake3.reset()
    turn = 0
    # Keep playing while the move budget is not used up and the game is running.
    while turn < max_turns and snake3.status == 0:
        sensors = snake3.sensors()
        my_choice = my_agent(sensors)
        snake3.play(my_choice)
        turn += 1
    # Final score and number of moves of this game.
    scores[test], turns[test] = snake3.score, turn

In [None]:
# One row per benchmark game.
data = pd.DataFrame({"score": scores, "turns": turns})
# Summary table: one row per column of `data`, one column per statistic.
data.describe().T[["mean", "std", "max", "min", "count"]]

## Genetic Neural Network agent 

In [None]:
# Settings of the genetic algorithm.
Npop = 2000  # number of individuals (weight sets) in the population
Ngen = 4  # number of generations run by the training loop
Ntries = 3  # games played by each individual in each generation
Nin = 6  # number of network inputs (sensor values)
Nout = 4  # number of network outputs; the agent returns the index of the largest one
keep_ratio = 0.2  # fraction of the population copied unchanged into the next generation
mutation_ratio = 0.2  # probability that a newly bred individual is mutated
mutation_sigma = 0.001  # standard deviation of the multiplicative mutation noise (centred on 1)
keep_individuals = int(keep_ratio * Npop)  # number of individuals kept, as an integer
max_turns = 500  # cap on the moves of one training game; rebinds the name used by the benchmark


# all_bias = np.random.rand(Npop, Nout)
# Initial population, uniform in [-1, 1). Each individual is an (Nout, Nin + 1)
# matrix; neural_agent uses the last column as the bias. The trailing `* 1.0`
# is a scale factor currently set to one.
all_weights = (np.random.rand(Npop, Nout, Nin + 1) * 2 - 1.0) * 1.0


def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise.

    Parameters
    ----------
    x : float or numpy.ndarray
        Input value(s).

    Returns
    -------
    float or numpy.ndarray
        Value(s) between 0 and 1, same shape as ``x``.
    """
    decay = np.exp(-x)
    return 1.0 / (1.0 + decay)


def genetic_algorithm(
    weigths, data, keep_fraction=None, mutation_prob=None, mutation_scale=None
):
    """Build the next generation of weights from a scored population.

    The best individuals are copied unchanged to the front of the new
    population; every remaining slot is filled with a random weighted average
    of two of them, optionally perturbed by multiplicative Gaussian noise.

    Parameters
    ----------
    weigths : array-like
        Current population; the first axis indexes the individuals. (The
        misspelled name is kept so that existing keyword calls still work.)
    data : pandas.DataFrame
        One row per individual, in the same order as ``weigths``, with a
        ``"score"`` column. It is not modified.
    keep_fraction : float, optional
        Fraction of the population kept unchanged. Defaults to the
        notebook-level ``keep_ratio``.
    mutation_prob : float, optional
        Probability that a bred individual is mutated. Defaults to the
        notebook-level ``mutation_ratio``.
    mutation_scale : float, optional
        Standard deviation of the mutation noise, which is centred on 1 and
        multiplies the weights. Defaults to the notebook-level
        ``mutation_sigma``.

    Returns
    -------
    numpy.ndarray
        New population with the same shape as ``weigths``; the kept
        individuals come first, in descending score order.

    Raises
    ------
    ValueError
        If ``data`` does not have exactly one row per individual.
    """
    population = np.asarray(weigths, dtype=float)
    n_pop = population.shape[0]
    if n_pop == 0:
        return population.copy()
    if len(data) != n_pop:
        raise ValueError(
            f"data has {len(data)} rows but the population has {n_pop} individuals"
        )

    # Settings that were not given fall back on the notebook-level ones.
    if keep_fraction is None:
        keep_fraction = keep_ratio
    if mutation_prob is None:
        mutation_prob = mutation_ratio
    if mutation_scale is None:
        mutation_scale = mutation_sigma

    # At least one parent is needed to breed, and no more than the population.
    n_keep = min(n_pop, max(1, int(keep_fraction * n_pop)))

    # Rank by position rather than by index label: this works for any
    # DataFrame index and leaves the caller's frame untouched.
    ranking = np.argsort(-data["score"].to_numpy(dtype=float), kind="stable")

    new_population = np.empty_like(population)
    # SELECTION
    new_population[:n_keep] = population[ranking[:n_keep]]
    # HYBRIDATION
    for child in range(n_keep, n_pop):
        first, second = np.random.choice(n_keep, 2)
        pw = np.random.rand()
        # Weighted average of the two parents: pw and (1 - pw) sum to one.
        offspring = pw * new_population[first] + (1.0 - pw) * new_population[second]
        # MUTATION
        if np.random.rand() < mutation_prob:
            offspring = offspring * np.random.normal(
                loc=1.0, scale=mutation_scale, size=offspring.shape
            )
        new_population[child] = offspring

    return new_population


def neural_agent(weights, sensors):
    """Choose a direction with a single-layer network.

    Parameters
    ----------
    weights : numpy.ndarray
        Two-dimensional array with one row per output. All columns but the
        last multiply the sensor values; the last column is added as a bias.
    sensors : numpy.ndarray
        Input vector, with as many values as ``weights`` has columns minus one.

    Returns
    -------
    int
        Index of the output with the largest activation; the lowest index
        wins when several outputs are tied.
    """
    out = sigmoid(weights[:, :-1] @ sensors + weights[:, -1])
    # np.argmax returns the first maximum, like the former
    # np.where(out == out.max())[0][0], but in a single pass and without
    # raising an IndexError when an activation is NaN.
    return np.argmax(out)

In [None]:
import tqdm


# def make_sensors2(sensors):
#     sensors2 = np.zeros(6)
#     sensors2[:4] = sensors[:8:2] * 2 - 1.0
#     for i in range(2):
#         s = sensors[8 + i]
#         if s == 0.0:
#             sensors2[4 + i] = 0.0
#         else:
#             sensors2[4 + i] = np.sign(s)
#     sensors2[4:] = np.sign(sensors[8:])
#     return sensors2


# Training of the population: every generation is evaluated, ranked, and
# replaced by its best individuals plus children bred from them.
# NOTE: the cell rebinds `all_weights`, so running it again continues from the
# evolved population instead of starting over.
snake4 = FastSnake(Nrow=10, Ncol=10)
scores = np.zeros(Npop)
turns = np.zeros(Npop)
for generation in range(Ngen):
    # EVALUATION: each individual plays Ntries games; its score and its number
    # of moves are averaged over these games.
    scores[:] = 0.0
    turns[:] = 0.0
    for test in range(Npop):  # swap for tqdm.trange(Npop) to get a progress bar
        weights = all_weights[test]
        tries_scores = np.zeros(Ntries)
        tries_turns = np.zeros(Ntries)
        for trial in range(Ntries):
            snake4.reset()
            Ncol = snake4.Ncol
            Nrow = snake4.Nrow
            # Every game starts with the fruit forced to the same place
            # (presumably the flattened index of row Nrow - 2, column
            # Ncol - 2 -- confirm against snakelib).
            snake4.fruit_position = (Nrow - 2) * Ncol + Ncol - 2
            turn = 0
            while snake4.status == 0:
                sensors = snake4.sensors()
                my_choice = neural_agent(weights, sensors)
                snake4.play(my_choice)
                turn += 1
                if turn >= max_turns:
                    break
            tries_scores[trial] = snake4.score
            tries_turns[trial] = turn
        scores[test] = tries_scores.mean()
        turns[test] = tries_turns.mean()
    # Printed after the evaluation, so that the score shown is the one of the
    # generation named on the same line.
    print(f"Generation: {generation}, best score = {scores.max()}")
    perf = scores * turns
    order = np.argsort(scores)[::-1]

    new_all_weights = np.zeros_like(all_weights)
    # SELECTION: the best individuals come first, in descending score order.
    new_all_weights[:keep_individuals] = all_weights[order[:keep_individuals]]
    # HYBRIDATION
    keep_range = np.arange(keep_individuals)
    for indiv in range(keep_individuals, Npop):
        parents = np.random.choice(keep_range, 2)
        pw = np.random.rand()
        # Weighted average of the two parents: pw and (1 - pw) sum to one.
        w = pw * new_all_weights[parents[0]] + (1.0 - pw) * new_all_weights[parents[1]]
        # MUTATION:
        if np.random.rand() <= mutation_ratio:
            w *= np.random.normal(loc=1.0, scale=mutation_sigma, size=w.shape)
        new_all_weights[indiv] = w

    all_weights = new_all_weights

# Results of the last evaluated generation, best score first.
data = pd.DataFrame({"score": scores, "turns": turns, "perf": perf}).sort_values(
    "score", ascending=False
)
data.head(5)

In [None]:
# New 10 x 10 game used to replay the evolved network; this rebinds the name
# `snake2` already used by the hand-written agent animation.
snake2 = FastSnake(Nrow=10, Ncol=10)

# The training loop stores the selected individuals first, in descending score
# order, so index 0 is the top-ranked individual of the last evaluated generation.
weights = all_weights[0]  # BEST AGENT


def updatefig(*args):
    """Advance the replay game by one move of the neural agent and refresh its image.

    Used as the frame callback of ``FuncAnimation``; whatever positional
    arguments it receives are ignored. This definition replaces the earlier
    ``updatefig`` that played with the hand-written agent.

    Returns
    -------
    tuple
        One-element tuple holding ``im2``, the image of the animated figure.

    Notes
    -----
    Reads the module-level ``snake2``, ``im2``, ``weights`` and
    ``neural_agent``. The game is reset as soon as its status is no longer 0.
    """
    sensors = snake2.sensors()
    my_choice = neural_agent(weights, sensors)
    snake2.play(my_choice)
    im2.set_array(snake2.grid)
    if snake2.status != 0:
        snake2.reset()
    return (im2,)


# Figure of the replay; im2 is the image that updatefig refreshes.
fig2, ax2 = plt.subplots()
ax2.axis("off")
im2 = plt.imshow(snake2.grid, interpolation="nearest", animated=True)
# The animation is bound to `anim` so that the next cell can pause it.
anim = animation.FuncAnimation(fig2, updatefig, frames=40, interval=50, blit=True)
plt.show()

In [None]:
# Stop the replay animation, then close every open figure.
anim.pause()
plt.close("all")

In [None]:
# Direction index chosen by the selected network for the current state of the replay game.
neural_agent(weights, snake2.sensors())

In [None]:
# Sort the results table in place, best score first.
data.sort_values("score", ascending=False, inplace=True)
# The triple-quoted string below is a disabled selection snippet, not a
# docstring: it is a plain expression and, being last in the cell, its text is
# what the cell displays.
"""kept_data = data.iloc[:keep_individuals]
new_all_weights = np.zeros_like(all_weights)
new_all_weights[:keep_individuals] = all_weights[kept_data.index]
keep_range = np.arange(keep_individuals)"""

In [None]:
# Output activations of the selected network for one hand-made input.
# The network takes Nin = 6 values, so the former 10-value vector could not be
# multiplied by the (4, 6) weight block and the cell raised a ValueError.
# The values below are that former vector passed through the conversion
# sketched in the commented-out make_sensors2 helper (presumably four
# neighbouring-cell values followed by two fruit-offset signs -- confirm
# against snakelib).
sensors = np.array([-1.0, 0.0, -1.0, -1.0, 1.0, 1.0])
out = sigmoid(weights[:, :-1] @ sensors + weights[:, -1])
out

In [None]:
# Largest weight over the whole population.
all_weights.max()

In [None]:
# Smallest weight over the whole population.
all_weights.min()