In [None]:
# Notebook setup: imports, page width, plotting defaults.
# `display`/`HTML` are imported from the public `IPython.display` module;
# the `IPython.core.display` path used previously is deprecated.
from IPython.display import display, HTML

# Widen the classic-notebook container so the large figures fit.
display(HTML("<style>.container { width:75% !important; }</style>"))

import os
import time

import numpy as np
import pandas as pd
import torch

from carle.env import CARLE
from carle.mcl import RND2D, AE2D, SpeedDetector, PufferDetector, CornerBonus
from game_of_carle.agents.grnn import ConvGRNN
from game_of_carle.agents.carla import CARLA
from game_of_carle.agents.harli import HARLI
from game_of_carle.algos.cma import CMAPopulation

import bokeh
import bokeh.io as bio
from bokeh.io import output_notebook, show
from bokeh.plotting import figure

from bokeh.layouts import column, row
from bokeh.models import TextInput, Button, Paragraph
from bokeh.models import ColumnDataSource

from bokeh.events import DoubleTap, Tap

import matplotlib
import matplotlib.pyplot as plt

# Large default font: the figures below are big (14x32 inches).
matplotlib.rc("font", size=28)

# Colormap shared by every image and reward curve in the notebook.
cmap_name = "magma"
my_cmap = plt.get_cmap(cmap_name)

# Route bokeh output to the notebook.
output_notebook()

In [None]:
# manual designs
# Every pattern is a (1, 1, 64, 64) tensor of zeros with selected cells set
# to 1.0; the tensors are used as actions by the reward cells below.

# Life glider: five live cells around (34, 33).
life_glider = torch.zeros(1, 1, 64, 64)
life_glider[:, :, 33, 33] = 1.0
life_glider[:, :, 34, 32:34] = 1.0
life_glider[:, :, 35, 32:35:2] = 1.0  # stride 2 -> columns 32 and 34

# Common Morley puffer.
morley_puffer = torch.zeros(1, 1, 64, 64)
morley_puffer[:, :, 33, 35] = 1.0
morley_puffer[:, :, 34, 33:37] = 1.0
for puffer_row in (35, 36):
    # Rows 35 and 36 share the same three live columns.
    for puffer_col in (32, 35, 37):
        morley_puffer[:, :, puffer_row, puffer_col] = 1.0
morley_puffer[:, :, 37, 33:37] = 1.0
morley_puffer[:, :, 38, 35] = 1.0

# Two identical Morley gliders, the second shifted 8 columns to the right.
morley_glider = torch.zeros(1, 1, 64, 64)
for glider_offset in (0, 8):
    morley_glider[:, :, 42, 47 + glider_offset:50 + glider_offset] = 1.0
    morley_glider[:, :, 43, 48 + glider_offset:50 + glider_offset] = 1.0
    morley_glider[:, :, 44, 49 + glider_offset:50 + glider_offset] = 1.0

# Life Without Death seed: two small "C"-like shapes near the top-left corner.
# (A previous centred definition was overwritten before any use and has been
# removed; only this definition ever reached the cells below.)
seed_pattern = torch.zeros(1, 1, 64, 64)
seed_pattern[:, :, 0, 4:7] = 1.0
seed_pattern[:, :, 1, 6] = 1.0
seed_pattern[:, :, 2, 4:7] = 1.0

seed_pattern[:, :, 9, 1:4] = 1.0
seed_pattern[:, :, 10, 3] = 1.0
seed_pattern[:, :, 11, 1:4] = 1.0

# Preview a 16x16 crop of each design in a 2x2 grid.
pattern_panels = [
    ("Life Glider", life_glider.squeeze()[30:46, 30:46]),
    ("Common Morley Puffer", morley_puffer.squeeze()[30:46, 30:46]),
    ("Life Without Death Seed", seed_pattern.squeeze()[0:16, 0:16]),
    ("Morley Gliders", morley_glider.squeeze()[35:51, 45:61]),
]

plt.figure(figsize=(16, 16))
for panel_index, (panel_title, panel_crop) in enumerate(pattern_panels, start=1):
    plt.subplot(2, 2, panel_index)
    plt.imshow(panel_crop.numpy(), cmap=cmap_name)
    plt.title(panel_title, fontweight="bold")
plt.tight_layout()
plt.show()
    

In [None]:
# Glider (speed) Reward
# Place one Life glider in a 128x128 B3/S23 universe wrapped in SpeedDetector,
# then step with empty actions while recording the reward at every step.
env = CARLE(device="cpu", instances=1, height=128, width=128)
env.rules_from_string("B3/S23")

env = SpeedDetector(env)
obs = env.reset()

action = life_glider
rewards = [0]

# Composite image: initial observation plus the padded action. Converted to
# numpy immediately (as the other reward cells do) so the in-place
# accumulation below never mixes a torch tensor with numpy arrays.
my_image = (obs + env.inner_env.action_padding(1.0 * action)).numpy()

obs, reward, done, info = env.step(action)

for step in range(1, 236):

    # Every 17th step, stamp the current universe into the composite,
    # weighted by the step index so later positions map to brighter colours.
    if step % 17 == 0:
        my_image += obs.numpy() * step

    rewards.append(reward.item())

    # Empty action: let the universe evolve without intervention.
    obs, reward, done, info = env.step(action * 0)

# Stamp the final state and record the last reward.
my_image += obs.numpy() * step
rewards.append(reward.item())

fig, ax = plt.subplots(2, 1, figsize=(14, 32))

# Translucent overlay of a centred 64x64 square (the size of the action
# tensor). Presumably this marks where the action is applied -- confirm
# against `action_padding`.
rect = plt.Rectangle(((env.height - 64) // 2, (env.width - 64) // 2), 64, 64,
                     facecolor="blue", alpha=0.1)

ax[0].imshow(my_image.squeeze(), cmap=cmap_name)
# Added exactly once: adding the same patch twice draws it twice and
# doubles the effective opacity of the overlay.
ax[0].add_patch(rect)
ax[0].set_title("Game of Life Glider Progression", fontsize=32, fontweight="bold")

# Plot the reward one segment at a time so each segment gets its own colour.
for ii in range(len(rewards) - 1):
    ax[1].plot([ii, ii + 1], rewards[ii:ii + 2], 'o-', ms=10, lw=6,
               color=my_cmap((ii + 1.) / len(rewards)))

ax[1].set_title("Game of Life SpeedDetector Bonus", fontsize=32, fontweight="bold")
ax[1].set_ylabel("reward", fontweight="bold")
ax[1].set_xlabel("step", fontweight="bold")
plt.show()

In [None]:
# Corner Bonus Reward
# Grow the seed pattern in a 196x196 B3/S012345678 ("Life Without Death")
# universe wrapped in CornerBonus and record the reward at every step.
env = CARLE(device="cpu", instances=1, height=196, width=196)
env.rules_from_string("B3/S012345678")

env = CornerBonus(env)
obs = env.reset()

# The action must be chosen BEFORE the initial image is built; otherwise the
# image is seeded with whatever `action` the previously executed cell left
# behind (or the cell fails with NameError on a fresh kernel).
action = seed_pattern
my_image = (obs + env.inner_env.action_padding(1.0 * action)).numpy()

rewards = [0]

obs, reward, done, info = env.step(action)

for step in range(1, 512):

    # Every 17th step, fill cells that are still zero in the composite with
    # the current state scaled by the step index, so each cell keeps the
    # value from the first sampled step at which it was seen alive.
    if step % 17 == 0:
        empty_cells = my_image == 0
        my_image[empty_cells] = (obs.numpy() * step)[empty_cells]

    rewards.append(reward.item())

    # Empty action: let the universe evolve without intervention.
    obs, reward, done, info = env.step(action * 0)

# Fill remaining empty cells from the final state and record the last reward.
empty_cells = my_image == 0
my_image[empty_cells] = (obs.numpy() * step)[empty_cells]
rewards.append(reward.item())

fig, ax = plt.subplots(2, 1, figsize=(14, 32))

# Centred 64x64 overlay (the size of the action tensor).
action_rect = plt.Rectangle(((env.height - 64) // 2, (env.width - 64) // 2), 64, 64,
                            facecolor="blue", alpha=0.2)

# Red 64x64 overlays in the two right-hand corners. Named "punish" here;
# presumably they mirror the regions CornerBonus penalises -- confirm.
punish_rect_0 = plt.Rectangle(((env.width - 64), 0), 64, 64,
                              facecolor="red", alpha=0.3)
punish_rect_1 = plt.Rectangle(((env.width - 64), (env.width - 64)), 64, 64,
                              facecolor="red", alpha=0.3)

# 16x16 overlay in the top-left corner, plus a diagonal band drawn below.
inferno_cmap = plt.get_cmap("inferno")
reward_rect_0 = plt.Rectangle((0, 0), 16, 16,
                              facecolor=inferno_cmap(64), alpha=0.3)

ax[0].imshow(my_image.squeeze(), cmap=cmap_name)
ax[0].add_patch(action_rect)
ax[0].add_patch(punish_rect_0)
ax[0].add_patch(punish_rect_1)
# Added exactly once: adding the same patch twice draws it twice.
ax[0].add_patch(reward_rect_0)

# Diagonal band of small 4x4 squares from (14, 14) to (95, 95).
for jj in range(14, 96):
    reward_rect = plt.Rectangle((jj, jj), 4, 4, facecolor=inferno_cmap(64), alpha=0.3 / 2)
    ax[0].add_patch(reward_rect)

ax[0].set_title("Life Without Death Growth Pattern", fontsize=28, fontweight="bold")

# Plot the reward one segment at a time so each segment gets its own colour.
for ii in range(len(rewards) - 1):
    ax[1].plot([ii, ii + 1], rewards[ii:ii + 2], 'o-', ms=10, lw=6,
               color=my_cmap((ii + 1.) / len(rewards)))

ax[1].set_title("Life Without Death CornerBonus Bonus", fontweight="bold")
ax[1].set_ylabel("reward", fontweight="bold")
ax[1].set_xlabel("step", fontweight="bold")
plt.show()

In [None]:
# "Puffer Detection" growth Bonus Reward
# Run the Morley puffer in a 128x128 B368/S245 universe wrapped in
# PufferDetector and record the reward at every step.
env = CARLE(device="cpu", instances=1, height=128, width=128)
env.rules_from_string("B368/S245")

env = PufferDetector(env)
obs = env.reset()

# The action must be chosen BEFORE the initial image is built; otherwise the
# image is seeded with whatever `action` the previously executed cell left
# behind (or the cell fails with NameError on a fresh kernel).
action = morley_puffer
my_image = (obs + env.inner_env.action_padding(1.0 * action)).numpy()

rewards = [0]

obs, reward, done, info = env.step(action)

for step in range(1, 753):

    rewards.append(reward.item())

    # Empty action: let the universe evolve without intervention.
    obs, reward, done, info = env.step(action * 0)

# Fill cells that are still zero in the initial frame with the final state,
# scaled by the last step index, and record the last reward.
empty_cells = my_image == 0
my_image[empty_cells] = (obs.numpy() * step)[empty_cells]
rewards.append(reward.item())

fig, ax = plt.subplots(2, 1, figsize=(14, 32))

# Centred 64x64 overlay (the size of the action tensor). The corner patches
# that used to be constructed here were never added to the axes and have
# been dropped.
action_rect = plt.Rectangle(((env.height - 64) // 2, (env.width - 64) // 2), 64, 64,
                            facecolor="blue", alpha=0.2)

ax[0].imshow(my_image.squeeze(), cmap=cmap_name)
ax[0].add_patch(action_rect)
ax[0].set_title("Common Morley Puffer Progression", fontsize=32, fontweight="bold")

# Plot the reward one segment at a time so each segment gets its own colour.
for ii in range(len(rewards) - 1):
    ax[1].plot([ii, ii + 1], rewards[ii:ii + 2], 'o-', ms=10, lw=6,
               color=my_cmap((ii + 1.) / len(rewards)))

ax[1].set_title("PufferDetector Bonus", fontweight="bold")
ax[1].set_ylabel("reward")
ax[1].set_xlabel("step")
plt.show()

In [None]:
# Autoencoder Exploration Bonus Reward
# Run the Morley puffer in a 128x128 B368/S245 universe wrapped in AE2D and
# record the reward at every step.
env = CARLE(device="cpu", instances=1, height=128, width=128)
env.rules_from_string("B368/S245")

env = AE2D(env)
obs = env.reset()

# The action must be chosen BEFORE the initial image is built; otherwise the
# cell depends on an `action` left behind by a previously executed cell.
action = morley_puffer # + morley_glider
my_image = (obs + env.inner_env.action_padding(1.0 * action)).numpy()

rewards = [0]

obs, reward, done, info = env.step(action)

# Every 500 steps, copy rows 48:80 of the universe (scaled by the step index)
# into the next 32-row strip of the composite. With 2069 steps this happens
# at steps 500, 1000, 1500 and 2000, filling all four strips.
count = 0
for step in range(1, 2070):

    if step % 500 == 0:
        my_image[:, :, count * 32:count * 32 + 32, :] = step * obs.numpy()[:, :, 48:80, :]
        count += 1

    rewards.append(reward.item())

    # Empty action: let the universe evolve without intervention.
    obs, reward, done, info = env.step(action * 0)

# Record the reward of the final step, as the other reward cells do.
rewards.append(reward.item())

fig, ax = plt.subplots(2, 1, figsize=(14, 32))

ax[0].imshow(my_image.squeeze(), cmap=cmap_name)

# One translucent tint per time strip. Each strip is 32 rows tall, so the
# overlay is 32 tall as well; a taller overlay would bleed into the next
# strip and extend past the bottom of the 128-row image.
for jj in range(4):
    time_rect = plt.Rectangle((0, jj * 32), 128, 32,
                              facecolor=my_cmap(32 + 32 * jj), alpha=0.2)
    ax[0].add_patch(time_rect)

ax[0].set_title("Common Morley Puffer Progression", fontsize=32, fontweight="bold")

# Plot the reward one segment at a time so each segment gets its own colour.
for ii in range(len(rewards) - 1):
    ax[1].plot([ii, ii + 1], rewards[ii:ii + 2], 'o-', ms=10, lw=6,
               color=my_cmap((ii + 1.) / len(rewards)))

ax[1].set_title("Autoencoder Loss Exploration Bonus", fontsize=32, fontweight="bold")
ax[1].set_ylabel("reward", fontweight="bold")
ax[1].set_xlabel("step", fontweight="bold")
plt.show()

In [None]:
# random network distillation
# Run the Morley puffer in a 128x128 B368/S245 universe wrapped in RND2D and
# record the reward at every step.
env = CARLE(device="cpu", instances=1, height=128, width=128)
env.rules_from_string("B368/S245")

env = RND2D(env)
obs = env.reset()

# The action must be chosen BEFORE the initial image is built; otherwise the
# cell depends on an `action` left behind by a previously executed cell.
action = morley_puffer #+ morley_glider
my_image = (obs + env.inner_env.action_padding(1.0 * action)).numpy()

rewards = [0]

obs, reward, done, info = env.step(action)

# Every 500 steps, copy rows 48:80 of the universe (scaled by the step index)
# into the next 32-row strip of the composite. With 2069 steps this happens
# at steps 500, 1000, 1500 and 2000, filling all four strips.
count = 0
for step in range(1, 2070):

    if step % 500 == 0:
        my_image[:, :, count * 32:count * 32 + 32, :] = step * obs.numpy()[:, :, 48:80, :]
        count += 1

    rewards.append(reward.item())

    # Empty action: let the universe evolve without intervention.
    obs, reward, done, info = env.step(action * 0)

# Record the reward of the final step.
rewards.append(reward.item())

fig, ax = plt.subplots(2, 1, figsize=(14, 28))

ax[0].imshow(my_image.squeeze(), cmap=cmap_name)

# One translucent tint per time strip. Each strip is 32 rows tall, so the
# overlay is 32 tall as well; a taller overlay would bleed into the next
# strip and extend past the bottom of the 128-row image.
for jj in range(4):
    time_rect = plt.Rectangle((0, jj * 32), 128, 32,
                              facecolor=my_cmap(32 + 32 * jj), alpha=0.2)
    ax[0].add_patch(time_rect)

ax[0].set_title("Common Morley Puffer Progression", fontsize=32, fontweight="bold")

# Plot the reward one segment at a time so each segment gets its own colour.
for ii in range(len(rewards) - 1):
    ax[1].plot([ii, ii + 1], rewards[ii:ii + 2], 'o-', ms=10, lw=6,
               color=my_cmap((ii + 1.) / len(rewards)))

ax[1].set_title("Random Network Distillation Exploration Bonus", fontsize=32, fontweight="bold")
ax[1].set_ylabel("reward", fontweight="bold")
ax[1].set_xlabel("step", fontweight="bold")
plt.show()