In [78]:
import gym
import numpy as np

import ipywidgets as widgets
from IPython.display import display
from functools import partial

%run ../env/gym_bart/envs/bart_env.py

In [85]:
class GameDisplay:
    """Button-driven ipywidgets front end for stepping an environment by hand.

    One button is created per action index (0, 1, 2); clicking a button calls
    ``step`` with that index and the result is printed into an Output widget.

    The wrapped ``env`` must provide ``reset()``, ``step(action)`` returning a
    ``(obs, reward, done, info)`` 4-tuple, and the attributes
    ``hold_to_inflate``, ``current_color`` and ``current_size``.
    """

    def __init__(self, env):
        """Reset ``env`` and build the action buttons and the output area.

        Args:
            env: Environment to control; it is reset immediately.
        """
        self.env = env
        env.reset()
        self.output = widgets.Output()

        # Button captions depend on the control scheme the env was built with;
        # the position in the list is the action index sent to env.step().
        if self.env.hold_to_inflate:
            labels = ['Stop', 'Inflate', 'N/A']
        else:
            labels = ['Wait', 'Start/Stop', 'N/A']
        self.buttons = []
        for i, label in enumerate(labels):
            button = widgets.Button(description=label)
            button.on_click(self.generate_button_callback(i))
            self.buttons.append(button)
        self.widgets = [*self.buttons, self.output]

    def update(self, output):
        """Replace the contents of the output area with ``output``."""
        self.output.clear_output()
        with self.output:
            print(output)

    def step(self, action):
        """Apply ``action`` to the environment and show the outcome.

        When the episode finishes, the wrapped environment is reset and the
        returned observation is the one produced by that reset.

        Args:
            action: Action index forwarded to ``self.env.step``.

        Returns:
            Tuple ``(obs, reward, done, info)``; ``reward``, ``done`` and
            ``info`` are exactly what ``self.env.step`` returned.
        """
        obs, reward, done, info = self.env.step(action)
        self.output.clear_output()
        with self.output:
            # Printed inside the Output context so it is actually visible when
            # this method runs from a button callback.
            print(f'action {action}')
            print(f'Color: {self.env.current_color}, Size: {self.env.current_size}')
            print(f'Observation: {obs}')

            if done:
                if reward > 0:
                    print(f'Banked {reward}')
                else:
                    print(f'Popped! Reward {reward}')
                print('Env Reset')
                # Reset the environment this display wraps, not whatever
                # happens to be bound to a notebook-global name `env`.
                obs = self.env.reset()
        return obs, reward, done, info

    def generate_button_callback(self, action):
        """Return an ``on_click`` handler that performs ``action``.

        ``action`` is bound as an argument of this method, so each button gets
        its own value rather than sharing a loop variable.
        """
        def on_click(b):
            # The clicked Button instance `b` is not needed.
            self.step(action)
        return on_click

    def display(self):
        """Render all buttons followed by the output area."""
        display(*self.widgets)

In [86]:
# Build the environment with hold_to_inflate disabled, so GameDisplay labels
# its buttons 'Wait' / 'Start/Stop' / 'N/A'.
# NOTE(review): BartEnv is presumably defined by the `%run` of bart_env.py in
# the import cell — confirm.
env = BartEnv(hold_to_inflate=False)
# Wrap the environment in the button-driven display and render its widgets.
out = GameDisplay(env)
out.display()

Button(description='Wait', style=ButtonStyle())

Button(description='Start/Stop', style=ButtonStyle())

Button(description='N/A', style=ButtonStyle())

Output()