In [1]:
import gymnasium as gym
import gym_bart
import numpy as np

import ipywidgets as widgets
from IPython.display import display
from functools import partial

%run ../env/gym_bart/envs/bart_env.py
%run ../env/gym_bart/envs/bart_meta_env.py

# Display

In [9]:
class GameDisplay:
    """Button-driven ipywidgets front end for manually playing a BART env.

    One button is created per action index (0, 1, 2). Clicking a button steps
    the environment with that action and prints the resulting state into an
    ``Output`` widget.
    """

    def __init__(self, env):
        """Reset ``env`` and build the action buttons and the output area.

        Args:
            env: Environment providing ``reset()``, ``step(action)`` and the
                attributes ``toggle_task``, ``current_color`` and
                ``current_size`` (directly, or on ``env.unwrapped``).
        """
        self.env = env
        env.reset()
        self.output = widgets.Output()

        self.buttons = []
        for action, label in enumerate(self._action_labels()):
            button = widgets.Button(description=label)
            button.on_click(self.generate_button_callback(action))
            self.buttons.append(button)
        self.widgets = [*self.buttons, self.output]

    def _base_env(self):
        """Return the innermost env so custom attributes resolve through wrappers."""
        # A bare env has no wrapper layer; fall back to the env itself if it
        # does not expose `unwrapped`.
        return getattr(self.env, 'unwrapped', self.env)

    def _action_labels(self):
        """Return the button captions for actions 0, 1 and 2."""
        if self._base_env().toggle_task:
            return ['Wait', 'Start/Stop', 'N/A']
        return ['Stop', 'Inflate', 'N/A']

    def _reset_env(self):
        """Reset the environment and return only the initial observation."""
        result = self.env.reset()
        # Gymnasium's reset() returns (observation, info); tolerate an env
        # that returns a bare observation instead.
        if isinstance(result, tuple) and len(result) == 2 and isinstance(result[1], dict):
            return result[0]
        return result

    def update(self, output):
        """Replace the contents of the output area with ``output``."""
        self.output.clear_output()
        with self.output:
            print(output)

    def step(self, action):
        """Step the env with ``action`` and show the new state.

        The environment is reset when the episode ends (terminated or
        truncated); in that case the returned observation is the first
        observation of the new episode.

        Args:
            action: Action index forwarded to ``env.step``.

        Returns:
            Tuple ``(obs, reward, terminated, info)``.
        """
        obs, reward, terminated, truncated, info = self.env.step(action)
        base_env = self._base_env()
        self.output.clear_output()
        with self.output:
            print(f'Color: {base_env.current_color}, Size: {base_env.current_size}')
            print(f'Observation: {obs}')

            if terminated:
                if reward > 0:
                    print(f'Banked {reward}')
                else:
                    print(f'Popped! Reward {reward}')
            if terminated or truncated:
                # An env must be reset after either end-of-episode signal
                # before it is stepped again.
                print('Env Reset')
                obs = self._reset_env()
        return obs, reward, terminated, info

    def generate_button_callback(self, action):
        """Return an ``on_click`` handler that steps the env with ``action``."""
        def on_click(b):
            return self.step(action)
        return on_click

    def display(self):
        """Render the buttons followed by the output area."""
        display(*self.widgets)

In [3]:
# Instantiate BartEnv directly (no gym.make) and show the button UI for it.
# BartEnv is presumably brought into scope by the %run of bart_env.py above.
env = BartEnv(hold_to_inflate=False)
out = GameDisplay(env)
# Renders the three action buttons followed by the output area.
out.display()

Button(description='Wait', style=ButtonStyle())

Button(description='Start/Stop', style=ButtonStyle())

Button(description='N/A', style=ButtonStyle())

Output()

# BartMetaEnv

In [2]:
class GameDisplay:
    """Button-driven ipywidgets front end for manually playing BartMetaEnv.

    This cell redefines ``GameDisplay`` (shadowing the earlier BartEnv
    version) and additionally prints the mean size for the current balloon
    color and the current balloon's true limit after every step.

    Custom attributes are read from ``env.unwrapped`` because the env is
    created with ``gym.make``, which returns it inside wrappers.
    """

    def __init__(self, env):
        """Reset ``env`` and build the action buttons and the output area.

        Args:
            env: Environment (possibly wrapped) whose base env exposes
                ``toggle_task``, ``current_color``, ``current_size``,
                ``balloon_mean_sizes``, ``color_to_idx`` and
                ``current_balloon_limit``.
        """
        self.env = env
        env.reset()
        self.output = widgets.Output()

        self.buttons = []
        for action, label in enumerate(self._action_labels()):
            button = widgets.Button(description=label)
            button.on_click(self.generate_button_callback(action))
            self.buttons.append(button)
        self.widgets = [*self.buttons, self.output]

    def _base_env(self):
        """Return the innermost env so custom attributes resolve through wrappers."""
        # Fall back to the env itself if it does not expose `unwrapped`.
        return getattr(self.env, 'unwrapped', self.env)

    def _action_labels(self):
        """Return the button captions for actions 0, 1 and 2."""
        if self._base_env().toggle_task:
            return ['Wait', 'Start/Stop', 'N/A']
        return ['Stop', 'Inflate', 'N/A']

    def _reset_env(self):
        """Reset the environment and return only the initial observation."""
        result = self.env.reset()
        # Gymnasium's reset() returns (observation, info); tolerate an env
        # that returns a bare observation instead.
        if isinstance(result, tuple) and len(result) == 2 and isinstance(result[1], dict):
            return result[0]
        return result

    def update(self, output):
        """Replace the contents of the output area with ``output``."""
        self.output.clear_output()
        with self.output:
            print(output)

    def step(self, action):
        """Step the env with ``action`` and show the new state.

        A finished balloon is detected through ``info['bart_finished']``.
        When the whole episode ends (terminated or truncated) the env is
        reset and the returned observation is the first one of the new
        episode.

        Args:
            action: Action index forwarded to ``env.step``.

        Returns:
            Tuple ``(obs, reward, terminated, info)``.
        """
        obs, reward, terminated, truncated, info = self.env.step(action)
        base_env = self._base_env()
        self.output.clear_output()
        with self.output:
            color = base_env.current_color
            color_mean = base_env.balloon_mean_sizes[base_env.color_to_idx[color]]
            print(f'Color: {color}, Size: {base_env.current_size}')
            print(f'Observation: {obs}')
            print(f'Color mean: {color_mean}')
            print(f'True max: {base_env.current_balloon_limit}')

            if info.get('bart_finished'):
                if reward > 0:
                    print(f'Banked {reward}')
                else:
                    print(f'Popped! Reward {reward}')
                print('Env Reset')
            if terminated or truncated:
                # NOTE(review): assumes the meta env does not reset itself at
                # episode end; without this, later clicks step a finished env.
                print('Episode finished')
                obs = self._reset_env()
        return obs, reward, terminated, info

    def generate_button_callback(self, action):
        """Return an ``on_click`` handler that steps the env with ``action``."""
        def on_click(b):
            return self.step(action)
        return on_click

    def display(self):
        """Render the buttons followed by the output area."""
        display(*self.widgets)

In [7]:
# Create the meta environment by its registered id, passing meta_setup and
# colors_used as keyword arguments to gym.make.
env = gym.make('BartMetaEnv', meta_setup=1, colors_used=1)
# Alternative kept for reference: construct the class directly instead of via gym.make.
# env = BartMetaEnv(meta_setup=1)
out = GameDisplay(env)
# Renders the three action buttons followed by the output area.
out.display()

  logger.warn(


Button(description='Wait', style=ButtonStyle())

Button(description='Start/Stop', style=ButtonStyle())

Button(description='N/A', style=ButtonStyle())

Output()

In [5]:
# Read the attribute from the base env: `env` comes from gym.make and is
# wrapped, and attribute forwarding through wrappers is deprecated.
env.unwrapped.colors_used

3

In [6]:
# Read the per-color mean sizes from the base env rather than through the
# gym.make wrappers (attribute forwarding through wrappers is deprecated).
env.unwrapped.balloon_mean_sizes

{0: 0.2834204058608833,
 1: 0.5839354767620443,
 2: 0.976287020113957,
 3: 0.15619335937458723,
 4: 0.9440242345134997}

In [18]:
# Show the base env's inflate_delay; `.unwrapped` bypasses the gym.make wrappers.
env.unwrapped.inflate_delay

7

In [2]:

# Recreate the meta environment with no keyword overrides.
env = gym.make('BartMetaEnv')

In [21]:
# Reset the env, then display the mean sizes held by the base env afterwards.
env.reset()
# Read from the base env: `env` is wrapped by gym.make and attribute
# forwarding through wrappers is deprecated.
env.unwrapped.balloon_mean_sizes

{0: 0.26317928496596465, 1: 0.42511497341812, 2: 0.9850294836086159}