In [2]:
import gym
from gym import spaces
import numpy as np
import random

class BalloonGame(gym.Env):
    """Balloon-inflation environment with risky and passive balloon colors.

    Each episode draws one color at random from ``self.colors``:

    * Colors whose spec has a ``"mean"`` key (red, yellow, orange) are risky:
      after every inflation the balloon pops with probability
      ``min(size / mean, 1)``. Stopping pays out the current size; a pop
      pays nothing.
    * Colors whose spec has a ``"fixed_reward"`` key (gray, purple) never
      pop; their size is capped at ``PASSIVE_MAX_SIZE`` and stopping pays
      the fixed reward.

    Actions: ``1`` inflates once; any other value stops and ends the episode.

    Observations are the tuple ``(color_index, current_size, prev_action)``.

    NOTE(review): ``current_size`` is returned as a plain Python float, not
    the float32 array of shape ``(1,)`` that the ``Box`` component of
    ``observation_space`` declares. It is left that way so existing callers
    keep receiving the same tuple shape.
    """

    # Size at which passive (fixed-reward) balloons stop growing.
    PASSIVE_MAX_SIZE = 20.0

    def __init__(self, seed=None):
        """Set up the color table, the spaces, and the random source.

        Args:
            seed: Optional seed for a private ``random.Random`` instance.
                When ``None`` (the default) the global ``random`` module is
                used, exactly as before, so ``random.seed(...)`` still
                controls the environment.
        """
        super(BalloonGame, self).__init__()

        self.colors = {
            "red": {"mean": 10},
            "yellow": {"mean": 20},
            "orange": {"mean": 30},
            "gray": {"fixed_reward": 0},
            "purple": {"fixed_reward": 1}
        }

        self.observation_space = spaces.Tuple((
            spaces.Discrete(len(self.colors)),  # Color index
            spaces.Box(low=0, high=100, shape=(1,), dtype=np.float32),  # Current size
            spaces.Discrete(2)  # Previous action
        ))

        self.action_space = spaces.Discrete(2)  # 0: do nothing, 1: hold inflate button

        # Both random.Random and the random module expose choice/gauss/random.
        self._rng = random.Random(seed) if seed is not None else random

        # Episode state; current_color stays None until reset() is called.
        self.current_color = None
        self.current_size = 0.0
        self.prev_action = None

    def _get_obs(self):
        """Return ``(color_index, current_size, prev_action)`` for the current state."""
        color_index = list(self.colors).index(self.current_color)
        return (color_index, self.current_size, self.prev_action)

    def reset(self):
        """Start a new episode with a random color and an empty balloon.

        Returns:
            The initial observation tuple ``(color_index, 0.0, 0)``.
        """
        self.current_color = self._rng.choice(list(self.colors))
        self.current_size = 0.0
        self.prev_action = 0

        return self._get_obs()

    def step(self, action):
        """Apply one action and return ``(observation, reward, done, info)``.

        Args:
            action: ``1`` to inflate; any other value stops and cashes out.

        Returns:
            A 4-tuple of the observation tuple, the reward (non-zero only
            when stopping), the ``done`` flag (True on a pop or a stop), and
            an empty info dict.

        Raises:
            RuntimeError: If called before ``reset()``.
        """
        if self.current_color is None:
            raise RuntimeError("reset() must be called before step()")

        spec = self.colors[self.current_color]
        is_risky = "mean" in spec  # risky colors carry "mean", passive ones "fixed_reward"
        done = False
        reward = 0

        if action == 1:  # Hold inflate button
            # Unit inflation speed with Gaussian noise (sigma 0.1).
            self.current_size += 1 + self._rng.gauss(0, 0.1)

            if is_risky:
                pop_probability = min(self.current_size / spec["mean"], 1)
                if self._rng.random() < pop_probability:
                    self.current_size = 0.0  # Balloon pops; keep the size a float
                    done = True
            elif self.current_size >= self.PASSIVE_MAX_SIZE:
                # Passive balloons never pop; they just stop growing.
                self.current_size = self.PASSIVE_MAX_SIZE
        else:  # Stop inflating and collect the reward
            reward = self.current_size if is_risky else spec.get("fixed_reward", 0)
            done = True

        self.prev_action = action
        return self._get_obs(), reward, done, {}


In [3]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# Shared environment instance used by reset_env() and step_env() below.
env = BalloonGame()

def reset_env():
    """Begin a new episode on the shared `env` and print its initial observation."""
    # `env` is only read here, so no `global` declaration is required.
    obs = env.reset()
    print(f"Initial observation: {obs}")

def step_env(action):
    """Apply `action` to the shared `env`, print the result, and restart when done."""
    obs, reward, done, info = env.step(action)
    print(f"Observation: {obs}, Reward: {reward}, Done: {done}, Info: {info}")

    # Nothing more to do while the episode is still running.
    if not done:
        return

    # Episode ended (pop or stop): immediately begin the next one.
    reset_env()

def on_inflate_button_click(b):
    """Click handler for the inflate button: take action 1 (`b` is unused)."""
    step_env(action=1)

def on_stop_button_click(b):
    """Click handler for the stop button: take action 0 (`b` is unused)."""
    step_env(action=0)

# Create both controls first, then attach their click handlers.
inflate_button = widgets.Button(description="Inflate (Action 1)")
stop_button = widgets.Button(description="Stop (Action 0)")

inflate_button.on_click(on_inflate_button_click)
stop_button.on_click(on_stop_button_click)

# Show the buttons and start the first episode.
display(inflate_button, stop_button)
reset_env()

Button(description='Inflate (Action 1)', style=ButtonStyle())

Button(description='Stop (Action 0)', style=ButtonStyle())

Initial observation: (0, 0.0, 0)


In [6]:
import ipywidgets as widgets
from IPython.display import display

# Environment instance used by this cell's reset_env() and step_env().
env = BalloonGame()
# Output widget that reset_env() and step_env() print into via `with output:`.
output = widgets.Output()

def reset_env():
    """Begin a new episode on the shared `env` and log its initial observation to `output`."""
    # `env` is only read here, so no `global` declaration is required.
    obs = env.reset()
    with output:
        print(f"Initial observation: {obs}")

def step_env(action):
    """Apply `action` to the shared `env`, log the result to `output`, and restart when done."""
    obs, reward, done, info = env.step(action)

    # Print inside the Output widget's context so the text lands in that widget.
    with output:
        print(f"Observation: {obs}, Reward: {reward}, Done: {done}, Info: {info}")

    # Nothing more to do while the episode is still running.
    if not done:
        return

    # Episode ended (pop or stop): immediately begin the next one.
    reset_env()

def on_inflate_button_click(b):
    """Click handler for the inflate button: take action 1 (`b` is unused)."""
    step_env(action=1)

def on_stop_button_click(b):
    """Click handler for the stop button: take action 0 (`b` is unused)."""
    step_env(action=0)

# Create both controls first, then attach their click handlers.
inflate_button = widgets.Button(description="Inflate (Action 1)")
stop_button = widgets.Button(description="Stop (Action 0)")

inflate_button.on_click(on_inflate_button_click)
stop_button.on_click(on_stop_button_click)

# Show the buttons together with the log area, then start the first episode.
display(inflate_button, stop_button, output)
reset_env()

Button(description='Inflate (Action 1)', style=ButtonStyle())

Button(description='Stop (Action 0)', style=ButtonStyle())

Output()