In [1]:
import vibe_widget as vw



VibeWidget(description=' Create a Doodle jump game environment, allow for the user to play using their arrow k...

In [None]:
import numpy as np

class SimpleRLAgent:
    """
    A simple tabular Q-learning agent for Doodle Jump-like environments.
    The agent can use 'left' or 'right' arrow keys, rewarded by highest score (y position).

    State: (vertical velocity, horizontal position binned, vertical position binned)
    Actions: 0 = do nothing, 1 = left, 2 = right

    Parameters
    ----------
    n_bins : int
        Number of bins used for each of the three observation components.
    lr : float
        Learning rate of the Q-learning update.
    gamma : float
        Discount factor applied to the best next-state value.
    epsilon : float
        Initial probability of picking a uniformly random action.
    epsilon_decay : float
        Multiplicative factor applied to ``epsilon`` by ``decay_epsilon``.
    min_epsilon : float
        Lower bound that ``epsilon`` never decays below.
    """

    def __init__(self, n_bins=10, lr=0.05, gamma=0.98, epsilon=1.0, epsilon_decay=0.995, min_epsilon=0.1):
        self.n_bins = n_bins
        self.q_table = np.zeros((n_bins, n_bins, n_bins, 3))  # (vel_y, x, y, action)
        self.lr = lr
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.min_epsilon = min_epsilon

    def discretize(self, obs, obs_space):
        """
        Map a continuous observation to a tuple of three bin indices.

        obs: [vel_y, x, y]; obs_space: [(min, max), ...] with one pair per component.
        Values outside their (min, max) range are clamped to the first/last bin.
        A zero-width range (min == max) maps to bin 0 instead of dividing by zero.
        """
        bins = []
        for i in range(3):
            low, high = obs_space[i]
            span = high - low
            if span == 0:
                # Degenerate range: there is nothing to scale, so use the first bin.
                bins.append(0)
                continue
            binned = int((float(obs[i]) - low) / span * (self.n_bins - 1))
            bins.append(max(0, min(self.n_bins - 1, binned)))
        return tuple(bins)

    def select_action(self, state_bins):
        """Pick an action epsilon-greedily: random with probability epsilon, else the argmax of Q."""
        if np.random.rand() < self.epsilon:
            return np.random.choice(3)
        # tuple(...) forces one basic index per axis even if the caller passed a list.
        return np.argmax(self.q_table[tuple(state_bins)])

    def update(self, prev_state, prev_action, reward, next_state, done):
        """
        Apply one Q-learning update for the transition
        (prev_state, prev_action) -> (reward, next_state).

        When ``done`` is true the bootstrap term is dropped, so the target is
        just ``reward``.
        """
        # A single combined index writes straight into the table. Chained
        # indexing with a list state would update a temporary copy instead.
        index = tuple(prev_state) + (prev_action,)
        q_sa = self.q_table[index]
        best_next = np.max(self.q_table[tuple(next_state)])
        new_q = q_sa + self.lr * (reward + self.gamma * best_next * (not done) - q_sa)
        self.q_table[index] = new_q

    def decay_epsilon(self):
        """Multiply epsilon by epsilon_decay, never going below min_epsilon."""
        self.epsilon = max(self.min_epsilon, self.epsilon * self.epsilon_decay)

# Minimal stub for a Doodle Jump-like environment
class DoodleJumpEnvStub:
    """
    Stand-in environment exposing a Doodle Jump-like reset()/step(action) API
    for RL training. Swap in a real environment with the same two methods, or
    adapt this stub.

    Observations are [vel_y, x, y]; ``obs_space`` lists the assumed (min, max)
    range for each of those components in the same order.
    """
    def __init__(self):
        # Assumed ranges: vel_y in [-20, 20], x in [0, 400], y in [0, 600].
        self.obs_space = [(-20, 20), (0, 400), (0, 600)]
        self.reset()

    def reset(self):
        """Put the player back at the start (x=200, y=0, at rest) and return the observation."""
        self.player_vely = 0
        self.player_x = 200
        self.player_y = 0
        self.score = 0
        return [self.player_vely, self.player_x, self.player_y]

    def step(self, action):
        """
        Advance the simulation one tick.

        action: 0 = none, 1 = left, 2 = right.
        Returns (obs, reward, done, info), where reward is the best height
        reached so far and info is an empty dict.
        """
        # Horizontal move of 20px per tick, kept inside [0, 400].
        if action == 1:
            shift = -20
        elif action == 2:
            shift = 20
        else:
            shift = 0
        self.player_x = np.clip(self.player_x + shift, 0, 400)

        # Demo "platforms": whenever y is an exact multiple of 40 the player
        # bounces upward; otherwise gravity removes one unit of velocity.
        on_platform = self.player_y % 40 == 0
        self.player_vely = 15 if on_platform else self.player_vely - 1

        # Integrate the vertical position; the floor is at y = 0.
        self.player_y = max(self.player_y + self.player_vely, 0)

        # Episode ends when the player is on the floor and not moving upward.
        done = self.player_y == 0 and self.player_vely <= 0

        # Score tracks the highest y reached and doubles as the reward.
        self.score = max(self.score, self.player_y)
        observation = [self.player_vely, self.player_x, self.player_y]
        return observation, self.score, done, {}

# Training loop
def train(agent, env, episodes=300):
    """
    Run Q-learning for ``episodes`` episodes of at most 1000 steps each.

    Every step the agent picks an action, the environment is advanced, and the
    agent is updated with the resulting transition. Epsilon is decayed once per
    episode, and a progress line is printed on every 50th episode (starting
    with the first). The printed score is the reward of the episode's last step.
    """
    max_steps = 1000
    report_every = 50
    for episode in range(episodes):
        first_obs = env.reset()
        current = agent.discretize(first_obs, env.obs_space)
        total_reward = 0
        for _ in range(max_steps):
            chosen = agent.select_action(current)
            next_obs, reward, finished, _info = env.step(chosen)
            upcoming = agent.discretize(next_obs, env.obs_space)
            agent.update(current, chosen, reward, upcoming, finished)
            # Keep only the latest reward: it is reported as the score below.
            current, total_reward = upcoming, reward
            if finished:
                break
        agent.decay_epsilon()
        if not episode % report_every:
            print(f"Ep {episode+1}: score={total_reward} epsilon={agent.epsilon:.2f}")

# Example usage
# Seed NumPy's global RNG before training: the agent's epsilon-greedy action
# selection draws from it, so without a seed the learned Q-table differs on
# every run of this cell.
np.random.seed(42)

agent = SimpleRLAgent()
env = DoodleJumpEnvStub()
train(agent, env, episodes=300)

# Pass the trained agent and the environment to vw.create as widget inputs.
doodle = vw.create(
    " Create a Doodle jump game environment, allow for the user to play using their arrow keys.",
    inputs=vw.inputs(agent, env),
)


Ep 1: score=8025 epsilon=0.99
Ep 51: score=8025 epsilon=0.77
Ep 101: score=8025 epsilon=0.60
Ep 151: score=8025 epsilon=0.47
Ep 201: score=8025 epsilon=0.37
Ep 251: score=8025 epsilon=0.28


In [None]:
# Re-create the widget from the agent and env defined in the training cell.
doodle = vw.create(
    " Create a Doodle jump game environment, allow for the user to play using their arrow keys.",
    inputs=vw.inputs(agent, env),
)