# Humanoid Walking Simulation with AnyWidget

This notebook demonstrates a humanoid walking simulation running in a widget. We can interact with it from Python to collect data and train a model.

In [None]:
import sys
import os
sys.path.append(os.getcwd())

from humanoid_walker.humanoid_walker import HumanoidWalker
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Create the walker widget. Ending the cell with the bare name makes the
# notebook render it; the cells below drive the simulation through `walker`.
walker = HumanoidWalker()
walker

## 1. Generating Data

We can send actions to the walker and record the state. Let's try sending random actions.

In [None]:
# Drive the walker with random joint commands and log one row per control tick.
data_records = []

# Begin from a freshly reset simulation; pause so the reset can take effect.
walker.reset_simulation()
time.sleep(1)

start_time = time.time()
for step in range(200):
    # One independent uniform draw per joint, always in the same joint order.
    action = {
        joint: np.random.uniform(-50, 50)
        for joint in ('left_hip', 'left_knee', 'right_hip', 'right_knee')
    }
    walker.apply_action(action)

    # Only log ticks for which the widget reports a non-empty state.
    snapshot = walker.state
    if snapshot:
        row = snapshot.copy()
        row.update(action)  # action values are stored alongside the state fields
        row['timestamp'] = time.time() - start_time  # seconds since the loop started
        data_records.append(row)

    time.sleep(0.05)  # 0.05 s per tick -> nominal 20Hz control loop

# Build the frame once from the collected records and preview the first rows.
df = pd.DataFrame(data_records)
df.head()

## 2. Learning to Walk

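We will use a simple evolutionary strategy (ES) — a random-mutation search that keeps the best candidate found so far — to learn a policy that maximizes the distance traveled to the right.

Policy: a linear mapping from state to action, squashed with `tanh` and scaled by 100 to produce the joint commands.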

In [None]:
def get_action(weights, state_vec):
    """Map a state vector to per-joint commands with a squashed linear policy.

    Computes ``tanh(weights . state_vec) * 100`` and labels the first four
    outputs with the joint names the walker expects.

    Args:
        weights: Policy matrix; ``np.dot(weights, state_vec)`` must yield at
            least four values (the training loop uses shape (4, 6)).
        state_vec: Feature vector, e.g. [torso_angle, left_thigh_angle,
            left_calf_angle, ...].

    Returns:
        Dict with keys 'left_hip', 'left_knee', 'right_hip', 'right_knee',
        each value bounded to [-100, 100] by the tanh squashing.
    """
    # tanh keeps every output in [-1, 1]; the factor 100 scales it to torque.
    torques = 100 * np.tanh(np.dot(weights, state_vec))
    joint_order = ('left_hip', 'left_knee', 'right_hip', 'right_knee')
    return {joint: torques[idx] for idx, joint in enumerate(joint_order)}

def run_episode(weights, steps=100, step_delay=0.05, reset_delay=0.5):
    """Run one rollout of the policy on the shared `walker` and score it.

    The simulation is reset, then for `steps` control ticks the current widget
    state is turned into a feature vector, mapped to joint commands by
    `get_action`, and sent to the walker.

    Args:
        weights: Policy matrix passed straight to `get_action`
            (the training loop uses shape (4, 6)).
        steps: Number of control ticks. A tick on which no state is available
            still counts as a tick; it simply sends no action.
        step_delay: Seconds to sleep after every tick (default 0.05).
        reset_delay: Seconds to wait after resetting the simulation
            (default 0.5).

    Returns:
        Change in `torso_x` between the start and the end of the episode
        (final minus initial). If the final state has no `torso_x`, the
        result is 0.
    """
    walker.reset_simulation()
    time.sleep(reset_delay)

    # The loop below already treats `walker.state` as possibly empty, so read
    # it defensively here too instead of calling `.get` on a possible None.
    # The fallback of 200 is presumably the walker's spawn x - TODO confirm.
    initial_x = (walker.state or {}).get('torso_x', 200)

    # Raw angles are used as features for simplicity; relative angles would
    # likely be better inputs. Missing fields fall back to 0.
    feature_keys = (
        'torso_angle',
        'left_thigh_angle',
        'left_calf_angle',
        'right_thigh_angle',
        'right_calf_angle',
    )

    # NOTE(review): time.sleep blocks the kernel, so state updates coming from
    # the widget frontend may not be applied while this cell is executing -
    # confirm that `walker.state` really changes between ticks.
    for _ in range(steps):
        s = walker.state
        if not s:
            # No state yet: wait one tick and try again.
            time.sleep(step_delay)
            continue

        # The trailing 1.0 is the bias input of the linear policy.
        state_vec = np.array([s.get(key, 0) for key in feature_keys] + [1.0])

        walker.apply_action(get_action(weights, state_vec))
        time.sleep(step_delay)

    # Same defensive read as at the start; a missing value yields zero reward.
    final_x = (walker.state or {}).get('torso_x', initial_x)
    return final_x - initial_x

# Start from small random weights: one row per action (4), one column per
# state feature including the bias (6).
best_weights = 0.1 * np.random.randn(4, 6)
best_reward = float('-inf')

# Random-mutation search: perturb the incumbent, evaluate it for one short
# episode, and keep the candidate only if it scores strictly higher.
for episode in range(10):
    candidate = best_weights + 0.5 * np.random.randn(4, 6)
    reward = run_episode(candidate, steps=50)

    print(f"Episode {episode}: Reward {reward:.2f}")

    if reward > best_reward:
        best_reward, best_weights = reward, candidate
        print("New best!")

print("Training done. Running best policy...")
# Replay the best weights for a longer episode; its displacement is the cell output.
run_episode(best_weights, steps=200)