In [1]:
import numpy as np
import gymnasium as gym
import pygame
import csv
import time

Fixed Initial Position and Speed, not robust for expert policy 

In [2]:
def run_continuous_action_test_with_initial_state(episodes, render=False):
    # 初始化环境
    env = gym.make('MountainCarContinuous-v0', render_mode='human' if render else None)

    # 初始化 pygame
    pygame.init()

    # 初始化手柄
    pygame.joystick.init()

    # 检查是否连接了手柄
    if pygame.joystick.get_count() > 0:
        joystick = pygame.joystick.Joystick(0)
        joystick.init()
        print(f"Initialized Joystick: {joystick.get_name()}")

        # 获取手柄的轴数量
        num_axes = joystick.get_numaxes()
        print(f"Number of axes: {num_axes}")

        for i in range(episodes):
            # 手动设置小车的初始状态为底部，位置接近 -1.2，速度为 0
            env.reset()
            env.env.state = np.array([-1.2, 0.0]) # not correct, has already reseted in the env.reset()
            
            terminated = False  # True when reached goal

            # 为每个 episode 创建一个 CSV 文件
            filename = f"episode_{i+1}.csv"
            with open(filename, mode='w', newline='') as file:
                writer = csv.writer(file)
                # 写入 CSV 文件的头部
                writer.writerow(["timestamp", "action", "position", "velocity"])

                while not terminated:
                    for event in pygame.event.get():
                        if event.type == pygame.QUIT:
                            terminated = True
                            break

                    # 读取手柄第一个轴的数值作为动作
                    axis_values = [joystick.get_axis(i) for i in range(num_axes)]
                    action = axis_values[0]  # 使用第一个轴的值作为动作 [-1.0, 1.0]

                    # 获取当前时间戳
                    timestamp = time.time()

                    # 应用动作并观察小车的运动
                    new_state, reward, terminated, _, _ = env.step([action])

                    # 打印小车的新状态
                    print(f"Episode {i+1}, Action: {action:.4f}")
                    print(f"New State: Position = {new_state[0]:.4f}, Velocity = {new_state[1]:.4f}\n")

                    # 将数据写入 CSV 文件
                    writer.writerow([timestamp, action, new_state[0], new_state[1]])

                    # 如果小车达到目标或者回到起始位置，终止循环
                    if terminated:
                        break

        env.close()
    else:
        print("No joystick found.")

    # 退出 pygame
    pygame.quit()

if __name__ == '__main__':
    run_continuous_action_test_with_initial_state(10, render=True)


Initialized Joystick: Logicool Dual Action
Number of axes: 4
Episode 1, Action: 0.0000
New State: Position = -0.4498, Velocity = -0.0006

Episode 1, Action: 0.0000
New State: Position = -0.4509, Velocity = -0.0011

Episode 1, Action: 0.0000
New State: Position = -0.4525, Velocity = -0.0016

Episode 1, Action: 0.0000
New State: Position = -0.4547, Velocity = -0.0022

Episode 1, Action: 0.0000
New State: Position = -0.4574, Velocity = -0.0027

Episode 1, Action: 0.0000
New State: Position = -0.4605, Velocity = -0.0032

Episode 1, Action: 0.0000
New State: Position = -0.4642, Velocity = -0.0036

Episode 1, Action: 0.0000
New State: Position = -0.4683, Velocity = -0.0041

Episode 1, Action: 0.0000
New State: Position = -0.4728, Velocity = -0.0045

Episode 1, Action: 0.0000
New State: Position = -0.4777, Velocity = -0.0049

Episode 1, Action: 0.0000
New State: Position = -0.4829, Velocity = -0.0052

Episode 1, Action: 0.0000
New State: Position = -0.4884, Velocity = -0.0055

Episode 1, Acti

Random Inital Position and Speed, Robust for expert policy, 2 failed versions, cannot set initial state to the car and environment

To deal with this problem, 2 trials are proposed: 

## delete some datas manually -- > failed ?

## find the set function in gymnasium mountain car continous --> found and success 

this one good random, but not very big difference

In [None]:
def run_continuous_action_test_with_random_initial_state(episodes, render=False):
    # 初始化环境
    env = gym.make('MountainCarContinuous-v0', render_mode='human' if render else None)

    # 初始化 pygame
    pygame.init()

    # 初始化手柄
    pygame.joystick.init()

    # 检查是否连接了手柄
    if pygame.joystick.get_count() > 0:
        joystick = pygame.joystick.Joystick(0)
        joystick.init()
        print(f"Initialized Joystick: {joystick.get_name()}")

        # 获取手柄的轴数量
        num_axes = joystick.get_numaxes()
        print(f"Number of axes: {num_axes}")

        for i in range(episodes):
            # 显示倒数和回合开始信息
            print("Episode {} start in: ".format(i+1), end="")
            for countdown in range(2, 0, -1):
                print("{} ".format(countdown), end="", flush=True)
                time.sleep(1)
            print("\nEpisode {} start!".format(i+1))

            # 随机设置小车的初始状态
            initial_position = np.random.uniform(-1.2, 0.6)
            initial_velocity = np.random.uniform(0.07,0.6)
            env.reset()
            # env.env.state = np.array([initial_position, initial_velocity])

            terminated = False  # True when reached goal

            # 为每个 episode 创建一个 CSV 文件
            filename = f"episode_{i+1}.csv"
            with open(filename, mode='w', newline='') as file:
                writer = csv.writer(file)
                # 写入 CSV 文件的头部
                writer.writerow(["timestamp", "action", "position", "velocity"])

                while not terminated:
                    for event in pygame.event.get():
                        if event.type == pygame.QUIT:
                            terminated = True
                            break

                    # 读取手柄第一个轴的数值作为动作
                    axis_values = [joystick.get_axis(i) for i in range(num_axes)]
                    action = axis_values[0]  # 使用第一个轴的值作为动作 [-1.0, 1.0]

                    # 获取当前时间戳
                    timestamp = time.time()

                    # 应用动作并观察小车的运动
                    new_state, reward, terminated, _, _ = env.step([action])

                    # 打印小车的新状态
                    print(f"Episode {i+1}, Action: {action:.4f}")
                    print(f"New State: Position = {new_state[0]:.4f}, Velocity = {new_state[1]:.4f}\n")

                    # 将数据写入 CSV 文件
                    writer.writerow([timestamp, action, new_state[0], new_state[1]])

                    # 如果小车达到目标或者回到起始位置，终止循环
                    if terminated:
                        break

        env.close()
    else:
        print("No joystick found.")

    # 退出 pygame
    pygame.quit()

if __name__ == '__main__':
    run_continuous_action_test_with_random_initial_state(50, render=True)

This script has not been verified, do not use 

In [None]:
def run_continuous_action_test_with_random_initial_state(episodes, render=False):
    # 初始化环境
    env = gym.make('MountainCarContinuous-v0', render_mode='human' if render else None)

    # 初始化 pygame
    pygame.init()

    # 初始化手柄
    pygame.joystick.init()

    # 检查是否连接了手柄
    if pygame.joystick.get_count() > 0:
        joystick = pygame.joystick.Joystick(0)
        joystick.init()
        print(f"Initialized Joystick: {joystick.get_name()}")

        # 获取手柄的轴数量
        num_axes = joystick.get_numaxes()
        print(f"Number of axes: {num_axes}")

        for i in range(episodes):
            # 显示倒数和回合开始信息
            print("Episode {} start in: ".format(i+1), end="")
            for countdown in range(2, 0, -1):
                print("{} ".format(countdown), end="", flush=True)
                time.sleep(1)
            print("\nEpisode {} start!".format(i+1))

            # 随机设置小车的初始状态
            initial_position = np.random.uniform(-1.2, 0.6)
            initial_velocity = np.random.uniform(-0.07, 0.07)  # 速度允许范围

            # 使用 reset 并传递随机初始化位置和速度
            env.reset(options={"state": [initial_position, initial_velocity]})

            terminated = False  # True when reached goal

            # 为每个 episode 创建一个 CSV 文件
            filename = f"episode_{i+1}.csv"
            with open(filename, mode='w', newline='') as file:
                writer = csv.writer(file)
                # 写入 CSV 文件的头部
                writer.writerow(["timestamp", "action", "position", "velocity"])

                while not terminated:
                    for event in pygame.event.get():
                        if event.type == pygame.QUIT:
                            terminated = True
                            break

                    # 读取手柄第一个轴的数值作为动作
                    axis_values = [joystick.get_axis(i) for i in range(num_axes)]
                    action = axis_values[0]  # 使用第一个轴的值作为动作 [-1.0, 1.0]

                    # 获取当前时间戳
                    timestamp = time.time()

                    # 应用动作并观察小车的运动
                    new_state, reward, terminated, _, _ = env.step([action])

                    # 打印小车的新状态
                    print(f"Episode {i+1}, Action: {action:.4f}")
                    print(f"New State: Position = {new_state[0]:.4f}, Velocity = {new_state[1]:.4f}\n")

                    # 将数据写入 CSV 文件
                    writer.writerow([timestamp, action, new_state[0], new_state[1]])

                    # 如果小车达到目标或者回到起始位置，终止循环
                    if terminated:
                        break

        env.close()
    else:
        print("No joystick found.")

    # 退出 pygame
    pygame.quit()

if __name__ == '__main__':
    run_continuous_action_test_with_random_initial_state(30, render=True)

### Finally, the initial position and velocity can be set correctly, but need to be limited before the goal (before 0.45)

In [None]:
def run_continuous_action_test_with_random_initial_state(episodes, render=False):
    # 初始化环境
    env = gym.make('MountainCarContinuous-v0', render_mode='human' if render else None)

    # 初始化 pygame
    pygame.init()

    # 初始化手柄
    pygame.joystick.init()

    # 检查是否连接了手柄
    if pygame.joystick.get_count() > 0:
        joystick = pygame.joystick.Joystick(0)
        joystick.init()
        print(f"Initialized Joystick: {joystick.get_name()}")

        # 获取手柄的轴数量
        num_axes = joystick.get_numaxes()
        print(f"Number of axes: {num_axes}")

        for i in range(episodes):
            # 显示倒数和回合开始信息
            print("Episode {} start in: ".format(i+1), end="")
            for countdown in range(2, 0, -1):
                print("{} ".format(countdown), end="", flush=True)
                time.sleep(1)
            print("\nEpisode {} start!".format(i+1))

            # 随机设置小车的初始状态
            initial_position = np.random.uniform(-1.2, 0.44)
            initial_velocity = np.random.uniform(-0.07, 0.07)  # 速度允许范围

            # 重置环境，并手动设置小车的初始状态
            env.reset()  # 重置环境
            env.unwrapped.state = np.array([initial_position, initial_velocity])  # 手动修改状态

            terminated = False  # True when reached goal

            # 为每个 episode 创建一个 CSV 文件
            filename = f"episode_{i+1}.csv"
            with open(filename, mode='w', newline='') as file:
                writer = csv.writer(file)
                # 写入 CSV 文件的头部
                writer.writerow(["timestamp", "action", "position", "velocity"])

                while not terminated:
                    for event in pygame.event.get():
                        if event.type == pygame.QUIT:
                            terminated = True
                            break

                    # 读取手柄第一个轴的数值作为动作
                    axis_values = [joystick.get_axis(i) for i in range(num_axes)]
                    action = axis_values[0]  # 使用第一个轴的值作为动作 [-1.0, 1.0]

                    # 获取当前时间戳
                    timestamp = time.time()

                    # 应用动作并观察小车的运动
                    new_state, reward, terminated, _, _ = env.step([action])

                    # 打印小车的新状态
                    print(f"Episode {i+1}, Action: {action:.4f}")
                    print(f"New State: Position = {new_state[0]:.4f}, Velocity = {new_state[1]:.4f}\n")

                    # 将数据写入 CSV 文件
                    writer.writerow([timestamp, action, new_state[0], new_state[1]])

                    # 如果小车达到目标或者回到起始位置，终止循环
                    if terminated:
                        break

        env.close()
    else:
        print("No joystick found.")

    # 退出 pygame
    pygame.quit()

if __name__ == '__main__':
    run_continuous_action_test_with_random_initial_state(10, render=True)