In [1]:
import logging

from rlduels.src.primitives.trajectory_pair import Transition, Trajectory, TrajectoryPair, NDArray

from rlduels.src.database.database_manager import MongoDBManager

from rlduels.src.create_video import create_video_from_pair

from rlduels.src.env_wrapper import EnvWrapper, GymWrapper

logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(levelname)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S')

In [2]:
import gymnasium as gym

def run_cartpole():
    # Create the CartPole environment
    env = GymWrapper.create_env(name='CartPole-v1', render_mode="rgb_array")

    # Define the number of episodes you want to run
    num_episodes = 5

    trajectories = []

    for episode in range(num_episodes):
        # Reset the environment for a new episode
        seed = 42
        observation, info = env.reset(seed=42)
        print("Observation:", observation)
        total_reward = 0
        done = False

        transitions = []

        while not done:
            env.render()

            action = env.sample_action()

            next_observation, reward, terminated, truncated, info = env.step(action)

            transitions.append(Transition.create(
                state=observation,
                action=action,
                reward=reward,
                terminated=terminated,
                truncated=truncated,
                next_state=next_observation
            ))

            observation = next_observation
            total_reward += reward
            done = terminated or truncated
        
        trajectories.append(Trajectory(
            env_name = env.name,
            information={'seed': 42},
            transitions=transitions
        ))

        if done:
            print(f"Episode {episode + 1}: Total reward = {total_reward}")

    # Close the environment
    env.close()

    # Return trajectories for further analysis if necessary
    return trajectories

In [3]:
trajs = run_cartpole()

x = TrajectoryPair(trajectory1=trajs[0], trajectory2=trajs[1])

x.env_name

Observation: [ 0.0273956  -0.00611216  0.03585979  0.0197368 ]
Episode 1: Total reward = 14.0
Observation: [ 0.0273956  -0.00611216  0.03585979  0.0197368 ]
Episode 2: Total reward = 39.0
Observation: [ 0.0273956  -0.00611216  0.03585979  0.0197368 ]
Episode 3: Total reward = 11.0
Observation: [ 0.0273956  -0.00611216  0.03585979  0.0197368 ]
Episode 4: Total reward = 20.0
Observation: [ 0.0273956  -0.00611216  0.03585979  0.0197368 ]
Episode 5: Total reward = 23.0


'CartPole-v1'

In [4]:
trajs[0].transitions[0].get_state()

array([ 0.0273956 , -0.00611216,  0.03585979,  0.0197368 ], dtype=float32)

In [5]:
create_video_from_pair(x)

Creating env!
{'CartPole-v1': <rlduels.src.env_wrapper.GymWrapper object at 0x79601af73a30>}
Env created
action:  0
action:  0
action:  0
action:  0
action:  1
action:  1
action:  0
action:  1
action:  1
action:  1
action:  1
action:  0
action:  0
action:  1
Frames1: [array([[[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]],

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]],

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]],

       ...,

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [255, 255, 255],
        [255, 255, 255],
        [255, 255, 255]],

       [[255, 255, 255],
        [255, 255, 255],
        

OpenCV: FFMPEG: tag 0x30385056/'VP80' is not supported with codec id 139 and format 'webm / WebM'
OpenCV: FFMPEG: tag 0x30385056/'VP80' is not supported with codec id 139 and format 'webm / WebM'


(PosixPath('/home/student/Code/RLDuels/trajectory_20240506_213603.webm'),
 PosixPath('/home/student/Code/RLDuels/trajectory_20240506_213603.webm'))

In [6]:
db = MongoDBManager()

In [7]:
print(db.add_entry(x))
print(db.find_entry(x))
print(db.delete_entry(x))
print(db.find_entry(x))

2024-05-06 21:36:03 - DEBUG - {"message": "Server selection started", "selector": "<function writable_server_selector at 0x7960180314c0>", "operation": "insert", "topologyDescription": "<TopologyDescription id: 663931238b5cb0b88cdcfb0f, topology_type: Single, servers: [<ServerDescription ('localhost', 27017) server_type: Standalone, rtt: 0.0009734889972605743>]>", "clientId": {"$oid": "663931238b5cb0b88cdcfb0f"}}
2024-05-06 21:36:03 - DEBUG - {"message": "Server selection succeeded", "selector": "<function writable_server_selector at 0x7960180314c0>", "operation": "insert", "topologyDescription": "<TopologyDescription id: 663931238b5cb0b88cdcfb0f, topology_type: Single, servers: [<ServerDescription ('localhost', 27017) server_type: Standalone, rtt: 0.0009734889972605743>]>", "clientId": {"$oid": "663931238b5cb0b88cdcfb0f"}, "serverHost": "localhost", "serverPort": 27017}
2024-05-06 21:36:03 - DEBUG - {"clientId": {"$oid": "663931238b5cb0b88cdcfb0f"}, "message": "Command started", "comm

('Added 1005b7ff-4acb-46a6-81bb-3620d7daa0d0 to the database', None)
None
('Entry with ID 1005b7ff-4acb-46a6-81bb-3620d7daa0d0 successfully deleted.', None)
None


In [8]:
db.close_db()

2024-05-06 21:36:03 - DEBUG - Deleting every entry from the database.
2024-05-06 21:36:03 - INFO - Database closed successfully.


In [9]:
import json

serialized_data = x.json()

data_to_store = json.loads(serialized_data)

data_to_store['_id'] = data_to_store.pop('id')

print(data_to_store)

{'trajectory1': {'env_name': 'CartPole-v1', 'information': {'seed': 42}, 'transitions': [{'state': {'array': [0.02739560417830944, -0.006112155970185995, 0.03585979342460632, 0.019736802205443382]}, 'action': {'array': 0}, 'next_state': {'array': [0.02727336250245571, -0.20172953605651855, 0.036254528909921646, 0.32351475954055786]}, 'reward': 1.0, 'terminated': False, 'truncated': False}, {'state': {'array': [0.02727336250245571, -0.20172953605651855, 0.036254528909921646, 0.32351475954055786]}, 'action': {'array': 0}, 'next_state': {'array': [0.02323877066373825, -0.39734846353530884, 0.04272482171654701, 0.6274068355560303]}, 'reward': 1.0, 'terminated': False, 'truncated': False}, {'state': {'array': [0.02323877066373825, -0.39734846353530884, 0.04272482171654701, 0.6274068355560303]}, 'action': {'array': 0}, 'next_state': {'array': [0.015291801653802395, -0.593039870262146, 0.05527295917272568, 0.933233380317688]}, 'reward': 1.0, 'terminated': False, 'truncated': False}, {'state':

In [10]:
import gymnasium

print(gymnasium.envs.registry.keys())

dict_keys(['CartPole-v0', 'CartPole-v1', 'MountainCar-v0', 'MountainCarContinuous-v0', 'Pendulum-v1', 'Acrobot-v1', 'phys2d/CartPole-v0', 'phys2d/CartPole-v1', 'phys2d/Pendulum-v0', 'LunarLander-v2', 'LunarLanderContinuous-v2', 'BipedalWalker-v3', 'BipedalWalkerHardcore-v3', 'CarRacing-v2', 'Blackjack-v1', 'FrozenLake-v1', 'FrozenLake8x8-v1', 'CliffWalking-v0', 'Taxi-v3', 'tabular/Blackjack-v0', 'tabular/CliffWalking-v0', 'Reacher-v2', 'Reacher-v4', 'Pusher-v2', 'Pusher-v4', 'InvertedPendulum-v2', 'InvertedPendulum-v4', 'InvertedDoublePendulum-v2', 'InvertedDoublePendulum-v4', 'HalfCheetah-v2', 'HalfCheetah-v3', 'HalfCheetah-v4', 'Hopper-v2', 'Hopper-v3', 'Hopper-v4', 'Swimmer-v2', 'Swimmer-v3', 'Swimmer-v4', 'Walker2d-v2', 'Walker2d-v3', 'Walker2d-v4', 'Ant-v2', 'Ant-v3', 'Ant-v4', 'Humanoid-v2', 'Humanoid-v3', 'Humanoid-v4', 'HumanoidStandup-v2', 'HumanoidStandup-v4', 'GymV26Environment-v0', 'GymV21Environment-v0', 'Adventure-v0', 'AdventureDeterministic-v0', 'AdventureNoFrameskip-v0