# Minari offline dataset from a pandas DataFrame

In [2]:
import numpy as np
import pandas as pd
from gymnasium.spaces import Box, Discrete
from minari import load_dataset
from mercury.rl.environment import create_minari_dataset_from_df

# Identifier the dataset is created under and later loaded back by.
dataset_id = "manual-dataset-v0"

# Step 1: Mock transition rows standing in for data that would eventually be
# extracted from Spark. One row per step; episode 1 ends with terminated=True,
# episode 2 ends with truncated=True.
data = [
    {"episode_id": 1, "step": 0, "obs": [0.0, 0.0], "action": 1, "reward": 0.1, "terminated": False, "truncated": False, "info": {"goal": 1}},
    {"episode_id": 1, "step": 1, "obs": [0.1, 0.0], "action": 0, "reward": 0.2, "terminated": True,  "truncated": False, "info": {"goal": 1}},
    {"episode_id": 2, "step": 0, "obs": [1.0, 1.0], "action": 1, "reward": -0.1, "terminated": False, "truncated": False, "info": {"goal": 0}},
    {"episode_id": 2, "step": 1, "obs": [0.9, 1.0], "action": 1, "reward": -0.2, "terminated": False, "truncated": True,  "info": {"goal": 0}},
]
df = pd.DataFrame(data)

# Step 2: Define env metadata.
# Observations are unbounded 2-vectors (matching the two-element "obs" lists
# above); actions are one of two discrete choices (the rows use 0 and 1).
obs_space = Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32)
act_space = Discrete(2)

# Step 3: Create the dataset from the dataframe. No environment spec is
# supplied (env_spec=None); only the spaces defined above are passed.
create_minari_dataset_from_df(
    df=df,
    dataset_id=dataset_id,
    observation_space=obs_space,
    action_space=act_space,
    env_spec=None,
    algorithm_name="t_katt_omnidata",
    author="Mercury",
    description="Mocked dataset. Hopefully someday Spark extracted data.",
)

# Step 4: Validate by loading the dataset back and inspecting it.
# The emoji are written as \U escapes (U+1F4E6 PACKAGE, U+1F440 EYES) so the
# source stays pure ASCII and cannot be turned into mojibake by a tool that
# reads or writes this file with the wrong encoding.
ds = load_dataset(dataset_id)
print(f"\U0001F4E6 Loaded dataset with {len(ds)} episodes.")
first_episode = ds[0]
print(f"\U0001F440 First observation: {first_episode.observations[0]}")

✅ Dataset 'manual-dataset-v0' created successfully.
📦 Loaded dataset with 2 episodes.
👀 First observation: [0. 0.]


