In [1]:
import os
os.environ['NUMEXPR_MAX_THREADS'] = '1'

import logging
import numexpr as ne
import numpy as np
import torch

from ddopai.experiments.meta_experiment_functions import *
import requests
import yaml
import re
import pandas as pd
import wandb
from copy import deepcopy
import warnings
import gc
from mushroom_rl import core 
import pickle
from tqdm import tqdm, trange
import random

In [2]:
config_env = import_config("config_env.yaml")
config_dist = import_config("config_dist.yaml")

INFO:root:Configuration file 'config_env.yaml' successfully loaded.
INFO:root:Configuration file 'config_dist.yaml' successfully loaded.


current: multiple env_kwargs with the specific values for each env

In [3]:
def sample_param(spec):
    t = spec["type"]
    if t == "uniform":
        return np.random.uniform(spec["low"], spec["high"])
    if t == "categorical":
        return random.choice(spec["values"])
    if t == "lognormal":
        return np.random.lognormal(mean=spec["mu"], sigma=spec["sigma"])
    raise ValueError(f"Unknown dist type {t}")

def build_task_kwargs(dist_config, status = "train", id=None):
    task_kwargs = {"id": id}
    status_config = dist_config[status]
    for key, spec in status_config["parameters"].items():
        val = sample_param(spec)
        # maybe round floats for readability:
        if isinstance(val, float):
            val = float(np.round(val, 4))
        task_kwargs[key] = val

    # handle dependent parameters, e.g. alpha/beta sparsity
    if task_kwargs["nb_features"] > 1:
        d = task_kwargs["nb_features"]
        sparsity = task_kwargs["sparsity_parameter"]
        mean_a = task_kwargs["mean_alpha"]
        mean_b = task_kwargs["mean_beta"]
        cov_p = task_kwargs["covariance"]
        alpha = np.random.multivariate_normal(
            mean=np.ones(d)*mean_a, 
            cov=np.eye(d)*cov_p
        )
        beta = np.random.multivariate_normal(
            mean=np.ones(d)*mean_b, 
            cov=np.eye(d)*cov_p
        )
        # zero out per sparsity
        zero_idx_a = np.random.choice(d, int(sparsity*d), replace=False)
        zero_idx_b = np.random.choice(d, int(sparsity*d), replace=False)
        alpha[zero_idx_a] = 0
        beta[zero_idx_b] = 0
        task_kwargs["alpha"] = [round(float(x), 2) for x in alpha]
        task_kwargs["beta"]  = [round(float(x), 2) for x in beta]
    else:
        # degenerate 1‐dim case
        task_kwargs["alpha"] = [round(float(task_kwargs["mean_alpha"]),2)]
        task_kwargs["beta"]  = [round(float(task_kwargs["mean_beta"]),2)]

    # convert env_class name → actual class and stash into kwargs
    return task_kwargs


In [None]:
size_train = config_env["size_train"]
size_val = config_env["size_val"]
size_test = config_env["size_test"]
train_tasks = [build_task_kwargs(config_dist, status="train", id=id) for id in range(size_train)]
val_tasks = [build_task_kwargs(config_dist, status="val", id=id) for id in range(size_val)]
test_tasks = [build_task_kwargs(config_dist, status="test", id=id) for id in range(size_test)]

if config_env["const_params"]:
    first_alpha = train_tasks[0]["alpha"]
    first_beta = train_tasks[0]["beta"]

    for task in train_tasks + val_tasks + test_tasks:
        task["alpha"] = first_alpha
        task["beta"] = first_beta


KeyError: 'constant_params'

In [None]:
config_env["env_kwargs"]["train_tasks"] = train_tasks
config_env["env_kwargs"]["val_tasks"] = val_tasks
config_env["env_kwargs"]["test_tasks"] = test_tasks

In [None]:
class NoAliasDumper(yaml.SafeDumper):
    def ignore_aliases(self, data):
        return True

with open("config_env.yaml", "w") as file:
    yaml.dump(config_env, file, default_flow_style=False, Dumper=NoAliasDumper)

In [None]:
config_env

{'env_class': 'DynamicPricingEnv',
 'env_kwargs': {'gamma': 0.0,
  'horizon_train': 500,
  'p_bound_high': 5.0,
  'p_bound_low': 0.0,
  'test_tasks': [{'id': 0,
    'horizon': 500,
    'nb_features': 5,
    'sparsity_parameter': 0.0,
    'noise_std': 0.1,
    'function_form': 'linear',
    'mean_alpha': 1.2,
    'mean_beta': -0.3,
    'covariance': 0.0,
    'sparsitiy': 0.0,
    'inv': False,
    'refernce_effects': False,
    'inv_level': 3000,
    'alpha': [1.2, 1.2, 1.2, 1.2, 1.2],
    'beta': [-0.3, -0.3, -0.3, -0.3, -0.3]},
   {'id': 1,
    'horizon': 500,
    'nb_features': 5,
    'sparsity_parameter': 0.0,
    'noise_std': 0.1,
    'function_form': 'linear',
    'mean_alpha': 1.2,
    'mean_beta': -0.3,
    'covariance': 0.0,
    'sparsitiy': 0.0,
    'inv': False,
    'refernce_effects': False,
    'inv_level': 3000,
    'alpha': [1.2, 1.2, 1.2, 1.2, 1.2],
    'beta': [-0.3, -0.3, -0.3, -0.3, -0.3]},
   {'id': 2,
    'horizon': 500,
    'nb_features': 5,
    'sparsity_parameter