In [1]:
import os
os.environ['NUMEXPR_MAX_THREADS'] = '1'

import logging
import numexpr as ne
import numpy as np
import torch

from ddopai.experiments.meta_experiment_functions import *
import requests
import yaml
import re
import pandas as pd
import wandb
from copy import deepcopy
import warnings
import gc
from mushroom_rl import core 
import pickle
from tqdm import tqdm, trange

In [None]:
# Load the environment and distribution configurations from their YAML files.
# NOTE(review): import_config is presumably provided by the star import from
# ddopai.experiments.meta_experiment_functions — confirm.
config_env = import_config("config_env.yaml")
config_dist = import_config("config_dist.yaml")

INFO:root:Configuration file 'config_env.yaml' successfully loaded.


Current approach: build one `env_kwargs` entry per environment, each holding the specific parameter values for that environment.

In [None]:
def sample_param(spec):
    """Draw one random value according to a distribution specification.

    Parameters
    ----------
    spec : dict
        Must contain a ``"type"`` entry plus the fields that type needs:

        * ``"uniform"``     -> ``"low"``, ``"high"``
        * ``"categorical"`` -> ``"choices"``
        * ``"lognormal"``   -> ``"mu"``, ``"sigma"``

    Returns
    -------
    The value drawn from NumPy's global random state (``np.random``).

    Raises
    ------
    ValueError
        If ``spec["type"]`` is not one of the supported distribution names.
    """
    kind = spec["type"]

    if kind == "uniform":
        lower, upper = spec["low"], spec["high"]
        drawn = np.random.uniform(lower, upper)
    elif kind == "categorical":
        drawn = np.random.choice(spec["choices"])
    elif kind == "lognormal":
        drawn = np.random.lognormal(mean=spec["mu"], sigma=spec["sigma"])
    else:
        raise ValueError(f"Unknown dist type {kind}")

    return drawn

def build_task_kwargs(dist_config, status = "train"):
    """Sample one task's keyword arguments from a distribution config.

    Every entry of ``dist_config[status]["parameters"]`` is sampled with
    ``sample_param`` and stored under its own key; the dependent ``alpha`` and
    ``beta`` coefficient lists are then derived from those sampled values.

    Parameters
    ----------
    dist_config : dict
        Mapping ``status -> {"parameters": {name: spec}}`` where each ``spec``
        is accepted by ``sample_param``.
    status : str, default "train"
        Which top-level section of ``dist_config`` to sample from.

    Returns
    -------
    dict
        The sampled parameters plus ``"alpha"`` and ``"beta"`` (lists of floats
        rounded to two decimals).

    Raises
    ------
    KeyError
        If a parameter needed for the chosen branch was not part of the config
        (``nb_features`` always; ``sparsity_parameter``, ``mean_alpha``,
        ``mean_beta`` and a covariance entry when ``nb_features > 1``;
        ``leading_alpha`` / ``leading_beta`` otherwise).
    """
    task_kwargs = {}
    for key, spec in dist_config[status]["parameters"].items():
        val = sample_param(spec)
        # Round floats for readability.
        if isinstance(val, float):
            val = float(np.round(val, 4))
        task_kwargs[key] = val

    # Handle dependent parameters, e.g. alpha/beta sparsity.
    if task_kwargs["nb_features"] > 1:
        # np.ones / np.eye need an integer size; a sampled value may be a float.
        d = int(task_kwargs["nb_features"])
        sparsity = task_kwargs["sparsity_parameter"]
        # Use the values sampled above. task_kwargs never has a "parameters"
        # entry, so the previous task_kwargs["parameters"][...]["choices"]
        # lookups could only raise KeyError.
        mean_a = task_kwargs["mean_alpha"]
        mean_b = task_kwargs["mean_beta"]
        # NOTE(review): the original lookup used the name "covariance" while
        # the episode-building cell uses "covariance_parameter"; accept either
        # and confirm against config_dist.yaml.
        if "covariance" in task_kwargs:
            cov_p = task_kwargs["covariance"]
        else:
            cov_p = task_kwargs["covariance_parameter"]

        alpha = np.random.multivariate_normal(
            mean=np.ones(d) * mean_a,
            cov=np.eye(d) * cov_p
        )
        beta = np.random.multivariate_normal(
            mean=np.ones(d) * mean_b,
            cov=np.eye(d) * cov_p
        )
        # Zero out a `sparsity` fraction of the coefficients, chosen
        # independently for alpha and beta.
        n_zero = int(sparsity * d)
        zero_idx_a = np.random.choice(d, n_zero, replace=False)
        zero_idx_b = np.random.choice(d, n_zero, replace=False)
        alpha[zero_idx_a] = 0
        beta[zero_idx_b] = 0
        task_kwargs["alpha"] = [round(float(x), 2) for x in alpha]
        # beta was previously computed but never stored in this branch.
        task_kwargs["beta"] = [round(float(x), 2) for x in beta]
    else:
        # Degenerate 1-dim case: use the leading coefficients directly.
        task_kwargs["alpha"] = [round(float(task_kwargs["leading_alpha"]), 2)]
        task_kwargs["beta"] = [round(float(task_kwargs["leading_beta"]), 2)]

    # NOTE: env_class is returned exactly as sampled; no name -> class
    # conversion is performed here.
    return task_kwargs


In [None]:
def _setup_value(name, episode):
    """Return the ``setup_kwargs`` entry ``name`` for this episode.

    Each entry is a list; episodes cycle through it by index modulo its length.
    """
    options = config_env["setup_kwargs"][name]
    return options[episode % len(options)]


# Settings copied unchanged into every episode, listed in output order.
# The outer/inner tags are carried over from the original inline comments.
_DIRECT_KEYS = (
    'gamma',           # outer
    'env_class',       # outer
    'horizon_train',   # inner
    'p_bound_low',     # outer
    'p_bound_high',    # outer
    'inv',             # inner
    'nb_features',     # inner
    'noise_std',       # inner
    'function_form',   # inner
)

env_kwargs = []
for episode in range(config_env["setup_kwargs"]["n_episodes"]):
    episode_kwargs = {key: _setup_value(key, episode) for key in _DIRECT_KEYS}
    # Copy env_type so the per-episode update below does not touch the config entry.
    episode_kwargs['env_type'] = _setup_value('env_type', episode).copy()

    n_features = episode_kwargs['nb_features']
    if n_features > 1:
        # Multi-feature case: draw alpha and beta from isotropic Gaussians.
        alpha = np.random.multivariate_normal(
            mean=np.ones(n_features) * _setup_value("mean_alpha", episode),
            cov=np.eye(n_features) * _setup_value("covariance_parameter", episode),
        )
        beta = np.random.multivariate_normal(
            mean=np.ones(n_features) * _setup_value("mean_beta", episode),
            cov=np.eye(n_features) * _setup_value("covariance_parameter", episode),
        )

        # Zero out a `sparsity` fraction of each vector at random positions.
        sparsity = _setup_value("sparsity_parameter", episode)
        n_zero_alpha = int(len(alpha) * sparsity)
        n_zero_beta = int(len(beta) * sparsity)
        alpha_zero_positions = np.random.choice(len(alpha), n_zero_alpha, replace=False)
        beta_zero_positions = np.random.choice(len(beta), n_zero_beta, replace=False)
        alpha[alpha_zero_positions] = 0
        beta[beta_zero_positions] = 0
        # leading_alpha / leading_beta are not prepended in this branch (the
        # original kept that step only as commented-out code).
    else:
        # Single-feature case: use the configured leading coefficients.
        alpha = np.array([_setup_value("leading_alpha", episode)])
        beta = np.array([_setup_value("leading_beta", episode)])

    episode_kwargs['alpha'] = [round(float(coef), 2) for coef in alpha]
    episode_kwargs['beta'] = [round(float(coef), 2) for coef in beta]

    # Record the generating parameters alongside the environment-type flags.
    for extra_key in (
        'mean_alpha',
        'mean_beta',
        'leading_alpha',
        'leading_beta',
        'covariance_parameter',
        'noise_std',
    ):
        episode_kwargs['env_type'][extra_key] = _setup_value(extra_key, episode)

    env_kwargs.append(episode_kwargs)

config_env["env_kwargs"] = env_kwargs

In [4]:
# Spot-check the generated kwargs for the third episode (index 2).
env_kwargs[2]

{'gamma': 0,
 'env_class': 'RL2DynamicPricingEnv',
 'horizon_train': 300,
 'p_bound_low': 0,
 'p_bound_high': 5,
 'inv': [3000.0],
 'nb_features': 5,
 'noise_std': 1,
 'function_form': ['linear'],
 'env_type': {'inv': False,
  'reference_price': False,
  'mean_alpha': 1.2,
  'mean_beta': -0.3,
  'leading_alpha': 10,
  'leading_beta': -1,
  'covariance_parameter': 0.2,
  'noise_std': 1},
 'alpha': [1.33, 0.79, 1.11, 1.57, 1.74],
 'beta': [-0.76, 0.3, 0.3, -0.67, -0.52]}

In [5]:
class NoAliasDumper(yaml.SafeDumper):
    """SafeDumper variant whose ``ignore_aliases`` always returns True.

    NOTE(review): presumably this makes PyYAML write repeated objects out in
    full instead of emitting anchors/aliases (``&id001`` / ``*id001``) —
    confirm against the PyYAML documentation.
    """
    def ignore_aliases(self, data):
        # Report every object as alias-exempt, regardless of ``data``.
        return True

# Serialise first, then write. config_env.yaml is also this notebook's input,
# and open(..., "w") truncates it immediately; if yaml.dump raised while
# streaming into the open file (e.g. SafeDumper cannot represent a value), the
# existing config would be left empty or half-written.
config_env_yaml = yaml.dump(config_env, default_flow_style=False, Dumper=NoAliasDumper)
with open("config_env.yaml", "w") as file:
    file.write(config_env_yaml)