In [None]:
# envs/decision_env.py
import gym
import numpy as np

class DecisionEnv(gym.Env):
    """
    Toy decision environment using the classic Gym API.

    Observation: float32 vector of length ``obs_dim`` (business features).
    Action: discrete choice (e.g., 0: keep, 1: promote, 2: discount, 3: delay).
    Reward: simulated KPI (e.g., profit uplift), computed as ``tanh`` of the
    scaled feature sum, plus a linear per-action offset, plus a small
    Gaussian noise term.

    ``reset()`` returns the observation only and ``step()`` returns the
    4-tuple ``(obs, reward, done, info)``.

    Randomness is drawn from NumPy's global generator until ``seed()`` is
    called; after that the instance uses its own seeded generator.
    """
    metadata = {"render.modes": ["human"]}

    def __init__(self, obs_dim: int = 10, n_actions: int = 4, max_steps: int = 20):
        """
        Args:
            obs_dim: Length of the observation vector.
            n_actions: Number of discrete actions.
            max_steps: Number of steps after which an episode reports ``done``.
        """
        super().__init__()
        self.obs_dim = obs_dim
        self.action_space = gym.spaces.Discrete(n_actions)
        self.observation_space = gym.spaces.Box(low=-10, high=10, shape=(obs_dim,), dtype=np.float32)
        self.max_steps = max_steps
        # Default to the global NumPy generator so that code relying on
        # ``np.random.seed(...)`` keeps behaving as before; ``seed()`` swaps
        # in a private generator. Must be set before the first ``reset()``.
        self._rng = np.random
        self.reset()

    def seed(self, seed=None):
        """
        Switch this instance to a private random generator.

        The current state is not redrawn; call ``reset()`` afterwards to
        start a reproducible episode.

        Args:
            seed: Value passed to ``np.random.RandomState``; ``None`` lets
                NumPy choose the seed.

        Returns:
            A one-element list containing ``seed``.
        """
        self._rng = np.random.RandomState(seed)
        return [seed]

    def reset(self):
        """Start a new episode and return the initial float32 observation."""
        self.steps = 0
        # random context / customer profile
        self.state = self._rng.randn(self.obs_dim).astype(np.float32)
        return self.state

    def step(self, action):
        """
        Apply ``action`` and advance the simulation by one step.

        Args:
            action: Integer member of ``self.action_space``.

        Returns:
            Tuple ``(obs, reward, done, info)``: the next float32 observation,
            the reward as a built-in ``float``, whether ``max_steps`` has been
            reached, and an empty info dict.

        Raises:
            ValueError: If ``action`` is not contained in ``self.action_space``.
        """
        if not self.action_space.contains(action):
            raise ValueError(
                f"invalid action {action!r} for action space {self.action_space}"
            )

        # State-dependent part of the reward: squashed, scaled feature sum.
        base = float(np.tanh(self.state.sum() * 0.1))
        # Linear per-action offset; with the default 4 actions the offsets
        # are -0.15, -0.05, 0.05 and 0.15.
        action_effect = (action - 1.5) * 0.1
        noise = self._rng.randn() * 0.01
        # Always hand back a built-in float, even for NumPy integer actions.
        reward = float(base + action_effect + noise)

        self.steps += 1
        done = self.steps >= self.max_steps
        # small observation drift: slight decay towards zero plus noise
        self.state = (self.state * 0.99 + self._rng.randn(self.obs_dim) * 0.01).astype(np.float32)
        return self.state, reward, done, {}
