## 归一化处理与反归一化处理

不进行归一化处理

In [2]:
import gym
import numpy as np

# Baseline: run a random policy on CartPole-v1 using the raw observations.
env = gym.make("CartPole-v1")

try:
    for episode in range(5):
        total_reward = 0
        # step() below returns five values, i.e. the gym API in which reset()
        # returns an (observation, info) pair, so unpack both.
        obs, info = env.reset()
        done = False
        while not done:
            action = env.action_space.sample()  # random action
            state, reward, terminated, truncated, info = env.step(action)
            # The episode is over when the task itself ends (terminated) OR
            # when the time limit cuts it off (truncated); checking only the
            # first flag would keep stepping past the time limit.
            done = terminated or truncated
            total_reward += reward
        print(f"Episode {episode + 1}: Total Reward without Normalization = {total_reward}")
finally:
    # Release the environment even if a rollout raises.
    env.close()

Episode 1: Total Reward without Normalization = 27.0
Episode 2: Total Reward without Normalization = 25.0
Episode 3: Total Reward without Normalization = 15.0
Episode 4: Total Reward without Normalization = 16.0
Episode 5: Total Reward without Normalization = 36.0


进行状态归一化处理

In [3]:
import gym
import numpy as np

class NormalizedEnv(gym.ObservationWrapper):
    """Observation wrapper that standardizes observations as (obs - mean) / (std + eps).

    Args:
        env: The environment to wrap; its ``observation_space.shape`` sets the
            shape of the default statistics.
        mean: Optional per-feature mean (array-like). Defaults to zeros.
        std: Optional per-feature standard deviation (array-like). Defaults to
            ones.
        eps: Small constant added to ``std`` to avoid division by zero.

    With the default arguments the statistics are mean 0 and std 1, so the
    wrapper leaves observations numerically almost unchanged; pass real
    statistics to actually rescale the observations.
    """

    def __init__(self, env, mean=None, std=None, eps=1e-8):
        super().__init__(env)
        shape = env.observation_space.shape
        # Fall back to the identity statistics when none are supplied.
        self.mean = np.zeros(shape) if mean is None else np.asarray(mean, dtype=float)
        self.std = np.ones(shape) if std is None else np.asarray(std, dtype=float)
        self.eps = eps

    def observation(self, observation):
        """Return the standardized version of ``observation``."""
        return (observation - self.mean) / (self.std + self.eps)

# Same random-policy rollout, but observations pass through NormalizedEnv.
env = NormalizedEnv(gym.make("CartPole-v1"))

try:
    for episode in range(5):
        total_reward = 0
        # step() below returns five values, i.e. the gym API in which reset()
        # returns an (observation, info) pair, so unpack both.
        obs, info = env.reset()
        done = False
        while not done:
            action = env.action_space.sample()  # random action
            state, reward, terminated, truncated, info = env.step(action)
            # The episode is over when the task itself ends (terminated) OR
            # when the time limit cuts it off (truncated); checking only the
            # first flag would keep stepping past the time limit.
            done = terminated or truncated
            total_reward += reward
        print(f"Episode {episode + 1}: Total Reward with Normalization = {total_reward}")
finally:
    # Release the environment even if a rollout raises.
    env.close()

Episode 1: Total Reward with Normalization = 13.0
Episode 2: Total Reward with Normalization = 19.0
Episode 3: Total Reward with Normalization = 17.0
Episode 4: Total Reward with Normalization = 13.0
Episode 5: Total Reward with Normalization = 16.0


## state 区别

## reward 区别

## action 区别