In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to them take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [8]:
import copy

import gymnasium as gym
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions.categorical import Categorical

In [3]:
import sys
# Replace the process arguments with a single empty entry.
# NOTE(review): presumably this stops parse_args() (called below) from seeing
# the Jupyter kernel's own command-line arguments — confirm against parse_args.
sys.argv = [""]

In [4]:
from cart_pole.utils import *
from cart_pole.agent import *
from torch.utils.tensorboard import SummaryWriter
from cart_pole.environment import *
import pdb

In [5]:
# Build the run configuration and the tracking writer.
# NOTE(review): seconds_since_epoch is pinned to a literal value, presumably
# so the run identifier stays the same across re-runs — confirm in parse_args.
args = parse_args(seconds_since_epoch=1764177877)
# `writer` is whatever init_tracking returns; it is not used in the cells
# visible here.
writer = init_tracking(args)

In [6]:
# Create the environments, the agent (moved to args.device), the rollout
# storage and the optimizer, in that order: each object is built from the
# ones before it.
envs = make_sync_vector_env(args)
agent = Agent(envs).to(args.device)
storage = Storage(args, envs)
# Adam over all agent parameters; eps is overridden to 1e-5.
optimizer = optim.Adam(agent.parameters(),
                       lr=args.learning_rate,
                       eps=1e-5)

# Whole number of batches that fit in the step budget (floor division drops
# any remainder). Not referenced by the cells visible here.
num_updates = args.total_timesteps // args.batch_size
# NOTE(review): presumably this fills storage.obs, which the scaler cells
# below read — confirm in Storage.rollout.
storage.rollout(agent, envs)

In [75]:
class MinMaxScaler(object):
    """Rescale observations feature-by-feature using observation-space bounds.

    Features that the observation space reports as bounded both above and
    below are min-max scaled, ``(x - low) / (high - low)``. Every other
    feature is squashed with ``x / (1 + |x|)``, which lies strictly inside
    (-1, 1).

    Attributes:
        is_bounded: bool tensor, True where a feature is bounded on both sides.
        is_unbounded: logical complement of ``is_bounded``.
        low: lower bounds copied from the observation space.
        inv_range: ``1 / (high - low)`` for bounded features with a non-zero
            range, 1 everywhere else.

    All of these tensors are stored on ``args.device``.
    """

    def __init__(self,
                 args,
                 envs):
        """Precompute the per-feature masks, offsets and scale factors.

        Args:
            args: object exposing a ``device`` attribute.
            envs: object exposing ``single_observation_space``, which must
                provide ``low``, ``high``, ``bounded_below`` and
                ``bounded_above`` arrays of identical shape.
        """
        obs_space = envs.single_observation_space
        device = args.device

        bounded_np = np.logical_and(obs_space.bounded_above,
                                    obs_space.bounded_below)
        is_bounded = torch.tensor(bounded_np, dtype=torch.bool)

        # torch.tensor copies its input, so the space's own arrays are never
        # aliased or modified.
        low = torch.tensor(obs_space.low)
        high = torch.tensor(obs_space.high).to(low.dtype)

        # A bounded feature with high == low would give 1 / 0 = inf (and then
        # 0 * inf = nan when scaling); leave its factor at 1 so it maps to
        # x - low instead.
        span = high - low
        scalable = is_bounded & (span != 0)
        inv_range = torch.ones_like(low)
        inv_range[scalable] = 1 / span[scalable]

        # Offset used when scaling: `low` on bounded features, 0 elsewhere,
        # so infinite bounds never enter the arithmetic in __call__.
        shift = torch.where(is_bounded, low, torch.zeros_like(low))

        # Keep every buffer on the same device as the observations.
        self.is_bounded = is_bounded.to(device)
        self.is_unbounded = torch.logical_not(is_bounded).to(device)
        self.low = low.to(device)
        self.inv_range = inv_range.to(device)
        self._shift = shift.to(device)

    def __call__(self,
                 x):
        """Return a scaled copy of ``x``; ``x`` itself is left untouched.

        Args:
            x: tensor whose trailing dimension(s) match the observation-space
                shape; any number of leading batch dimensions is allowed.

        Returns:
            Tensor with the same shape as ``x``. Floating-point inputs keep
            their dtype.

        Raises:
            ValueError: if the trailing dimensions of ``x`` do not match the
                observation-space shape.
        """
        feature_shape = tuple(self.low.shape)
        n_feature_dims = len(feature_shape)
        trailing = tuple(x.shape[max(x.dim() - n_feature_dims, 0):])
        if x.dim() < n_feature_dims or trailing != feature_shape:
            raise ValueError(
                "expected trailing shape {}, got input of shape {}".format(
                    feature_shape, tuple(x.shape)))

        # The per-feature buffers broadcast over the leading batch dimensions.
        min_max = (x - self._shift) * self.inv_range
        squashed = (x / (1 + x.abs())).to(min_max.dtype)
        scaled = torch.where(self.is_bounded, min_max, squashed)

        return scaled.to(x.dtype) if x.is_floating_point() else scaled

# Scaler built from the bounds of envs.single_observation_space, with its
# buffers placed on args.device.
obs_scaler = MinMaxScaler(args, envs)

In [76]:
# Scale the stored observations; obs_scaler returns a new tensor and leaves
# storage.obs unchanged.
scaled_x = obs_scaler(storage.obs)

In [94]:
# Per-feature summary statistics of the scaled observations: all leading
# dimensions are flattened into rows and the last dimension becomes the
# columns, so the feature count is read from the tensor instead of hard-coded.
pd.DataFrame(scaled_x.reshape(-1, scaled_x.shape[-1]).cpu().numpy()).describe()

Unnamed: 0,0,1,2,3
count,2000.0,2000.0,2000.0,2000.0
mean,0.500069,-0.03459,0.512171,0.045753
std,0.010497,0.298109,0.119629,0.375724
min,0.461286,-0.718678,0.208226,-0.738429
25%,0.495258,-0.28413,0.444302,-0.274012
50%,0.499754,-0.019338,0.517513,0.041111
75%,0.504137,0.188389,0.579417,0.378998
max,0.596537,0.693009,0.805515,0.767597


In [95]:
# Per-feature summary statistics of the raw (unscaled) observations, for
# comparison with the scaled ones: all leading dimensions are flattened into
# rows and the last dimension becomes the columns, so the feature count is
# read from the tensor instead of hard-coded.
pd.DataFrame(storage.obs.reshape(-1, storage.obs.shape[-1]).cpu().numpy()).describe()

Unnamed: 0,0,1,2,3
count,2000.0,2000.0,2000.0,2000.0
mean,0.000662,-0.056026,0.010197,0.099258
std,0.100767,0.539438,0.10022,0.805893
min,-0.371652,-2.554643,-0.244436,-2.823061
25%,-0.04552,-0.396902,-0.046661,-0.377433
50%,-0.002361,-0.01972,0.014671,0.042874
75%,0.039715,0.232117,0.066532,0.610301
max,0.926756,2.257421,0.255948,3.302878
