In [1]:
import gymnasium as gym

env = gym.make("Hopper-v4", render_mode="human")

observation, info = env.reset(seed=42)
for _ in range(1000):
    action = env.action_space.sample()
    observation, reward, terminated, truncated, info = env.step(action)
    env.render()
    if terminated or truncated:
        observation, info = env.reset()
env.close()

In [4]:
env.action_space.shape[0]

3

In [14]:
print(env.observation_space)
print(env.observation_space.shape)
print("Lows:", env.observation_space.low, "High:", env.observation_space.high)
print(type(env.observation_space.low))

Box(-inf, inf, (11,), float64)
(11,)
Lows: [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf] High: [inf inf inf inf inf inf inf inf inf inf inf]
<class 'numpy.ndarray'>


In [16]:
list(zip(env.observation_space.low, env.observation_space.high))

[(-inf, inf),
 (-inf, inf),
 (-inf, inf),
 (-inf, inf),
 (-inf, inf),
 (-inf, inf),
 (-inf, inf),
 (-inf, inf),
 (-inf, inf),
 (-inf, inf),
 (-inf, inf)]

In [21]:
import torch as th
from torch import nn

class ParamModule(nn.Module):
    def __init__(self):
        super().__init__()
        self.param = nn.Parameter(th.rand(2))
    def forward(self, x: th.Tensor) -> th.Tensor:
        return self.param[0] * x + self.param[1]

In [31]:
params = ParamModule()
x = th.Tensor([2, 2])
print("x:", x)
print("params:", params.param)
print("params(x):", params(x))
print(params.parameters())

x: tensor([2., 2.])
params: Parameter containing:
tensor([0.9994, 0.7086], requires_grad=True)
params(x): tensor([2.7074, 2.7074], grad_fn=<AddBackward0>)
<generator object Module.parameters at 0x0000016C58746EB0>


In [39]:
model = ParamModule()
lr = 0.1
loss_fn = nn.MSELoss()
opt = th.optim.Adam(model.parameters(), lr=lr)

X = th.range(1, 10)
Y = 2 * X + 15

for epoch in range(10000):
    pred = model(X)
    loss = loss_fn(pred, Y)

    opt.zero_grad()
    loss.backward()
    opt.step()

    print(f"Epoch {epoch}:", model.param)



  X = th.range(1, 10)


Epoch 0: Parameter containing:
tensor([0.1872, 0.1447], requires_grad=True)
Epoch 1: Parameter containing:
tensor([0.2871, 0.2446], requires_grad=True)
Epoch 2: Parameter containing:
tensor([0.3868, 0.3444], requires_grad=True)
Epoch 3: Parameter containing:
tensor([0.4864, 0.4440], requires_grad=True)
Epoch 4: Parameter containing:
tensor([0.5857, 0.5434], requires_grad=True)
Epoch 5: Parameter containing:
tensor([0.6846, 0.6424], requires_grad=True)
Epoch 6: Parameter containing:
tensor([0.7832, 0.7412], requires_grad=True)
Epoch 7: Parameter containing:
tensor([0.8813, 0.8395], requires_grad=True)
Epoch 8: Parameter containing:
tensor([0.9788, 0.9373], requires_grad=True)
Epoch 9: Parameter containing:
tensor([1.0757, 1.0345], requires_grad=True)
Epoch 10: Parameter containing:
tensor([1.1718, 1.1312], requires_grad=True)
Epoch 11: Parameter containing:
tensor([1.2672, 1.2271], requires_grad=True)
Epoch 12: Parameter containing:
tensor([1.3618, 1.3224], requires_grad=True)
Epoch 13:

In [76]:
import numpy as np
device = th.device("cuda" if th.cuda.is_available() else "cpu")
x = [th.rand(3, dtype=th.float16) for i in range(5)]
print(x)
print(len(x[0]))
y = th.stack(x)
y.float()
y = y.to(device=device, dtype=th.float)
print(y)
print(type(y))
print(y.type())
print(y.device)
print(device)

[tensor([0.9868, 0.2300, 0.8413], dtype=torch.float16), tensor([0.4346, 0.3608, 0.9839], dtype=torch.float16), tensor([0.8110, 0.6636, 0.5249], dtype=torch.float16), tensor([0.1318, 0.4897, 0.2139], dtype=torch.float16), tensor([0.1748, 0.6147, 0.9346], dtype=torch.float16)]
3
tensor([[0.9868, 0.2300, 0.8413],
        [0.4346, 0.3608, 0.9839],
        [0.8110, 0.6636, 0.5249],
        [0.1318, 0.4897, 0.2139],
        [0.1748, 0.6147, 0.9346]], device='cuda:0')
<class 'torch.Tensor'>
torch.cuda.FloatTensor
cuda:0
cuda
