In [146]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

from rllib.agent import RandomAgent
from rllib.policy import RandomPolicy
from rllib.util import rollout_policy, rollout_agent
from rllib.dataset import TrajectoryDataset

from rllib.environment.systems import InvertedPendulum, CartPole
from rllib.environment import SystemEnvironment, GymEnvironment
import numpy as np 
from torch.utils.data import DataLoader
import torch


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [147]:
def termination_func(state):
    """Return whether the first state component has left the +/-45 degree band.

    Parameters
    ----------
    state : array-like
        State(s) whose last axis indexes the state components. Only
        component 0 is read; it is compared against 45 degrees expressed in
        radians, so it is presumably an angle in radians (TODO confirm).

    Returns
    -------
    Boolean (or boolean array with the leading dimensions of ``state``) that
    is True where ``|state[..., 0]| >= deg2rad(45)``.
    """
    limit = np.deg2rad(45)
    first_component = state[..., 0]
    magnitude = np.abs(first_component)
    return magnitude >= limit

def reward_func(state, action):
    """Gaussian-shaped reward on the first state component.

    Computes ``exp(-0.5 / 0.2**2 * state[..., 0]**2)``, which equals 1 when
    the first component is 0 and decays as its magnitude grows.

    Parameters
    ----------
    state : array-like
        State(s) whose last axis indexes the state components; only
        component 0 is read.
    action : any
        Accepted for signature compatibility; not used in the computation.

    Returns
    -------
    Reward value(s) with the leading dimensions of ``state``.
    """
    width = 0.2
    # Same operation order as -0.5 / (width ** 2) * x ** 2.
    gain = -0.5 / (width ** 2)
    squared = state[..., 0] ** 2
    return np.exp(gain * squared)

# Dynamical system under study.
system = InvertedPendulum(
    mass=0.3,
    length=0.5,
    friction=0.005,
)
# Alternative system, kept for reference:
# system = CartPole(pendulum_mass=0.3, cart_mass=1,
#                   length=0.5, rot_friction=0.005)

# Initial state: first component is 20 degrees converted to radians, second is 0.
initial_state = np.array([np.deg2rad(20), 0.0])

# Wrap the system as an environment with a 50-step cap (max_steps=50).
environment = SystemEnvironment(
    system,
    max_steps=50,
    initial_state=initial_state,
)
# Alternative environment, kept for reference:
# environment = GymEnvironment('Pendulum-v0')

# NOTE(review): termination_func and reward_func are defined in this cell but
# are not passed to anything here -- confirm whether they should be wired into
# the environment.

# Random policy and random agent, both sized from the environment's dimensions.
policy = RandomPolicy(
    dim_state=environment.dim_observation,
    dim_action=environment.dim_action,
    num_action=environment.num_action,
)

agent = RandomAgent(
    dim_state=environment.dim_observation,
    dim_action=environment.dim_action,
    num_action=environment.num_action,
)

# Trajectory storage (sequence_length=4) and a loader that batches 32 items.
dataset = TrajectoryDataset(sequence_length=4)
dataloader = DataLoader(dataset, num_workers=0, batch_size=32)


In [148]:
# Number of episodes used for both the policy and the agent rollouts.
num_episodes = 10

# Roll out the policy, store each trajectory in the dataset and report its length.
for episode_idx in range(num_episodes):
    trajectory = rollout_policy(environment, policy)
    dataset.append(trajectory)
    print(len(trajectory))

# Roll out the agent for the same number of episodes; nothing is stored here.
for episode_idx in range(num_episodes):
    rollout_agent(environment, agent)


50
50
50
50
50
50
50
50
50
50


In [149]:
# Shuffle via the dataset itself; the loader is created with shuffle=False.
dataset.shuffle()
dataloader = DataLoader(dataset, shuffle=False, batch_size=32, num_workers=0)

# Collect the `state` tensor of the first batch of each of two epochs.
states = []
for epoch in range(2):
    for observation in dataloader:
        state, action, reward, next_state, done = observation
        states.append(state)
        # Same stdout as two prints: the batch shape, then the epoch index.
        print(state.shape, epoch, sep="\n")
        # Only the first batch of every epoch is inspected.
        break
    

torch.Size([32, 4, 2])
0
torch.Size([32, 4, 2])
1


In [150]:
# `state` is the batch unpacked in the last loop iteration of the previous
# cell, so this cell only works after that cell has been run.
print(state)

tensor([[[ 0.8317,  3.5919],
         [ 0.8691,  3.9002],
         [ 0.9086,  3.9996],
         [ 0.9493,  4.1376]],

        [[ 1.1152,  4.5441],
         [ 1.1621,  4.8529],
         [ 1.2106,  4.8297],
         [ 1.2590,  4.8547]],

        [[ 0.3809,  0.4908],
         [ 0.3863,  0.5896],
         [ 0.3917,  0.4962],
         [ 0.3974,  0.6417]],

        [[ 0.6748,  2.7309],
         [ 0.7027,  2.8356],
         [ 0.7314,  2.9121],
         [ 0.7615,  3.1173]],

        [[ 1.1244,  4.1043],
         [ 1.1670,  4.4091],
         [ 1.2125,  4.7028],
         [ 1.2603,  4.8491]],

        [[ 0.3501, -0.0806],
         [ 0.3499,  0.0444],
         [ 0.3517,  0.3038],
         [ 0.3546,  0.2838]],

        [[ 0.4634,  1.6257],
         [ 0.4805,  1.7833],
         [ 0.4995,  2.0220],
         [ 0.5192,  1.9284]],

        [[ 0.4588,  1.6080],
         [ 0.4757,  1.7708],
         [ 0.4929,  1.6824],
         [ 0.5102,  1.7673]],

        [[ 0.3568,  0.4183],
         [ 0.3615,  0.5222]