In [1]:
%config InlineBackend.figure_format = 'svg'
%env MUJOCO_GL=egl
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
from dm_control import suite
from dm_control.suite.wrappers import pixels
from models import Encoder, Decoder, RewardModel, RSSM
from mpc import MPC
from replay import ExpReplay
from torch import optim
from torch.nn import functional as F
from torch.utils.data import DataLoader
from utils import display_img, display_video, preprocess_img

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Lift matplotlib's size cap on embedded animations so long rollouts still render inline.
matplotlib.rcParams['animation.embed_limit'] = 2**128
# Seed every RNG the notebook draws from: NumPy drives the random seed-episode
# actions, torch drives model weight initialisation and any torch-side sampling.
random_state = np.random.RandomState(0)
torch.manual_seed(0)

env: MUJOCO_GL=egl


In [2]:
# For animations to render inline in Jupyter, install ffmpeg, then uncomment
# the line below and set it to the location of the ffmpeg executable.
# plt.rcParams['animation.ffmpeg_path'] = '/usr/bin/ffmpeg'

In [3]:
# --- Data collection ---
SEED_EPS = 1        # episodes of random-action experience gathered before training
ACTION_REPEAT = 8   # environment steps taken for each planned action

# --- Replay buffer (both values are handed to ExpReplay) ---
BATCH_SZ = 50
CHUNK_LEN = 50

# --- Training schedule (not referenced by the cells visible here) ---
TRAIN_EPS = 100
UPDATES = 100

In [4]:
# Build the cartpole swing-up task and expose only rendered pixels as the
# observation (the simulator's internal state is hidden from the agent).
env = pixels.Wrapper(
    suite.load(domain_name='cartpole', task_name='swingup'),
    pixels_only=True,
)

# The action spec supplies the bounds used for random sampling and the
# action dimensionality shared by the replay buffer, RSSM and planner.
act_spec = env.action_spec()
action_dim = act_spec.shape[0]

# Replay buffer that all collected experience is appended to.
data = ExpReplay(BATCH_SZ, CHUNK_LEN, action_dim)

In [5]:
# Fill the replay buffer with experience gathered by a uniformly random policy.
total_reward_seed = 0
t = 0
for i in range(SEED_EPS):
    state = env.reset()
    reward = 0
    while not state.last():
        t += 1
        # The timestep's reward can be None; treat that as zero.
        reward = 0 if state.reward is None else state.reward
        total_reward_seed += reward
        # Render the current physics state and preprocess it into the stored frame.
        frame = preprocess_img(
            env.physics.render(camera_id=0, height=200, width=200)
        ).to(device)
        # Draw an action uniformly within the bounds given by the action spec.
        action = random_state.uniform(act_spec.minimum, act_spec.maximum, action_dim)
        data.append(frame, torch.as_tensor(action), torch.as_tensor(reward))
        state = env.step(action)
print("Avg reward per ep: ",total_reward_seed/SEED_EPS)
print("Avg timesteps per ep: ", t/SEED_EPS)

Avg reward per ep:  26.938218960784745
Avg timesteps per ep:  1000.0


In [6]:
# Instantiate every learned component on the selected device.
enc = Encoder().to(device)
dec = Decoder().to(device)
reward_model = RewardModel().to(device)
rssm = RSSM(action_dim).to(device)

# A single Adam optimiser updates all four networks jointly; parameters are
# gathered in the order encoder, decoder, reward model, RSSM.
params = [
    p
    for module in (enc, dec, reward_model, rssm)
    for p in module.parameters()
]
optimizer = optim.Adam(params, lr=1e-3, eps=1e-4)

# Planner used to choose actions during data collection.
planner = MPC(action_dim)

In [7]:
rewards = []

# Training loop. It currently runs a single episode with a 10-step rollout while
# the data-collection path is being debugged; TRAIN_EPS holds the configured
# episode count and the model-fitting phase is not implemented yet.
for episode in range(1):
    # MODEL FITTING
    # TODO: not implemented yet.

    # DATA COLLECTION
    t = 0
    eps_reward = 0
    with torch.no_grad():
        state = env.reset()
        # Start the latent state and previous action at zero.
        # NOTE(review): 200 / 30 are presumably the RSSM's deterministic and
        # stochastic state sizes -- confirm against models.RSSM.
        det_state = torch.zeros(200).to(device)
        stoc_state = torch.zeros(30).to(device)
        action = torch.zeros(action_dim).to(device)
        frame = preprocess_img(env.physics.render(camera_id=0, height=200, width=200)).to(device)
        # while not state.last():
        for step in range(10):  # distinct name: the original reused the outer loop's `i`
            t += 1
            print(t)
            # Advance the latent state with the previous action, then correct it
            # using the encoding of the current frame.
            det_state = rssm.drnn(det_state, stoc_state, action.to(device))
            # NOTE(review): the last recorded run of this cell died on its first
            # step with "AttributeError: 'tuple' object has no attribute 'squeeze'".
            # The traceback is truncated, so the failing call is unknown; check
            # what rssm.ssm_posterior / planner.get_action actually return.
            stoc_state, _, _ = rssm.ssm_posterior(det_state, enc(frame))
            stoc_state = stoc_state.squeeze()
            action = planner.get_action(det_state.to(device), stoc_state.to(device), rssm, reward_model)
            # The environment is stepped with a NumPy array (as in the seed-data
            # cell), so move the planned action off the torch device first.
            env_action = action.detach().cpu().numpy()
            for _ in range(ACTION_REPEAT):
                if state.last(): break
                state = env.step(env_action)
                eps_reward += state.reward
            # Keep the new frame on the same device as the encoder; otherwise the
            # next enc(frame) call sees a CPU tensor when running on CUDA.
            frame = preprocess_img(env.physics.render(camera_id=0, height=200, width=200)).to(device)
            # Store the reward as a tensor, matching the seed-data cell.
            data.append(frame, action, torch.as_tensor(state.reward))
        rewards.append(eps_reward)

1


AttributeError: 'tuple' object has no attribute 'squeeze'