In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
path = os.path.abspath(os.path.join(os.getcwd(),".."))
print(path)
import sys, os
sys.path.append(os.path.dirname(os.getcwd()))
from dynamics_predict.dynamics_networks import DynamicsNetwork, DynamicsParamsOptimizer, EncoderDynamicsNetwork, EncoderDecoderDynamicsNetwork, VAEDynamicsNetwork
from rl.policy_networks import DPG_PolicyNetwork
from utils.load_params import load_params
from utils.common_func import rand_params
from dynamics_predict.defaults import DYNAMICS_PARAMS, HYPER_PARAMS
from environment import envs
import torch



/home/quantumiracle/research/COS513_project/src
Error: encoder not found!


In [20]:
env_name = 'inverteddoublependulum'
data_path = path+'/data/dynamics_data/'+env_name+'/dynamics.npy'
param_dim = len(DYNAMICS_PARAMS[env_name+'dynamics'])
print('parameter dimension: ', param_dim)

parameter dimension:  5


In [26]:
train_data = np.load(data_path, allow_pickle=True)
print('number of samples in dest data: ', len(train_data))
# split data
def split_data(data, partial=100000):
    data_s, data_a, data_param, data_s_ = [], [], [], []
    for d in data[:partial]:
        [s,a,param], s_ = d
        data_s.append(s)
        data_a.append(a)
        data_param.append(param)
        data_s_.append(s_)

    data_s = np.array(data_s)
    data_a = np.array(data_a)
    data_param = np.array(data_param)
    data_s_ = np.array(data_s_)
    
    return data_s, data_a, data_param, data_s_

data_s, data_a, data_param, data_s_ = split_data(train_data)
print(data_s.shape, data_a.shape, data_param.shape, data_s_.shape)

number of samples in dest data:  5823798
(100000, 11) (100000, 1) (100000, 5) (100000, 11)


## Switch Linear Dynamics

In [28]:
from torch.distributions import Normal
import torch.nn.functional as F
import torch.nn as nn

device = 'cuda'

class DynamicsParamsOptimizer():
    """ 
    Dynamics parameters optimization model (gradient-based) based on a trained 
    forward dynamics prediction network: (s, a, learnable_params) -> s_ with real-world data. 
    """
    def __init__(self, state_dim, action_dim, param_dim, latent_dim, switch_dim, model_save_path):
        self.model = SLDynamicsNetwork(state_dim, action_dim, param_dim, latent_dim, switch_dim).to(device)
        self.criterion = nn.MSELoss()
        self.model_save_path = model_save_path

    def train(self, s, a, theta, s_, epoch):
        """ s,a concat with param (learnable) -> s_ """
        if not isinstance(s_, torch.Tensor):
            s_ = torch.Tensor(s_).to(device)

        for ep in range(epoch):
            s_pred = self.model.forward(s, a, theta)
            self.model.optimizer.zero_grad()
            loss = self.criterion(s_pred, s_)
            loss.backward()
            self.model.optimizer.step()
            if ep%100==0:
                print('epoch: {}, loss: {}'.format(ep, loss.item()))
                torch.save(self.model.state_dict(), self.model_save_path+'model')
            

class SLDynamicsNetwork(nn.Module):
    def __init__(self, state_dim, action_dim, param_dim, latent_dim, switch_dim, lr=1e-4):
        super(SLDynamicsNetwork, self).__init__()
        self.state_dim = state_dim
        self.A = nn.Parameter(torch.rand((switch_dim, latent_dim, state_dim, state_dim)), requires_grad=True)
        self.B = nn.Parameter(torch.rand((switch_dim, latent_dim, state_dim, action_dim)), requires_grad=True)
        self.E = nn.Parameter(torch.rand((param_dim, latent_dim)), requires_grad=True)
        self.switch_logits = nn.Sequential(
            nn.Linear(state_dim, switch_dim, bias=False)  # only weight matrix, no bias
        )
        # print(dict(self.named_parameters()))

        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)

    def gaussian_noise(self, shape, scale):
        normal = Normal(0, 1)
        epsilon = scale * normal.sample(shape) 
        return epsilon

    def get_switch_var(self, s):
        logits_ = self.switch_logits(s)
        switch_var = F.gumbel_softmax(logits_, tau=1, hard=True)  # if hard, return one-hot
        return switch_var

    def get_s_before_encode(self, s, a):
        switch_var = self.get_switch_var(s)
        A_w = torch.einsum('ab,bcde->acde', switch_var, self.A) # chosen by the switch variable; shape (#batch, #latent, #state, #state)
        B_w = torch.einsum('ab,bcde->acde', switch_var, self.B) # chosen by the switch variable; shape (#batch, #latent, #state, #action)
        s_before_encode = torch.einsum('abcd,ad->abc', A_w, s) + torch.einsum('abcd,ad->abc', B_w, a)  # shape (#batch, #latent, #state)
        return s_before_encode

    def forward(self, s, a, theta):
        if not isinstance(s, torch.Tensor):
            s = torch.Tensor(s).to(device)
        if not isinstance(a, torch.Tensor):
            a = torch.Tensor(a).to(device)
        if not isinstance(theta, torch.Tensor):
            theta = torch.Tensor(theta).to(device)
        batch_size = s.shape[0]

        s_before_encode = self.get_s_before_encode(s, a)
        s_before_noise = torch.einsum('ab,abc->ac', theta@self.E, s_before_encode)  # shape (#batch, #state)
        noise = self.gaussian_noise(shape=(batch_size, self.state_dim), scale=0.)
        s_ = s_before_noise + noise.to(device)

        return s_

    def get_latent_code(self, s, a, s_):
        if not isinstance(s, torch.Tensor):
            s = torch.Tensor(s).to(device).to(device)
        if not isinstance(a, torch.Tensor):
            a = torch.Tensor(a).to(device)        
        if not isinstance(s_, torch.Tensor):
            s_ = torch.Tensor(s_).to(device)     

        s_before_encode = self.get_s_before_encode(s, a)
        inv_s = torch.linalg.pinv(s_before_encode)  # pseudo-inverse; shape (#batch, #state, #latent)
        alpha = torch.einsum('ab,abc->ac', s_, inv_s)
        print(s_before_encode.shape, inv_s.shape, alpha.shape)

        return alpha



In [29]:
import os

# train
state_dim = data_s.shape[1]
action_dim = data_a.shape[1]
param_dim = data_param.shape[1]
latent_dim = 2
switch_dim = 5

model_save_path = f'../data/weights/dynamics/inverteddoublependulum/'
os.makedirs(model_save_path, exist_ok=True)
updater = DynamicsParamsOptimizer(state_dim, action_dim, param_dim, latent_dim, switch_dim, model_save_path)
updater.train(data_s, data_a, data_param, data_s_, epoch=10000)

epoch: 0, loss: 537.4883422851562
epoch: 100, loss: 497.87249755859375
epoch: 200, loss: 461.5218505859375
epoch: 300, loss: 428.9962158203125
epoch: 400, loss: 398.29608154296875
epoch: 500, loss: 370.56011962890625
epoch: 600, loss: 345.722412109375
epoch: 700, loss: 321.8437194824219
epoch: 800, loss: 300.49273681640625
epoch: 900, loss: 279.8143615722656
epoch: 1000, loss: 260.5543212890625
epoch: 1100, loss: 242.75592041015625
epoch: 1200, loss: 226.7599334716797
epoch: 1300, loss: 210.84959411621094
epoch: 1400, loss: 196.90492248535156
epoch: 1500, loss: 184.2673797607422
epoch: 1600, loss: 171.970703125
epoch: 1700, loss: 160.6292266845703
epoch: 1800, loss: 149.6862335205078
epoch: 1900, loss: 139.957275390625
epoch: 2000, loss: 130.5864715576172
epoch: 2100, loss: 122.14558410644531
epoch: 2200, loss: 114.12090301513672
epoch: 2300, loss: 106.89201354980469
epoch: 2400, loss: 99.89753723144531
epoch: 2500, loss: 93.7880859375
epoch: 2600, loss: 87.22730255126953
epoch: 2700, 

In [34]:
# load test data
test_data_path = path+'/data/dynamics_data/'+env_name+'/test_dynamics.npy'
test_data = np.load(test_data_path, allow_pickle=True)
print('number of samples in dest data: ', len(test_data))
idx=5  # index of sample to test: 0-10
test_s = np.array(test_data[idx]['sa'])[:, :-1]
test_a = np.array(test_data[idx]['sa'])[:, -1:]
test_param = np.array(test_data[idx]['params'])
test_s_ = np.array(test_data[idx]['s_'])
print(test_s.shape, test_a.shape, test_param.shape, test_s_.shape)

partial = 10
alpha = updater.model.get_latent_code(test_s[:partial], test_a[:partial], test_s_[:partial])

# compare with encoded value
alpha_ = torch.Tensor([test_param]).to(device)@updater.model.E
print(alpha, alpha.mean(dim=0), alpha_)

number of samples in dest data:  10
(275717, 11) (275717, 1) (5,) (275717, 11)
torch.Size([10, 2, 11]) torch.Size([10, 11, 2]) torch.Size([10, 2])
tensor([[7.5096, 4.9735],
        [4.3295, 3.0814],
        [4.9684, 3.5469],
        [4.2286, 2.6858],
        [3.8032, 2.0696],
        [5.6362, 3.6489],
        [4.0966, 2.6322],
        [4.3626, 2.9226],
        [7.1042, 5.1002],
        [0.8724, 0.5177]], device='cuda:0', grad_fn=<ViewBackward>) tensor([4.6911, 3.1179], device='cuda:0', grad_fn=<MeanBackward1>) tensor([[5.5475, 3.7723]], device='cuda:0', grad_fn=<MmBackward>)


In [37]:
# test on train data
test_size = 20
test_s = data_s[:test_size]
test_a = data_a[:test_size]
test_param = data_param[:test_size]
test_s_ = data_s_[:test_size]

alpha = updater.model.get_latent_code(test_s, test_a, test_s_)

# compare with encoded value
alpha_ = torch.Tensor(test_param)@updater.model.E
print(alpha, alpha_)


torch.Size([20, 2, 11]) torch.Size([20, 11, 2]) torch.Size([20, 2])
tensor([[ 2.2643, -1.8454],
        [ 2.8811, -2.2793],
        [ 3.5958, -2.9697],
        [ 1.7742, -1.4963],
        [ 2.4365, -2.0887],
        [ 2.2297, -1.8889],
        [ 1.8014, -1.4508],
        [ 2.7828, -2.4257],
        [ 3.4125, -2.8278],
        [ 1.7107, -1.3469],
        [ 2.0031, -1.6024],
        [ 2.5392, -2.0994],
        [ 3.7406, -3.1136],
        [ 1.4040, -1.1620],
        [ 2.3428, -1.9775],
        [ 4.2024, -3.5203],
        [ 1.4234, -1.1618],
        [ 1.6157, -1.2830],
        [ 1.8207, -1.4661],
        [ 2.3034, -1.9059]], grad_fn=<ViewBackward>) tensor([[ 2.3339, -1.8811],
        [ 2.3339, -1.8811],
        [ 2.3339, -1.8811],
        [ 2.3339, -1.8811],
        [ 2.3339, -1.8811],
        [ 2.3339, -1.8811],
        [ 2.3339, -1.8811],
        [ 2.3339, -1.8811],
        [ 2.3339, -1.8811],
        [ 2.3339, -1.8811],
        [ 2.3339, -1.8811],
        [ 2.3339, -1.8811],
        [ 2