In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
!pip3 install pyro-ppl



In [121]:
import os
path = os.path.abspath(os.path.join(os.getcwd(),".."))
import sys
sys.path.append(os.path.dirname(os.getcwd()))
from dynamics_predict.defaults import DYNAMICS_PARAMS, HYPER_PARAMS

env_name = 'inverteddoublependulum'
data_path = path+'/data/dynamics_data/'+env_name+'/dynamics.npy'
param_dim = len(DYNAMICS_PARAMS[env_name+'dynamics'])
print('parameter dimension: ', param_dim)

train_data = np.load(data_path, allow_pickle=True)
print('number of samples in data: ', len(train_data))
# split data
data_s, data_a, data_param, data_s_ = [], [], [], []
for d in train_data:
    [s,a,param], s_ = d
    data_s.append(s)
    data_a.append(a)
    data_param.append(param)
    data_s_.append(s_)

data_s = np.array(data_s)
data_a = np.array(data_a)
data_param = np.array(data_param)
data_s_ = np.array(data_s_)

print(data_s.shape, data_a.shape, data_param.shape, data_s_.shape)

parameter dimension:  5
number of samples in data:  3549
(3549, 11) (3549, 1) (3549, 5) (3549, 11)


In [122]:
x = np.concatenate((data_s,data_a), axis=-1)
theta = data_param
y = data_s_
print(x.shape, y.shape)

s_dim = data_s.shape[-1]
a_dim = data_a.shape[-1]
param_dim = data_param.shape[-1]
latent_dim = 2

(3549, 12) (3549, 11)


In [123]:
#stage 1, learning forward dynamics and dynamics encoder
from torch.distributions import Normal
import torch.nn.functional as F
import torch.nn as nn
import os

device = 'cpu'

class DynamicsEncoder(nn.Module):
    """ Dynamics parameters encoding network: (params) -> (latent code) """
    def __init__(self, param_dim, latent_dim, hidden_dim=32, hidden_activation=F.relu, output_activation=F.tanh, num_hidden_layers=2, lr=1e-3, gamma=0.99):
        super(DynamicsEncoder, self).__init__()
        
        self.hidden_activation = hidden_activation
        self.output_activation = output_activation
        self._param_dim = param_dim
        self.latent_dim = latent_dim
        self.num_hidden_layers = num_hidden_layers

        self.input_layer =  nn.Linear(self._param_dim, hidden_dim)
        self.hidden_layers = [nn.Linear(hidden_dim, hidden_dim) for _ in range(num_hidden_layers)]
        self.hidden_layers = nn.ModuleList(self.hidden_layers)  # Have to wrap the list layers with nn.ModuleList to coorectly make those parameters tracked by nn.module! Otherwise those params will not be saved!
        self.output_layer =  nn.Linear(hidden_dim, latent_dim)

        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)
        # self.scheduler = torch.optim.lr_scheduler.ExponentialLR(self.optimizer, gamma=0.99)

    def forward(self, x):
        if not isinstance(x, torch.Tensor):
            x = torch.Tensor(x)
        x=self.hidden_activation(self.input_layer(x))
        for hl in self.hidden_layers:
            x=self.hidden_activation(hl(x))
        x=self.output_layer(x)
        if self.output_activation is not None:
            x=self.output_activation(x)
        return x

# class EmbeddingDynamicsNetwork(nn.Module):
#     """ Common class for dyanmics prediction network with dynamics embedding as input: (s,a, alpha) -> s' """
#     def __init__(self, s_dim, a_dim, latent_dim, hidden_dim=32, hidden_activation=F.relu, output_activation=F.tanh, num_hidden_layers=2, lr=1e-3, gamma=0.99):
#         super(EmbeddingDynamicsNetwork, self).__init__()
        
#         self.hidden_activation = hidden_activation
#         self.output_activation = output_activation
#         self.latent_dim = latent_dim
#         self.num_hidden_layers = num_hidden_layers

#         self.input_layer =  nn.Linear(s_dim+a_dim+self.latent_dim, hidden_dim)
#         self.hidden_layers = [nn.Linear(hidden_dim, hidden_dim) for _ in range(num_hidden_layers)]
#         self.hidden_layers = nn.ModuleList(self.hidden_layers)  # Have to wrap the list layers with nn.ModuleList to coorectly make those parameters tracked by nn.module! Otherwise those params will not be saved!
#         self.output_layer =  nn.Linear(hidden_dim, s_dim)

#         self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)
#         # self.scheduler = torch.optim.lr_scheduler.ExponentialLR(self.optimizer, gamma=0.99)

#     def forward(self, x):
#         if not isinstance(x, torch.Tensor):
#             x = torch.Tensor(x)
#         x=self.hidden_activation(self.input_layer(x))
#         for hl in self.hidden_layers:
#             x=self.hidden_activation(hl(x))
#         x=self.output_layer(x)
#         if self.output_activation is not None:
#             x=self.output_activation(x)
#         return x

class EmbeddingDynamicsNetwork(nn.Module):
    """ Common class for dyanmics prediction network with dynamics embedding as input: (s,a, alpha) -> s' """
    def __init__(self, s_dim, a_dim, latent_dim, hidden_dim=32, hidden_activation=F.relu, output_activation=F.tanh, num_hidden_layers=2, lr=1e-3, gamma=0.99):
        super(EmbeddingDynamicsNetwork, self).__init__()
        
        in_size = s_dim+a_dim+latent_dim
        out_size = s_dim

        self.weights =  nn.Parameter(torch.randn(in_size, out_size))
        self.bias = nn.Parameter(torch.randn(out_size))

        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)
        # self.scheduler = torch.optim.lr_scheduler.ExponentialLR(self.optimizer, gamma=0.99)

    def forward(self, x):
        if not isinstance(x, torch.Tensor):
            x = torch.Tensor(x)
        y = x @ self.weights + self.bias
        return y

class DynamicsParamsOptimizer():
    """ 
    Dynamics parameters optimization model (gradient-based) based on a trained 
    forward dynamics prediction network: (s, a, learnable_params) -> s_ with real-world data. 
    """
    def __init__(self, s_dim, a_dim, param_dim, latent_dim, hidden_dim=32, hidden_activation=F.relu, output_activation=None, num_hidden_layers=2, lr=1e-2, gamma=0.99):
        self.dynamics_model = EmbeddingDynamicsNetwork(s_dim, a_dim, latent_dim, hidden_dim, hidden_activation, output_activation, num_hidden_layers, lr, gamma).to(device)
        self.dynamics_encoder = DynamicsEncoder(param_dim, latent_dim, hidden_dim, hidden_activation, output_activation, num_hidden_layers, lr, gamma).to(device)
        self.optimizer = torch.optim.Adam(list(self.dynamics_model.parameters()) + list(self.dynamics_encoder.parameters()), lr=lr)

        self.loss = nn.MSELoss()

    def forward(self, x, theta):
        """ s,a concat with param (learnable) -> s_ """

        alpha = self.dynamics_encoder(theta)
        y_  = self.dynamics_model(torch.cat((x, alpha), axis=-1))
        
        return y_

    def update(self, data, epoch=200, model_save_path=None):
        (x, theta, y) = data
        if not isinstance(x, torch.Tensor):
            x = torch.Tensor(x).to(device)
        if not isinstance(theta, torch.Tensor):
            theta = torch.Tensor(theta).to(device)        
        if not isinstance(y, torch.Tensor):
            y = torch.Tensor(y).to(device)

        for ep in range(epoch):
            y_ = self.forward(x, theta)
            self.optimizer.zero_grad()
            loss = self.loss(y_, y)
            loss.backward()
            self.optimizer.step()
            if ep%100==0:
                print('epoch: {}, loss: {}'.format(ep, loss.item()))
                torch.save(self.dynamics_model.state_dict(), model_save_path+'dynamics_model')
                torch.save(self.dynamics_encoder.state_dict(), model_save_path+'dynamics_encoder')

opt = DynamicsParamsOptimizer(s_dim, a_dim, param_dim, latent_dim)
data = (x, theta, y)
model_save_path = './model/test/'
os.makedirs(model_save_path, exist_ok=True)
opt.update(data, epoch=2000, model_save_path=model_save_path)

epoch: 0, loss: 11.473695755004883
epoch: 100, loss: 1.3826773166656494
epoch: 200, loss: 0.558249831199646
epoch: 300, loss: 0.3881198465824127
epoch: 400, loss: 0.3190983235836029
epoch: 500, loss: 0.2819022238254547
epoch: 600, loss: 0.257633239030838
epoch: 700, loss: 0.24044831097126007
epoch: 800, loss: 0.2271835058927536
epoch: 900, loss: 0.2170628160238266
epoch: 1000, loss: 0.209286630153656
epoch: 1100, loss: 0.203314870595932
epoch: 1200, loss: 0.19874310493469238
epoch: 1300, loss: 0.19511933624744415
epoch: 1400, loss: 0.1923438161611557
epoch: 1500, loss: 0.19016148149967194
epoch: 1600, loss: 0.18844372034072876
epoch: 1700, loss: 0.1870744526386261
epoch: 1800, loss: 0.18599280714988708
epoch: 1900, loss: 0.18508432805538177


In [91]:
#stage 2, using BNN and SVI to fit alpha
import pyro
import pyro.distributions as dist
from pyro.nn import PyroModule, PyroSample
import torch.nn as nn
from pyro.infer.autoguide import AutoDiagonalNormal
from pyro.infer import SVI, Trace_ELBO, Predictive
from tqdm.auto import trange, tqdm

In [160]:
# load test data
test_data_path = path+'/data/dynamics_data/'+env_name+'/test_dynamics.npy'
test_data = np.load(test_data_path, allow_pickle=True)
print('number of samples in dest data: ', len(test_data))
idx=5  # index of sample to test: 0-10
test_s = np.array(test_data[idx]['sa'])[:, :-1]
test_a = np.array(test_data[idx]['sa'])[:, -1:]
test_param = np.array(test_data[idx]['params'])
test_s_ = np.array(test_data[idx]['s_'])
print(test_s.shape, test_a.shape, test_param.shape, test_s_.shape)

# load model
# updater = DynamicsParamsOptimizer(state_dim, action_dim, param_dim, latent_dim, switch_dim, model_save_path)
# updater.model.load_state_dict(torch.load(model_save_path+'model', map_location=device))

number of samples in dest data:  10
(5333, 11) (5333, 1) (5,) (5333, 11)


In [161]:
test_x = torch.from_numpy(np.concatenate((test_s,test_a), axis=-1)).float()
test_y = torch.from_numpy(test_s_).float()

x_dim = test_x.shape[1]
y_dim = test_y.shape[1]
print(test_x.shape, test_y.shape)

torch.Size([5333, 12]) torch.Size([5333, 11])


In [94]:
print(opt.dynamics_model.weights.shape)

torch.Size([14, 11])


In [111]:
from pyro.nn import PyroModule, PyroParam, PyroSample
import copy

class EmbeddingFit(PyroModule):
    def __init__(self, latent_dim, dynamics_model):
        super().__init__()
        self.latent_dim = latent_dim
        # self.dynamics_model = dynamics_model
        self.weights = copy.deepcopy(dynamics_model.weights.cpu())
        self.bias = copy.deepcopy(dynamics_model.bias.cpu())
        self.alpha = PyroSample(dist.Normal(0., 1.).expand([latent_dim]).to_event(1))
        self.sigma = PyroSample(dist.LogNormal(0, 1))


    def forward(self, x, y=None):
        """ s,a concat with param (learnable) -> s_ """
        # batch_size = x.shape[0]
        # print(batch_size)
        # input = torch.cat((x, self.alpha.repeat([batch_size, 1])), axis=-1)
        # print(input.shape)
        batch_size=2
        input = torch.randn(2, 14)
        # mu = self.dynamics_model(input).squeeze()
        mu = input @ self.weights + self.bias
        # sigma = pyro.sample("sigma", dist.Uniform(0., 1.).expand([y_dim]).to_event(1))  # the to_event(1) is necessary, you’ll need to call .to_event(1) to use scalar distributions like Normal as a joint diagonal distributions over multiple variables: see: https://forum.pyro.ai/t/simple-gmm-in-pyro/3047/3
        # sigma = PyroSample(dist.Uniform(0., 1.))  # the to_event(1) is necessary, you’ll need to call .to_event(1) to use scalar distributions like Normal as a joint diagonal distributions over multiple variables: see: https://forum.pyro.ai/t/simple-gmm-in-pyro/3047/3

        with pyro.plate("instances", batch_size):
            obs = pyro.sample("obs", dist.Normal(mu, self.sigma).to_event(1),
                               obs=y)
            print(obs.shape)
            return obs

In [186]:
class EmbeddingFit(PyroModule):
    def __init__(self, latent_dim, dynamics_model):
        super().__init__()
        self.alpha = PyroSample(dist.Normal(0., 1.).expand([latent_dim]).to_event(1))
        self.weight = copy.deepcopy(dynamics_model.weights.cpu())
        self.bias = copy.deepcopy(dynamics_model.bias.cpu())
        self.sigma = pyro.sample("sigma", dist.Uniform(0., 1.).expand([1]).to_event(1))

    def forward(self, x, output=None):
        batch_size = x.shape[0]
        input = torch.cat((x, self.alpha.repeat([batch_size, 1])), axis=-1)
        mu = input @ self.weight + self.bias
        with pyro.plate("instances", batch_size):
            return pyro.sample("obs", dist.Normal(mu, self.sigma).to_event(1),
                               obs=output)

# pyro.clear_param_store()
# pyro.set_rng_seed(1)

dynamics_model = EmbeddingDynamicsNetwork(s_dim, a_dim, latent_dim, hidden_dim=32, hidden_activation=F.relu, output_activation=None, num_hidden_layers=2, lr=1e-2, gamma=0.99).to(device)
model_save_path = './model/test/'
dynamics_model.load_state_dict(torch.load(model_save_path+'dynamics_model', map_location=device))

model = EmbeddingFit(latent_dim, dynamics_model)

x = test_x[:100]
y = test_y[:100]

print(x.shape, y.shape, x.dtype)
print(test_x.shape, test_y.shape, test_x.dtype)

guide = AutoDiagonalNormal(model)  # unlearned posterior dist. AutoDiagonalNormal
svi = SVI(model, guide, pyro.optim.Adam({"lr": 0.01}), Trace_ELBO())  # parameters to optimize are determined by guide()
for step in range(1000):
    loss = svi.step(x, y) / y.numel()  # data in step() are passed to both model() and guide()
    if step % 100 == 0:
        # print(model.linear.weight)
        print("step {} loss = {:0.4g}".format(step, loss))

torch.Size([100, 12]) torch.Size([100, 11]) torch.float32
torch.Size([5333, 12]) torch.Size([5333, 11]) torch.float32
step 0 loss = 1.474
step 100 loss = 1.474
step 200 loss = 1.474
step 300 loss = 1.475
step 400 loss = 1.474
step 500 loss = 1.474
step 600 loss = 1.475
step 700 loss = 1.474
step 800 loss = 1.474
step 900 loss = 1.474


In [138]:
predictive = Predictive(model, guide=guide, num_samples=500)
x_test = x_train[:10]
print(x_test.shape)
preds = predictive(x_test)

y_pred = preds['obs'].T.detach().numpy().mean(axis=1)
y_std = preds['obs'].T.detach().numpy().std(axis=1)

print(y_pred, y_std)

# fig, ax = plt.subplots(figsize=(10, 5))
# ax.plot(x, y, 'o', markersize=1)
# ax.plot(x_test, y_pred)
# ax.fill_between(x_test, y_pred - y_std, y_pred + y_std,
#                 alpha=0.5, color='#ffcd3c')

torch.Size([10, 23])
[[-1.4620235  -0.9292353  -2.3858504  ... -1.5019815  -1.8051643
  -2.8081913 ]
 [ 1.2608469   1.4969504   2.1020446  ...  0.26011953  1.8149033
   1.5758591 ]
 [ 0.79152566  0.92237675  1.3267851  ... -0.35175693  1.1348053
   1.0248308 ]
 [-1.4267539  -0.08124174 -0.70871776 ... -0.19416465 -1.2698132
  -1.2513993 ]
 [-0.23448806 -1.2992532  -0.70612466 ... -2.1950686  -1.2392956
   0.31301618]] [[1.4301293  1.3278846  1.8229095  ... 1.5404499  1.7940197  2.0547945 ]
 [1.4298494  1.1652166  1.8705648  ... 1.3980244  1.4551692  0.9440567 ]
 [1.0242914  0.63166404 1.1168156  ... 0.56255925 0.6149589  0.73265785]
 [0.57261705 0.900117   0.68755174 ... 0.5758168  0.8907142  0.9437713 ]
 [0.91220987 1.0359749  1.8830494  ... 1.6415218  0.8795121  0.97397715]]


In [None]:
class Model(PyroModule):
    def __init__(self, h1=20, h2=20):
        super().__init__()
        self.fc1 = PyroModule[nn.Linear](x_dim, h1)
        self.fc1.weight = PyroSample(dist.Normal(0., 1.).expand([h1, x_dim]).to_event(2))
        self.fc1.bias = PyroSample(dist.Normal(0., 1.).expand([h1]).to_event(1))
        self.fc2 = PyroModule[nn.Linear](h1, h2)
        self.fc2.weight = PyroSample(dist.Normal(0., 1.).expand([h2, h1]).to_event(2))
        self.fc2.bias = PyroSample(dist.Normal(0., 1.).expand([h2]).to_event(1))
        self.fc3 = PyroModule[nn.Linear](h2, y_dim)
        self.fc3.weight = PyroSample(dist.Normal(0., 1.).expand([y_dim, h2]).to_event(2))
        self.fc3.bias = PyroSample(dist.Normal(0., 1.).expand([y_dim]).to_event(1))
        self.relu = nn.ReLU()

    def forward(self, x, y=None):
        batch_size = x.shape[0]
        # x = x.reshape(-1, 1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        mu = self.fc3(x).squeeze()
        sigma = pyro.sample("sigma", dist.Uniform(0., 1.).expand([y_dim]).to_event(1))  # the to_event(1) is necessary, you’ll need to call .to_event(1) to use scalar distributions like Normal as a joint diagonal distributions over multiple variables: see: https://forum.pyro.ai/t/simple-gmm-in-pyro/3047/3
        # print(mu.shape, sigma.shape, y.shape)

        # with pyro.plate("data", batch_size):
        #     obs = pyro.sample("obs", dist.Normal(mu, sigma).to_event(1), obs=y) # the to_event(1) is necessary
        #     return mu

        with pyro.plate("instances", batch_size):
            return pyro.sample("obs", dist.Normal(mu, sigma).to_event(1),
                               obs=y)