In [2]:
import torch
import torch.nn as nn
from torch.autograd import grad
import pandas as pd
from InverseFuncs import trajectory, getLoss, reset_theta, theta_range

from DDPGv2Agent import Agent
from FireflyEnv import Model # firefly_task.py
from collections import deque
from Inverse_Config import Inverse_Config
import matplotlib.pyplot as plt

# --- configuration and reproducibility --------------------------------------
import random
import time

import numpy as np
import torch

# Inverse-problem settings; arg.SEED_NUMBER is the single seed used for every RNG below.
arg = Inverse_Config()

# Seed the Python, PyTorch and NumPy generators with the same value.
for seed_rng in (random.seed, torch.manual_seed, np.random.seed):
    seed_rng(arg.SEED_NUMBER)
# Also seed the current CUDA device when one is visible.
if torch.cuda.is_available():
    torch.cuda.manual_seed(arg.SEED_NUMBER)

# Ask cuDNN for deterministic kernels and switch off its auto-tuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Device selection: use the GPU whenever PyTorch can see one.
# (To force CPU execution, override with CUDA = False and device = "cpu".)
CUDA = torch.cuda.is_available()
device = torch.device("cuda") if CUDA else torch.device("cpu")

# Wall-clock timestamp taken at the start of the run.
tic = time.time()



filename = '20191231-172726-01081157' # agent information

# Settings object saved with torch (a pickle under the hood).
# NOTE(review): torch.load unpickles arbitrary objects, so only point this at
# trusted files. Recent PyTorch releases default to weights_only=True, which
# may refuse a pickled config object like this one -- confirm against the
# installed version.
learning_arg = torch.load('../firefly-inverse-data/data/20191231-172726_arg.pkl')

DISCOUNT_FACTOR = learning_arg.DISCOUNT_FACTOR

# Copy the task settings stored in learning_arg onto the inverse config so
# both describe the same task (presumably the settings the agent was trained
# with -- confirm).
for _field in (
    'gains_range',
    'std_range',
    'goal_radius_range',
    'WORLD_SIZE',
    'DELTA_T',
    'EPISODE_TIME',
    'EPISODE_LEN',
):
    setattr(arg, _field, getattr(learning_arg, _field))



# Environment built from the merged configuration.
env = Model(arg)
# Upper bound of the goal-radius range becomes the environment's max goal radius.
env.max_goal_radius = arg.goal_radius_range[1]
env.box = arg.WORLD_SIZE

# DDPG agent sized from the environment; `filename` is passed both to the
# constructor and to load().
agent = Agent(
    env.state_dim,
    env.action_dim,
    arg,
    filename,
    hidden_dim=128,
    gamma=DISCOUNT_FACTOR,
    tau=0.001,
)
agent.load(filename)


Running DDPG Agent: using  cpu


In [9]:
# Ground truth for the inverse problem: draw the "true" parameter vector,
# roll out the agent under it, and score those trajectories under the same
# parameters.
true_theta = reset_theta(arg.gains_range, arg.std_range, arg.goal_radius_range)

# `true_theta_log` is otherwise only created by the grid-setup cell further
# down, so on a fresh kernel (Restart & Run All) the append below raised
# NameError. Create it here when it does not exist yet.
# NOTE: that later cell rebinds the name to a new empty list, so anything
# logged here is dropped once it runs.
if "true_theta_log" not in globals():
    true_theta_log = []
# detach() replaces the legacy `.data` access; clone() stores an independent copy.
true_theta_log.append(true_theta.detach().clone())

# Trajectories generated under true_theta: states, observations, actions
# (the fourth return value is unused).
x_traj, obs_traj, a_traj, _ = trajectory(agent, true_theta, env, arg, arg.gains_range, arg.std_range, arg.goal_radius_range, arg.NUM_EP)

# Loss of the true parameters on their own trajectories
# (open question from the original author: is this a lower bound of the loss?).
true_loss = getLoss(agent, x_traj, a_traj, true_theta, env, arg.gains_range, arg.std_range, arg.PI_STD, arg.NUM_SAMPLES)
print("true loss:{}".format(true_loss))
print("true_theta:{}".format(true_theta))



true loss:153753.90625
true_theta:tensor([10.3728,  8.8873,  0.0819,  1.8675,  9.6898,  8.0379,  1.4043,  1.7850,
         0.4398])


In [None]:
# Evenly spaced grids for the sweep: 5 points across the gain range,
# 3 across the noise-std range and 3 across the goal-radius range.
gain_space = np.linspace(start=arg.gains_range[0], stop=arg.gains_range[1], num=5)
std_space = np.linspace(start=arg.std_range[0], stop=arg.std_range[1], num=3)
goal_radius_space = np.linspace(start=arg.goal_radius_range[0], stop=arg.goal_radius_range[1], num=3)

# Fresh accumulators for evaluated thetas, their losses, and the true theta.
# NOTE: rebinding true_theta_log discards anything appended before this cell runs.
theta_log, loss_log, true_theta_log = [], [], []

In [23]:
# Scratch check: the gain grid with the true value true_theta[0] appended --
# the same expression the sweep iterates over for pro_gains[0].
np.append(gain_space, true_theta[0])

array([ 8.        ,  9.        , 10.        , 11.        , 12.        ,
       10.37282372])

In [None]:
# Brute-force sweep of the loss surface.
#
# Every one of the nine theta components is varied over its grid plus the
# matching entry of true_theta, and getLoss is evaluated on the fixed true
# trajectories (x_traj, a_traj) for every combination. Evaluated thetas and
# losses are appended to theta_log / loss_log.
#
# With the 5/3/3-point grids this is 6**4 * 4**5 = 1,327,104 evaluations,
# each printing one line -- expect a very long run and a very large output.
from itertools import product  # stdlib; imported here so the cell runs on its own

# Reusable buffers, one per parameter group.
pro_gains = torch.zeros(2)
pro_noise_stds = torch.zeros(2)
obs_gains = torch.zeros(2)
obs_noise_stds = torch.zeros(2)
goal_radius = torch.zeros(1)

# Sweep axes, built once instead of once per pass of each enclosing loop.
# Their order reproduces the original loop nesting (first axis = outermost
# loop, last axis = innermost loop), so the evaluation order is unchanged.
sweep_axes = (
    np.append(gain_space, true_theta[0]),         # pro_gains[0]
    np.append(gain_space, true_theta[1]),         # pro_gains[1]
    np.append(gain_space, true_theta[4]),         # obs_gains[0]
    np.append(gain_space, true_theta[5]),         # obs_gains[1]
    np.append(std_space, true_theta[2]),          # pro_noise_stds[0]
    np.append(std_space, true_theta[3]),          # pro_noise_stds[1]
    np.append(std_space, true_theta[6]),          # obs_noise_stds[0]
    np.append(std_space, true_theta[7]),          # obs_noise_stds[1]
    np.append(goal_radius_space, true_theta[8]),  # goal_radius[0]
)

i = 0  # 1-based running count of evaluated combinations
for i, (
    pro_gains_vel, pro_gains_ang,
    obs_gains_vel, obs_gains_ang,
    pro_std_vel, pro_std_ang,
    obs_std_vel, obs_std_ang,
    goal_r,
) in enumerate(product(*sweep_axes), start=1):
    pro_gains[0], pro_gains[1] = pro_gains_vel, pro_gains_ang
    pro_noise_stds[0], pro_noise_stds[1] = pro_std_vel, pro_std_ang
    obs_gains[0], obs_gains[1] = obs_gains_vel, obs_gains_ang
    obs_noise_stds[0], obs_noise_stds[1] = obs_std_vel, obs_std_ang
    goal_radius[0] = goal_r

    # torch.cat allocates a new tensor, so each logged theta is independent
    # of the buffers that get overwritten on the next iteration.
    theta = torch.cat([pro_gains, pro_noise_stds, obs_gains, obs_noise_stds, goal_radius])
    theta_log.append(theta.detach())  # detach() replaces the legacy `.data`

    loss = getLoss(agent, x_traj, a_traj, theta, env, arg.gains_range, arg.std_range, arg.PI_STD, arg.NUM_SAMPLES)
    loss_log.append(loss.detach())

    print("num:{}, theta:{}, loss:{}".format(i, theta, loss))


num:1, theta:tensor([8.0000, 8.0000, 0.0100, 0.0100, 8.0000, 8.0000, 0.0100, 0.0100, 0.2000]), loss300404.3125
num:2, theta:tensor([8.0000, 8.0000, 0.0100, 0.0100, 8.0000, 8.0000, 0.0100, 0.0100, 0.3500]), loss249473.640625
num:3, theta:tensor([8.0000, 8.0000, 0.0100, 0.0100, 8.0000, 8.0000, 0.0100, 0.0100, 0.5000]), loss217140.734375
num:4, theta:tensor([8.0000, 8.0000, 0.0100, 0.0100, 8.0000, 8.0000, 0.0100, 0.0100, 0.4398]), loss223984.015625
num:5, theta:tensor([8.0000, 8.0000, 0.0100, 0.0100, 8.0000, 8.0000, 0.0100, 1.0050, 0.2000]), loss299034.28125
num:6, theta:tensor([8.0000, 8.0000, 0.0100, 0.0100, 8.0000, 8.0000, 0.0100, 1.0050, 0.3500]), loss258279.1875
num:7, theta:tensor([8.0000, 8.0000, 0.0100, 0.0100, 8.0000, 8.0000, 0.0100, 1.0050, 0.5000]), loss199787.5
num:8, theta:tensor([8.0000, 8.0000, 0.0100, 0.0100, 8.0000, 8.0000, 0.0100, 1.0050, 0.4398]), loss221196.09375
num:9, theta:tensor([8.0000, 8.0000, 0.0100, 0.0100, 8.0000, 8.0000, 0.0100, 2.0000, 0.2000]), loss302090.2

num:74, theta:tensor([8.0000, 8.0000, 0.0100, 1.0050, 8.0000, 8.0000, 0.0100, 2.0000, 0.3500]), loss292744.625
num:75, theta:tensor([8.0000, 8.0000, 0.0100, 1.0050, 8.0000, 8.0000, 0.0100, 2.0000, 0.5000]), loss252092.96875
num:76, theta:tensor([8.0000, 8.0000, 0.0100, 1.0050, 8.0000, 8.0000, 0.0100, 2.0000, 0.4398]), loss263571.78125
num:77, theta:tensor([8.0000, 8.0000, 0.0100, 1.0050, 8.0000, 8.0000, 0.0100, 1.7850, 0.2000]), loss328673.46875
num:78, theta:tensor([8.0000, 8.0000, 0.0100, 1.0050, 8.0000, 8.0000, 0.0100, 1.7850, 0.3500]), loss300776.8125
num:79, theta:tensor([8.0000, 8.0000, 0.0100, 1.0050, 8.0000, 8.0000, 0.0100, 1.7850, 0.5000]), loss250008.5
num:80, theta:tensor([8.0000, 8.0000, 0.0100, 1.0050, 8.0000, 8.0000, 0.0100, 1.7850, 0.4398]), loss270334.21875
num:81, theta:tensor([8.0000, 8.0000, 0.0100, 1.0050, 8.0000, 8.0000, 1.0050, 0.0100, 0.2000]), loss265073.8125
num:82, theta:tensor([8.0000, 8.0000, 0.0100, 1.0050, 8.0000, 8.0000, 1.0050, 0.0100, 0.3500]), loss2431

num:147, theta:tensor([8.0000, 8.0000, 0.0100, 2.0000, 8.0000, 8.0000, 1.0050, 0.0100, 0.5000]), loss236999.921875
num:148, theta:tensor([8.0000, 8.0000, 0.0100, 2.0000, 8.0000, 8.0000, 1.0050, 0.0100, 0.4398]), loss251455.84375
num:149, theta:tensor([8.0000, 8.0000, 0.0100, 2.0000, 8.0000, 8.0000, 1.0050, 1.0050, 0.2000]), loss300291.34375
num:150, theta:tensor([8.0000, 8.0000, 0.0100, 2.0000, 8.0000, 8.0000, 1.0050, 1.0050, 0.3500]), loss272658.65625
num:151, theta:tensor([8.0000, 8.0000, 0.0100, 2.0000, 8.0000, 8.0000, 1.0050, 1.0050, 0.5000]), loss245496.90625
num:152, theta:tensor([8.0000, 8.0000, 0.0100, 2.0000, 8.0000, 8.0000, 1.0050, 1.0050, 0.4398]), loss240288.140625
num:153, theta:tensor([8.0000, 8.0000, 0.0100, 2.0000, 8.0000, 8.0000, 1.0050, 2.0000, 0.2000]), loss321957.5
num:154, theta:tensor([8.0000, 8.0000, 0.0100, 2.0000, 8.0000, 8.0000, 1.0050, 2.0000, 0.3500]), loss279304.84375
num:155, theta:tensor([8.0000, 8.0000, 0.0100, 2.0000, 8.0000, 8.0000, 1.0050, 2.0000, 0.5

num:220, theta:tensor([8.0000, 8.0000, 0.0100, 1.8675, 8.0000, 8.0000, 1.0050, 2.0000, 0.4398]), loss260451.875
num:221, theta:tensor([8.0000, 8.0000, 0.0100, 1.8675, 8.0000, 8.0000, 1.0050, 1.7850, 0.2000]), loss294310.90625
num:222, theta:tensor([8.0000, 8.0000, 0.0100, 1.8675, 8.0000, 8.0000, 1.0050, 1.7850, 0.3500]), loss271008.15625
num:223, theta:tensor([8.0000, 8.0000, 0.0100, 1.8675, 8.0000, 8.0000, 1.0050, 1.7850, 0.5000]), loss256071.40625
num:224, theta:tensor([8.0000, 8.0000, 0.0100, 1.8675, 8.0000, 8.0000, 1.0050, 1.7850, 0.4398]), loss247118.171875
num:225, theta:tensor([8.0000, 8.0000, 0.0100, 1.8675, 8.0000, 8.0000, 2.0000, 0.0100, 0.2000]), loss278000.46875
num:226, theta:tensor([8.0000, 8.0000, 0.0100, 1.8675, 8.0000, 8.0000, 2.0000, 0.0100, 0.3500]), loss263196.25
num:227, theta:tensor([8.0000, 8.0000, 0.0100, 1.8675, 8.0000, 8.0000, 2.0000, 0.0100, 0.5000]), loss248309.34375
num:228, theta:tensor([8.0000, 8.0000, 0.0100, 1.8675, 8.0000, 8.0000, 2.0000, 0.0100, 0.439

num:293, theta:tensor([8.0000, 8.0000, 1.0050, 0.0100, 8.0000, 8.0000, 2.0000, 1.0050, 0.2000]), loss309180.78125
num:294, theta:tensor([8.0000, 8.0000, 1.0050, 0.0100, 8.0000, 8.0000, 2.0000, 1.0050, 0.3500]), loss279589.9375
num:295, theta:tensor([8.0000, 8.0000, 1.0050, 0.0100, 8.0000, 8.0000, 2.0000, 1.0050, 0.5000]), loss235571.75
num:296, theta:tensor([8.0000, 8.0000, 1.0050, 0.0100, 8.0000, 8.0000, 2.0000, 1.0050, 0.4398]), loss241410.625
num:297, theta:tensor([8.0000, 8.0000, 1.0050, 0.0100, 8.0000, 8.0000, 2.0000, 2.0000, 0.2000]), loss305450.3125
num:298, theta:tensor([8.0000, 8.0000, 1.0050, 0.0100, 8.0000, 8.0000, 2.0000, 2.0000, 0.3500]), loss271188.90625
num:299, theta:tensor([8.0000, 8.0000, 1.0050, 0.0100, 8.0000, 8.0000, 2.0000, 2.0000, 0.5000]), loss235661.1875
num:300, theta:tensor([8.0000, 8.0000, 1.0050, 0.0100, 8.0000, 8.0000, 2.0000, 2.0000, 0.4398]), loss247720.84375
num:301, theta:tensor([8.0000, 8.0000, 1.0050, 0.0100, 8.0000, 8.0000, 2.0000, 1.7850, 0.2000]),

num:366, theta:tensor([8.0000, 8.0000, 1.0050, 1.0050, 8.0000, 8.0000, 2.0000, 1.7850, 0.3500]), loss319998.15625
num:367, theta:tensor([8.0000, 8.0000, 1.0050, 1.0050, 8.0000, 8.0000, 2.0000, 1.7850, 0.5000]), loss268786.625
num:368, theta:tensor([8.0000, 8.0000, 1.0050, 1.0050, 8.0000, 8.0000, 2.0000, 1.7850, 0.4398]), loss292189.125
num:369, theta:tensor([8.0000, 8.0000, 1.0050, 1.0050, 8.0000, 8.0000, 1.4043, 0.0100, 0.2000]), loss332065.03125
num:370, theta:tensor([8.0000, 8.0000, 1.0050, 1.0050, 8.0000, 8.0000, 1.4043, 0.0100, 0.3500]), loss296122.0625
num:371, theta:tensor([8.0000, 8.0000, 1.0050, 1.0050, 8.0000, 8.0000, 1.4043, 0.0100, 0.5000]), loss235457.234375
num:372, theta:tensor([8.0000, 8.0000, 1.0050, 1.0050, 8.0000, 8.0000, 1.4043, 0.0100, 0.4398]), loss268226.5625
num:373, theta:tensor([8.0000, 8.0000, 1.0050, 1.0050, 8.0000, 8.0000, 1.4043, 1.0050, 0.2000]), loss347182.84375
num:374, theta:tensor([8.0000, 8.0000, 1.0050, 1.0050, 8.0000, 8.0000, 1.4043, 1.0050, 0.3500

num:439, theta:tensor([8.0000, 8.0000, 1.0050, 2.0000, 8.0000, 8.0000, 1.4043, 1.0050, 0.5000]), loss279977.5625
num:440, theta:tensor([8.0000, 8.0000, 1.0050, 2.0000, 8.0000, 8.0000, 1.4043, 1.0050, 0.4398]), loss311698.6875
num:441, theta:tensor([8.0000, 8.0000, 1.0050, 2.0000, 8.0000, 8.0000, 1.4043, 2.0000, 0.2000]), loss379659.46875
num:442, theta:tensor([8.0000, 8.0000, 1.0050, 2.0000, 8.0000, 8.0000, 1.4043, 2.0000, 0.3500]), loss337841.5
num:443, theta:tensor([8.0000, 8.0000, 1.0050, 2.0000, 8.0000, 8.0000, 1.4043, 2.0000, 0.5000]), loss293376.53125
num:444, theta:tensor([8.0000, 8.0000, 1.0050, 2.0000, 8.0000, 8.0000, 1.4043, 2.0000, 0.4398]), loss318498.25
num:445, theta:tensor([8.0000, 8.0000, 1.0050, 2.0000, 8.0000, 8.0000, 1.4043, 1.7850, 0.2000]), loss368116.53125
num:446, theta:tensor([8.0000, 8.0000, 1.0050, 2.0000, 8.0000, 8.0000, 1.4043, 1.7850, 0.3500]), loss332476.21875
num:447, theta:tensor([8.0000, 8.0000, 1.0050, 2.0000, 8.0000, 8.0000, 1.4043, 1.7850, 0.5000]), 

num:512, theta:tensor([8.0000, 8.0000, 1.0050, 1.8675, 8.0000, 8.0000, 1.4043, 1.7850, 0.4398]), loss311205.375
num:513, theta:tensor([8.0000, 8.0000, 2.0000, 0.0100, 8.0000, 8.0000, 0.0100, 0.0100, 0.2000]), loss305435.65625
