In [1]:
import gymnasium as gym
from replay_buffer import ReplayBuffer

In [2]:
render = False
if render:
    env = gym.make('Pendulum-v1', g=9.81, render_mode="human")
else:
    env = gym.make('Pendulum-v1', g=9.81)
max_episodes = 1000
max_steps = 200

obs_dim = env.observation_space.shape[0]
act_dim = env.action_space.shape[0]
buf_dim = int(max_episodes*max_steps)

# create training set
seed = 1
training_buffer = ReplayBuffer(obs_dim=obs_dim, act_dim=act_dim, size=buf_dim)

for episode in range(max_episodes):
    observation, info = env.reset(seed=seed)
    for steps in range(max_steps+1):
        action = env.action_space.sample()  # agent policy that uses the observation and info
        next_observation, reward, terminated, truncated, info = env.step(action)

        done = terminated or truncated
        training_buffer.store(observation, action, reward, next_observation, done)

        env.render()

        observation = next_observation

        if done:
            done = False
            break

print("Finished creating the training set")

  gym.logger.warn(


Finished creating the training set


In [3]:
# create test set
max_episodes_test = 100
buf_dim = int(max_episodes*max_steps)

seed = 7
testing_buffer = ReplayBuffer(obs_dim=obs_dim, act_dim=act_dim, size=buf_dim)

for episode in range(max_episodes_test):
    observation, info = env.reset(seed=seed)
    for steps in range(max_steps + 1):
        action = env.action_space.sample()  # agent policy that uses the observation and info
        next_observation, reward, terminated, truncated, info = env.step(action)

        done = terminated or truncated
        testing_buffer.store(observation, action, reward, next_observation, done)

        env.render()

        observation = next_observation

        if done:
            done = False
            break

print("Finished creating the test set")

Finished creating the test set


In [4]:
# learning the reward function of the pendulum
from models import FCNN, SparseFCNN, L0SINDy_reward
from trainer import train_eval_reward_model
import torch

h_dim = 64
lr = 3e-4
batch_size = 256
num_epochs = 50

In [5]:
fcnn_model = FCNN(input_dim=obs_dim+act_dim, output_dim=1, h_dim=h_dim)

if torch.cuda.is_available():
    fcnn_model = fcnn_model.cuda()

optimizer_fcnn = torch.optim.Adam([
    {'params': fcnn_model.parameters()},
], lr=lr, weight_decay=0.0)

metrics_fcnn = train_eval_reward_model(fcnn_model, optimizer_fcnn, training_buffer, testing_buffer, batch_size, num_epochs)
print("Best testing error FCNN is {} and it was found at epoch {}".format(metrics_fcnn[2], metrics_fcnn[3]))

====> Epoch: 0 Average train loss: 5.0853429799
====> Epoch: 0 Average eval loss: 0.8065864444
====> Epoch: 1 Average train loss: 0.4510329755
====> Epoch: 1 Average eval loss: 0.2876272798
====> Epoch: 2 Average train loss: 0.1860080506
====> Epoch: 2 Average eval loss: 0.1615861952
====> Epoch: 3 Average train loss: 0.1095907881
====> Epoch: 3 Average eval loss: 0.1100310534
====> Epoch: 4 Average train loss: 0.0722869541
====> Epoch: 4 Average eval loss: 0.0722475722
====> Epoch: 5 Average train loss: 0.0474292982
====> Epoch: 5 Average eval loss: 0.0481063426
====> Epoch: 6 Average train loss: 0.0307188731
====> Epoch: 6 Average eval loss: 0.0303153489
====> Epoch: 7 Average train loss: 0.0208845843
====> Epoch: 7 Average eval loss: 0.0208203830
====> Epoch: 8 Average train loss: 0.0145904721
====> Epoch: 8 Average eval loss: 0.0154579673
====> Epoch: 9 Average train loss: 0.0109056870
====> Epoch: 9 Average eval loss: 0.0110428771
====> Epoch: 10 Average train loss: 0.0082877376
=

In [6]:
reg_coefficient = 0.00001
sparsefcnn_model = SparseFCNN(input_dim=obs_dim+act_dim, output_dim=1, h_dim=h_dim, lambda_coeff=reg_coefficient)

if torch.cuda.is_available():
    sparsefcnn_model = sparsefcnn_model.cuda()

optimizer_sparsefcnn = torch.optim.Adam([
    {'params': sparsefcnn_model.parameters()},
], lr=lr, weight_decay=0.0)

metrics_sparsefcnn = train_eval_reward_model(sparsefcnn_model, optimizer_sparsefcnn, training_buffer, testing_buffer,
                                               batch_size, num_epochs, l0=True)
print("Best testing error sparse FCNN is {} and it was found at epoch {}".format(metrics_sparsefcnn[2], metrics_sparsefcnn[3]))


  init.kaiming_normal(self.weights, mode='fan_out')


L0Dense(4 -> 64, droprate_init=0.5, lamba=1e-05, temperature=0.6666666666666666, weight_decay=0.0, local_rep=False)
L0Dense(64 -> 64, droprate_init=0.5, lamba=1e-05, temperature=0.6666666666666666, weight_decay=0.0, local_rep=False)
L0Dense(64 -> 1, droprate_init=0.5, lamba=1e-05, temperature=0.6666666666666666, weight_decay=0.0, local_rep=False)
====> Epoch: 0 Average train loss: 19.8749318854
====> Epoch: 0 Average L0 reg loss: 0.0368784434
====> Epoch: 0 Average eval loss: 5.6155309677
====> Epoch: 1 Average train loss: 10.0457300331
====> Epoch: 1 Average L0 reg loss: 0.0370998036
====> Epoch: 1 Average eval loss: 2.2697939873
====> Epoch: 2 Average train loss: 7.6165008103
====> Epoch: 2 Average L0 reg loss: 0.0372092764
====> Epoch: 2 Average eval loss: 1.7601346970
====> Epoch: 3 Average train loss: 6.5303920369
====> Epoch: 3 Average L0 reg loss: 0.0372833702
====> Epoch: 3 Average eval loss: 1.2636929750
====> Epoch: 4 Average train loss: 5.8402730426
====> Epoch: 4 Average L0

In [7]:
degree = 3
reg_coefficient = 0.01
l0sindy_model = L0SINDy_reward(input_dim=obs_dim+act_dim, output_dim=1, degree=degree, lambda_coeff=reg_coefficient)

if torch.cuda.is_available():
    l0sindy_model = l0sindy_model.cuda()

optimizer_fcnn = torch.optim.Adam([
    {'params': l0sindy_model.parameters()},
], lr=lr, weight_decay=0.0)

metrics_l0sindy = train_eval_reward_model(l0sindy_model, optimizer_fcnn, training_buffer, testing_buffer, batch_size, num_epochs, l0=True)
print("Best testing error L0 SINDy is {} and it was found at epoch {}".format(metrics_l0sindy[2], metrics_l0sindy[3]))


policy polynomial of order  3
with 35 coefficients
['1' 'x0' 'x1' 'x2' 'x3' 'x0^2' 'x0 x1' 'x0 x2' 'x0 x3' 'x1^2' 'x1 x2'
 'x1 x3' 'x2^2' 'x2 x3' 'x3^2' 'x0^3' 'x0^2 x1' 'x0^2 x2' 'x0^2 x3'
 'x0 x1^2' 'x0 x1 x2' 'x0 x1 x3' 'x0 x2^2' 'x0 x2 x3' 'x0 x3^2' 'x1^3'
 'x1^2 x2' 'x1^2 x3' 'x1 x2^2' 'x1 x2 x3' 'x1 x3^2' 'x2^3' 'x2^2 x3'
 'x2 x3^2' 'x3^3']
L0Dense(35 -> 1, droprate_init=0.5, lamba=0.01, temperature=0.6666666666666666, weight_decay=0.0, local_rep=False, bias=False)


  init.kaiming_normal(self.weights, mode='fan_out')


====> Epoch: 0 Average train loss: 860.1311251191
====> Epoch: 0 Average L0 reg loss: 0.2908181244
====> Epoch: 0 Average eval loss: 191.3743743896
====> Epoch: 1 Average train loss: 220.3858759361
====> Epoch: 1 Average L0 reg loss: 0.2911279030
====> Epoch: 1 Average eval loss: 28.2232437134
====> Epoch: 2 Average train loss: 42.9168661465
====> Epoch: 2 Average L0 reg loss: 0.2911557696
====> Epoch: 2 Average eval loss: 4.3544363976
====> Epoch: 3 Average train loss: 14.5236438739
====> Epoch: 3 Average L0 reg loss: 0.2908907707
====> Epoch: 3 Average eval loss: 4.2804379463
====> Epoch: 4 Average train loss: 10.4002836793
====> Epoch: 4 Average L0 reg loss: 0.2905083405
====> Epoch: 4 Average eval loss: 3.8734283447
====> Epoch: 5 Average train loss: 9.1733456146
====> Epoch: 5 Average L0 reg loss: 0.2900795895
====> Epoch: 5 Average eval loss: 3.0318460464
====> Epoch: 6 Average train loss: 8.0199446121
====> Epoch: 6 Average L0 reg loss: 0.2894383367
====> Epoch: 6 Average eval l

In [None]:
# creating the plots
import matplotlib.pyplot as plt
import os

fig, (ax1, ax2) = plt.subplots(1, 2)
fig.suptitle('Training and Evaluation Metrics')

data_train = {'FCNN (train)': metrics_fcnn[0], 'SparseFCNN (train)': metrics_sparsefcnn[0], 'L0SINDy (train)': metrics_l0sindy[0]}
methods_train = list(data_train.keys())
values_train = list(data_train.values())

# creating the bar plot
ax1.bar(methods_train, values_train, color='maroon', width=0.4)

data_eval = {'FCNN (eval)': metrics_fcnn[1], 'SparseFCNN (eval)': metrics_sparsefcnn[1], 'L0SINDy (eval)': metrics_l0sindy[0]}
methods_eval = list(data_eval.keys())
values_eval = list(data_eval.values())

ax2.bar(methods_eval, values_eval, color='blue', width=0.4)

save_dir = "figures"
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
fig.savefig('figures/LearningReward.png', dpi=300)