In [15]:
import sys
import gym
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
from tqdm import tqdm
from ddpg import DDPGagent
from utils import *
from IPython.display import clear_output

In [16]:
# Number of training episodes; the training loop iterates over range(NUM_EPISODES).
NUM_EPISODES = 50


In [17]:
# Train a DDPG agent on Pendulum-v0, save the agent, and plot the learning curve.
env = NormalizedEnv(gym.make("Pendulum-v0"))

agent = DDPGagent(env)
noise = OUNoise(env.action_space)
batch_size = 128
rewards = []      # total reward collected in each episode
avg_rewards = []  # mean of the most recent (up to 10) episode rewards

for episode in tqdm(range(NUM_EPISODES)):
    state = env.reset()
    noise.reset()
    episode_reward = 0
    done = False

    # Each episode is capped at 500 environment steps.
    for step in range(500):
        # Policy action first, then the noise process perturbs it.
        action = agent.get_action(state)
        action = noise.get_action(action, step)
        new_state, reward, done, _ = env.step(action)
        agent.memory.push(state, action, reward, new_state, done)

        # Only start updating once the buffer holds more than one batch.
        if len(agent.memory) > batch_size:
            agent.update(batch_size)

        state = new_state
        episode_reward += reward

        if done:
            break

    # Record this episode BEFORE computing the running average, so the logged
    # average includes the current episode and is never the mean of an empty
    # list (which yields nan and a RuntimeWarning on the first episode).
    rewards.append(episode_reward)
    avg_rewards.append(np.mean(rewards[-10:]))

    # As before, only episodes that ended with `done` are logged.
    if done:
        sys.stdout.write("episode: {}, reward: {}, average _reward: {} \n".format(episode, np.round(episode_reward, decimals=2), avg_rewards[-1]))

agent.save('ddpg_actor.pt')

# Learning curve: raw per-episode reward and its 10-episode running mean.
plt.plot(rewards, label='episode reward')
plt.plot(avg_rewards, label='running mean (last 10)')
plt.xlabel('Episode')
plt.ylabel('Reward')
plt.legend()
plt.show()

NameError: name 'tqdm' is not defined

In [None]:
def np2tsr(state):
    """Turn a NumPy array into a float32 tensor with a new leading axis of size 1.

    Args:
        state: NumPy array to convert.

    Returns:
        A ``torch.float32`` tensor of shape ``(1, *state.shape)``.
    """
    as_float = torch.from_numpy(state).to(torch.float32)
    # Indexing with None inserts a new axis at position 0.
    return as_float[None]

In [None]:
# Roll out the saved actor for one rendered episode and report its return.
#
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code; only load checkpoints you produced yourself. Newer PyTorch releases
# may also require an explicit `weights_only=False` to load a full pickled
# model; confirm against the installed version.
model = torch.load('ddpg_actor.pt')
state = np2tsr(env.reset())

# Start the return from zero; otherwise it would be added on top of the
# `episode_reward` left over from the last training episode.
episode_reward = 0

try:
    for step in range(500):
        env.render()
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            action = model(state).squeeze().detach().numpy()
        new_state, reward, done, _ = env.step(action)
        state = np2tsr(new_state)
        episode_reward += reward

        if done:
            # NOTE(review): `episode` and `rewards` are leftovers from the
            # training cell, so the episode index and average printed here
            # describe training, not this rollout.
            print(f"episode: {episode}, reward: {np.round(episode_reward, decimals=2)}, average _reward: {np.mean(rewards[-10:])} \n")
            break
finally:
    # Always close the environment, even if the rollout raises or the step
    # budget runs out before `done` is reported.
    env.close()