In [1]:
!pip install scikit-image



In [2]:
from rl_gans.algos.gan import GAN
from rl_gans.model.model import GANs_Model

In [3]:
from rl_gans.utils.argument import parse_args
# Build the experiment configuration with the project's argument parser.
# NOTE(review): no arguments are passed explicitly, so the values displayed below
# are presumably the parser's defaults — confirm.
args = parse_args()
args  # bare expression: display the resulting Namespace

Namespace(action_repeat=4, actor_beta=0.9, actor_log_std_max=2, actor_log_std_min=-10, actor_lr=0.001, actor_update_freq=2, agent='gan', agent_image_size=84, alpha_beta=0.5, alpha_lr=0.0001, batch_size=128, critic_beta=0.9, critic_encoder_tau=0.05, critic_lr=0.001, critic_target_update_freq=2, critic_tau=0.01, discount=0.99, discriminator_beta=0.5, discriminator_lr=0.0001, discriminator_update_freq=2, domain_name='cheetah', encoder_feature_dim=50, env_image_size=(84,), eval_freq=10000, frame_stack=3, generator_beta=0.5, generator_lr=0.0001, generator_update_freq=1, hidden_dim=1024, init_steps=1000, init_temperature=0.1, log_interval=25, num_eval_episodes=10, num_filters=32, num_layers=4, num_train_steps=1000000, replay_buffer_capacity=100000, save_buffer=False, save_model=False, save_tb=False, seed=1, task_name='run', work_dir='.')

In [4]:
import gym
from rl_gans.wrappers.pixel_observation_wrapper import PixelObservation

# Create the gym 'HalfCheetah-v2' environment and wrap it in PixelObservation.
# `args.env_image_size` is a tuple (see the Namespace above), hence the [0] index.
env = PixelObservation(
    gym.make('HalfCheetah-v2'),
    observation_size=args.env_image_size[0],
    normalize=False,
)

Creating window glfw


In [5]:
# Keep the wrapped environment's observation shape; it is reused below when
# building the model and the replay buffer.
env_obs_shape = env.observation_space.shape
print(env_obs_shape)

(3, 84, 84)


In [6]:
# Model set-up: noise dimensionality, compute device, and the GAN networks.
import torch  # local import: only needed here to probe for a GPU

z_dim = 10  # length of each noise vector passed to get_noise / the generator
# Fall back to the CPU when no CUDA device is available instead of failing
# later on the first tensor allocation.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
gan_model = GANs_Model(env_obs_shape, z_dim, args.num_layers, args.num_filters, device)

In [7]:
# Wrap the networks in the GAN training algorithm.
# NOTE(review): optimiser/update-frequency settings presumably come from `args` — confirm in rl_gans.algos.gan.
gan = GAN(gan_model, device, args)

In [8]:
import os
import json

# Persist the run configuration as pretty-printed JSON inside the work directory.
args_path = os.path.join(args.work_dir, 'args.json')
with open(args_path, 'w') as args_file:
    json.dump(vars(args), args_file, sort_keys=True, indent=4)

In [9]:
from rl_gans.utils.replay_buffer import make_replay_buffer, ReplayBufferStorage
from pathlib import Path

In [10]:
# Episode storage rooted at <work_dir>/buffer; the same directory is later
# passed to make_replay_buffer as `replay_dir`.
replay_storage = ReplayBufferStorage(Path(args.work_dir) / 'buffer')

In [11]:
# Collect experience from a uniformly random policy into `replay_storage`.
NUM_COLLECT_STEPS = 10000  # total number of environment steps to gather
# Episode length at which `done` is not recorded as a terminal
# (presumably the environment's time limit — confirm).
MAX_EPISODE_STEPS = 1000

episode, episode_reward, done = 0, 0, True
episode_step = 0

for step in range(NUM_COLLECT_STEPS):
    if done:
        if step > 0:
            replay_storage.add(obs, None, None, True)  # add the last observation for each episode
        obs = env.reset()
        done = False
        episode_reward = 0
        episode_step = 0
        episode += 1
        print(episode)
    action = env.action_space.sample()
    next_obs, reward, done, _ = env.step(action)

    # allow infinite bootstrap: a `done` raised on the MAX_EPISODE_STEPS-th step
    # is stored as 0 rather than as a terminal flag
    done_bool = 0 if episode_step + 1 == MAX_EPISODE_STEPS else float(done)
    episode_reward += reward
    replay_storage.add(obs, action, reward, done_bool)

    obs = next_obs
    episode_step += 1

# Inside the loop an episode is only closed at the start of the *next* iteration,
# so an episode that finishes exactly on the final step would never get its last
# observation stored. Close it here the same way the loop does.
# (`episode > 0` guards the degenerate case where the loop body never ran.)
if done and episode > 0:
    replay_storage.add(obs, None, None, True)

1
2
3
4
5
6
7
8
9
10


In [12]:
# Build the sampling side of the replay buffer on top of the directory that
# `replay_storage` writes to. Single-step returns, one loader worker.
replay_buffer = make_replay_buffer(
    replay_dir=Path(args.work_dir) / 'buffer',
    max_size=args.replay_buffer_capacity,
    batch_size=args.batch_size,
    num_workers=1,
    save_snapshot=False,
    nstep=1,
    discount=args.discount,
    obs_shape=env_obs_shape,
    device=device,
    image_size=args.agent_image_size,
    image_pad=False,
)

In [13]:
from rl_gans.utils.logger import Logger
# Logger handed to gan.update in the training cell.
# NOTE(review): the single-letter name `L` is referenced by that cell, so renaming it means updating both places.
L = Logger(args.work_dir, use_tb=args.save_tb, config=args.agent)

In [14]:
import time

In [15]:
# Train the GAN from the replay buffer for num_train_steps + 1 updates.
# NOTE(review): per this cell's recorded output, the update currently fails with a
# RuntimeError: the discriminator's first Conv2d expects 3 input channels but is fed
# a [128, 32, 3, 3] tensor, which is exactly the shape the generator produces
# (see fake.shape further down). The fix presumably belongs in the generator
# definition inside GANs_Model rather than in this loop — confirm.
start_time = time.time()  # wall-clock start; not read by any visible cell

for step in range(args.num_train_steps+1):
    gan.update(replay_buffer, L, step)

RuntimeError: Given groups=1, weight of size [32, 3, 4, 4], expected input[128, 32, 3, 3] to have 3 channels, but got 32 channels instead

In [16]:
# Debugging the shape mismatch: draw one batch and keep only the first of the
# five returned items (bound to `obs`, overwriting the variable from the collection loop).
obs, _, _, _, _ = replay_buffer.sample()
batch_size = len(obs)

In [17]:
from rl_gans.algos.gan import get_noise
# Sample generator input noise for the whole batch
# (presumably shape (batch_size, z_dim) — confirm in get_noise).
fake_noise = get_noise(batch_size, z_dim, device=device)
fake_noise

tensor([[ 0.3682, -0.1430,  0.7659,  ..., -0.5638, -0.9228,  0.9215],
        [ 0.2166,  0.6272,  0.7412,  ...,  0.3806, -0.4097,  0.4027],
        [ 0.6481,  0.0069,  0.9392,  ...,  0.0620, -0.7477, -0.1487],
        ...,
        [ 1.3160,  0.6830, -0.9897,  ...,  0.4550,  1.1168,  2.3770],
        [ 0.6375,  0.7678,  1.0186,  ...,  0.7207,  1.4929, -0.6468],
        [-1.3938,  0.7681,  1.6862,  ...,  0.4052, -0.0753, -0.2733]],
       device='cuda:0')

In [18]:
# Run the generator on the noise batch to inspect what it actually produces.
fake = gan_model.generator(fake_noise)

In [20]:
# The discriminator consumes 3-channel inputs, so the generator output has to be
# 3-channel as well; compare against the shape displayed below.
fake.shape

torch.Size([128, 32, 3, 3])

ModuleNotFoundError: No module named 'torchsummary'

In [23]:
# Print the discriminator architecture to check which input channels it expects.
print(gan_model.discriminator)

Discriminator(
  (disc): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 32, kernel_size=(4, 4), stride=(2, 2))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.2, inplace=True)
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.2, inplace=True)
    )
    (2): Sequential(
      (0): Conv2d(64, 1, kernel_size=(4, 4), stride=(2, 2))
    )
  )
)


In [24]:
!pip install torchsummary 

Collecting torchsummary
  Downloading torchsummary-1.5.1-py3-none-any.whl (2.8 kB)
Installing collected packages: torchsummary
Successfully installed torchsummary-1.5.1


In [26]:
from torchsummary import summary
# Layer-by-layer summary of the discriminator for the environment's observation shape.
# Reuse the notebook's own shape/device settings instead of repeating literals
# (torchsummary defaults to device="cuda" when none is given).
summary(gan_model.discriminator, env_obs_shape, device=device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 41, 41]           1,568
       BatchNorm2d-2           [-1, 32, 41, 41]              64
         LeakyReLU-3           [-1, 32, 41, 41]               0
            Conv2d-4           [-1, 64, 19, 19]          32,832
       BatchNorm2d-5           [-1, 64, 19, 19]             128
         LeakyReLU-6           [-1, 64, 19, 19]               0
            Conv2d-7              [-1, 1, 8, 8]           1,025
Total params: 35,617
Trainable params: 35,617
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.08
Forward/backward pass size (MB): 1.76
Params size (MB): 0.14
Estimated Total Size (MB): 1.98
----------------------------------------------------------------


In [29]:
# Layer-by-layer summary of the generator for a single noise vector of length z_dim.
# Reuse `z_dim` and `device` so this stays in sync with the model-construction cell
# (torchsummary defaults to device="cuda" when none is given).
summary(gan_model.generator, (1, z_dim), device=device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
   ConvTranspose2d-1             [-1, 32, 3, 3]           2,912
       BatchNorm2d-2             [-1, 32, 3, 3]              64
              ReLU-3             [-1, 32, 3, 3]               0
Total params: 2,976
Trainable params: 2,976
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.01
Params size (MB): 0.01
Estimated Total Size (MB): 0.02
----------------------------------------------------------------


In [30]:
# Scratch arithmetic: 84 (the image side length used above) divided by 32.
# NOTE(review): purpose is not stated — presumably sizing the generator's upsampling; confirm or remove.
84 /32

2.625