In [6]:
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

import os, sys

# Make the project modules (Algorithm, Environment, Replay_Buffer, Meta_Learner)
# importable. They are expected to live in the parent of the current working
# directory. os.path.abspath resolves a bare file name against the cwd, so the
# four original appends all produced this same directory regardless of the file
# name used. Add it once, and only when missing, so re-running the cell does not
# keep growing sys.path.
_project_root = os.path.dirname(os.path.abspath(os.curdir))
if _project_root not in sys.path:
    sys.path.append(_project_root)

import Algorithm as ALGS
import Environment as ENVS
import Replay_Buffer as RB
from Meta_Learner import SingleAgentMetaLearner

In [7]:
# Experiment configuration. In the cells below, the 'Environment' section is
# passed to ENVS.initialize_env, 'Replay_Buffer' to RB.initialize_buffer, and
# 'Algorithm', 'Agent' and 'Network' (as a list, in that order) to
# ALGS.initialize_algorithm.
config = {
    
    # Not read by any cell visible in this notebook.
    # NOTE(review): presumably consumed by the meta-learner -- confirm.
    'Learner': {
        'type': 'DDPG', 
        'episodes': 10_000
    },
    
    # Algorithm hyper-parameters; their interpretation lives in Algorithm.py.
    'Algorithm': {
        'algorithm': 'DDPG', 
        'replay_buffer': True, 
        'learning_rate': 0.0001, 
        'optimizer': 'Adam', 
        'loss_function': 'MSELoss', 
        'regularizer': 0, 
        'recurrence': 0, 
        'gamma': 0.99, 
        'beta': 0, 
        'epsilon_start': 1, 
        'epsilon_end': 0.02, 
        'epsilon_decay': 5e-05, 
        'c': 1000
    },
    
    'Environment': {
        'env_type': 'Gym', 
        'environment': 'MountainCar-v0', 
        'action_space': 'discrete', 
        'observation_space': 'discrete', 
        'env_render': False, 
        'num_agents': 1
    }, 
    
    'Replay_Buffer': {
        'max_size': 100_000, 
        'batch_size': 64, 
        'num_agents': 1
    }, 
    
    'Agent': {
        'num_agents': 1
    }, 
    
    # Network layouts. Judging by the printed modules further down, 'layers' is
    # a comma-separated list of hidden widths ('400,300' shows up as
    # Linear(2->400) -> Linear(400->300) in the actor), and 'last_layer' appears
    # to control whether a final output Linear is appended: the critic head
    # (False) prints without one, the actor and critic tail (True) print with one.
    # NOTE(review): inferred from the cell outputs only -- confirm in Algorithm.py.
    'Network': {
        'network_actor': {
            'layers': '400,300', 
            'activation_function': 'ReLU', 
            'output_function': 'Tanh', 
            'last_layer': True
        }, 
        'network_critic_head': {
            'layers': '400', 
            'activation_function': 'ReLU', 
            'output_function': '', 
            'last_layer': False
        }, 
        'network_critic_tail': {
            'layers': '300', 
            'activation_function': 'ReLU', 
            'output_function': '', 
            'last_layer': True
        }
    }
}

# NOTE(review): `ini_path` is not defined in any cell visible in this notebook,
# so on a fresh kernel (Restart & Run All) this line raises NameError. `meta` is
# also not used by any later visible cell. Define `ini_path` in a cell above or
# drop this line -- confirm the intent.
meta = SingleAgentMetaLearner(ini_path)

In [8]:
# Create the environment object from the 'Environment' section of the config;
# the bare name on the last line displays its repr.
env = ENVS.initialize_env(config['Environment'])
env

<Environment.GymEnvironment at 0x7f8d79dfc470>

In [9]:
# Create the replay buffer from the 'Replay_Buffer' section of the config.
# The three trailing None values are positional arguments whose meaning is
# defined by Replay_Buffer.initialize_buffer, which is not visible here.
buffer = RB.initialize_buffer(config['Replay_Buffer'], None, None, None)
buffer

<Replay_Buffer.SimpleExperienceBuffer at 0x7f8d79dd1470>

In [10]:
# Build the algorithm from the environment's observation and action spaces
# plus the three config sections it is given, in this order:
# 'Algorithm', 'Agent', 'Network'.
alg = ALGS.initialize_algorithm(
    env.get_observation_space(),
    env.get_action_space(),
    [config[section] for section in ('Algorithm', 'Agent', 'Network')],
)
alg

<Algorithm.DDPGAlgorithm at 0x7f8d79dd1b38>

In [11]:
# Ask the algorithm for an agent and display it.
agent = alg.create_agent()
agent

<Agent.DDPGAgent at 0x7f8d79dd19e8>

In [12]:
# Display the actor's layer stack.
agent.actor.net

Sequential(
  (0): Linear(in_features=2, out_features=400, bias=True)
  (1): ReLU()
  (2): Linear(in_features=400, out_features=300, bias=True)
  (3): ReLU()
  (4): Linear(in_features=300, out_features=3, bias=True)
  (5): Tanh()
)

In [13]:
# Display the critic's head layers.
agent.critic.net_head

Sequential(
  (0): Linear(in_features=2, out_features=400, bias=True)
  (1): ReLU()
)

In [14]:
# Display the critic's tail layers.
agent.critic.net_tail

Sequential(
  (0): Linear(in_features=403, out_features=300, bias=True)
  (1): ReLU()
  (2): Linear(in_features=300, out_features=1, bias=True)
)

In [89]:
num_of_episodes = 50

# NOTE(review): as saved, this cell stops with the RuntimeError recorded in its
# output ("size mismatch, m1: [1 x 64], m2: [2 x 400]"). m2 matches a
# Linear(in_features=2, out_features=400) layer, which is the first layer of
# both the actor and the critic head printed above. m1 is a single row of 64
# values, and 64 is the configured Replay_Buffer batch_size. So a length-64
# vector is reaching a layer that expects 2 features per row.
# Possible causes, to confirm against Algorithm.update and the buffer's
# sample(): the sampled observations are not stacked to shape (batch, 2), or
# the field order [obs, action, reward, done, next_obs] used below differs from
# the order the algorithm unpacks, so a per-sample scalar field is being read
# as the observation.

step = 0  # running count of environment steps across episodes, passed to alg.update
for i in range(num_of_episodes):
    # The return value of reset() is not needed: the observation is always
    # re-read from the environment at the top of the inner loop.
    env.reset()
    done = False
    while not done:
        obs = env.get_observation()
        action = alg.get_action(agent, obs, i)
        next_obs, reward, done = env.step(action)

        # Store the single transition just observed.
        buffer.append([obs, action, reward, done, next_obs])

        # Kept under a separate name from the transition above, since what
        # sample() returns is handed to the update as-is.
        batch = buffer.sample()

        alg.update(agent, batch, step)
        step += 1

Hello


RuntimeError: size mismatch, m1: [1 x 64], m2: [2 x 400] at /pytorch/aten/src/THC/generic/THCTensorMathBlas.cu:273

In [None]:

class CriticNetwork(nn.Module):
    """Fully connected network with two hidden ReLU layers and a linear output.

    The output layer has no activation, so the estimate is unbounded. This
    matches the notebook's own critic, whose tail ends in a bare Linear layer.

    Args:
        input_size: number of features in each input row.
        hidden_layer1: width of the first hidden layer.
        hidden_layer2: width of the second hidden layer.
        output_size: number of values produced per input row.
    """

    def __init__(self, input_size, hidden_layer1, hidden_layer2, output_size):
        super().__init__()
        # Assigning the layers as attributes is what registers their
        # parameters with nn.Module; no Sequential wrapper is needed because
        # forward() applies the activations explicitly.
        self.linear1 = nn.Linear(in_features=input_size, out_features=hidden_layer1, bias=True)
        self.linear2 = nn.Linear(in_features=hidden_layer1, out_features=hidden_layer2, bias=True)
        self.linear3 = nn.Linear(in_features=hidden_layer2, out_features=output_size, bias=True)

    def forward(self, x):
        """Map a tensor whose last dimension is ``input_size`` to ``output_size`` values.

        Args:
            x: float tensor of shape (..., input_size).

        Returns:
            Tensor of shape (..., output_size); not squashed by any activation.
        """
        x = torch.relu(self.linear1(x))
        x = torch.relu(self.linear2(x))
        return self.linear3(x)

In [None]:

class ActorNetwork(nn.Module):
    """Fully connected network with two hidden ReLU layers and a tanh output.

    Args:
        input_size: number of features in each input row.
        hidden_layer1: width of the first hidden layer.
        hidden_layer2: width of the second hidden layer.
        output_size: number of values produced per input row.
    """

    def __init__(self, input_size, hidden_layer1, hidden_layer2, output_size):
        super().__init__()
        # Assigning the layers as attributes is what registers their
        # parameters with nn.Module; no Sequential wrapper is needed because
        # forward() applies the activations explicitly.
        self.linear1 = nn.Linear(in_features=input_size, out_features=hidden_layer1, bias=True)
        self.linear2 = nn.Linear(in_features=hidden_layer1, out_features=hidden_layer2, bias=True)
        self.linear3 = nn.Linear(in_features=hidden_layer2, out_features=output_size, bias=True)

    def forward(self, x):
        """Map a tensor whose last dimension is ``input_size`` to ``output_size`` values.

        Args:
            x: float tensor of shape (..., input_size).

        Returns:
            Tensor of shape (..., output_size) with every entry in [-1, 1],
            because the output layer is passed through tanh.
        """
        x = torch.relu(self.linear1(x))
        x = torch.relu(self.linear2(x))
        return torch.tanh(self.linear3(x))