In [2]:
%load_ext autoreload
%autoreload 2

In [38]:
import gym
import gym_combrf

import numpy as np
import random
import math
import matplotlib.pyplot as plt
import tqdm
import os

import torch
import torch.optim as optim
import torch.nn.functional as F




In [7]:
#initialize gym environment
env = gym.make('combrf-v0')

In [6]:
#hyper-parameters
UPDATE_EVERY = 50 #how often to update the network
EPS_START = 1
EPS_END = 0.01
EPS_DECAY = 0.9983
EPS_STEP_LIMIT = 50

TRAIN_EPISODES = 20
TEST_EPISODES = 1
seed = 0

#initialize GPU device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


## Examine state and action spaces

In [9]:
#reset the environment
obs = env.reset()

#size of each action
action_size = env.action_space.shape
print("Size of each action: ", action_size)

#size of observation
print("Size of each observation: ", obs.shape)
print("Observation looks like:")
print(obs)

Size of each action:  (1,)
Size of each observation:  (65,)
Observation looks like:
[-1.40685647e-07-6.56304894e-08j  1.50340783e-07-3.86970426e-08j
 -9.15157490e-08+1.25398086e-07j -8.99467536e-09-1.54980347e-07j
  1.05408028e-07+1.13969118e-07j -1.53808063e-07-2.10449971e-08j
  1.32148582e-07-8.14651121e-08j -5.02954707e-08+1.46867893e-07j
  1.50340783e-07-3.86970426e-08j -9.15157490e-08+1.25398086e-07j
 -8.99467536e-09-1.54980347e-07j  1.05408028e-07+1.13969118e-07j
 -1.53808063e-07-2.10449971e-08j  1.32148582e-07-8.14651121e-08j
 -5.02954707e-08+1.46867893e-07j -5.44672093e-08-1.45372402e-07j
 -9.15157490e-08+1.25398086e-07j -8.99467536e-09-1.54980347e-07j
  1.05408028e-07+1.13969118e-07j -1.53808063e-07-2.10449971e-08j
  1.32148582e-07-8.14651121e-08j -5.02954707e-08+1.46867893e-07j
 -5.44672093e-08-1.45372402e-07j  1.34420096e-07+7.76598365e-08j
 -8.99467536e-09-1.54980347e-07j  1.05408028e-07+1.13969118e-07j
 -1.53808063e-07-2.10449971e-08j  1.32148582e-07-8.14651121e-08j
 -5.02

## Perform random actions in the environment

In [37]:
obs = env.reset()
count = 0
ep_rwd=0
while True:
    action = np.array([round(np.random.uniform(0,2*math.pi), 2)])
    obs, rwd, done, _ = env.step(action)
    ep_rwd += rwd
    count +=1
    if done:
        break
print("Random TX location: {0}".format(env.tx_loc))
print("Capacity: {0}".format(env.cap))
print("Episodic score: {0} with episode length: {1} ".format(ep_rwd, count))

Random TX location: [[ -50 -100    0]]
Capacity: 55.53076991098181
Episodic score: 3.2813957174806715e-10 with episode length: 8 


## Instantiate DDPG agent

In [43]:
#import custom classes
from Source.ddpg_rcv_agent import Agent
from Source.nn_model import Actor, Critic
from Source.ddpg_rcv_agent import Agent, ReplayBuffer, OUNoise
from collections import deque

#reset the environment
obs = env.reset()

state_size = obs.shape[0]
action_size = env.action_space.shape[0]

#instantiate the agent
agent = Agent(state_size=state_size, action_size=action_size, seed=0)

print(type(obs))
obs_tensor = torch.from_numpy(obs).to(device)
print(obs_tensor)

<class 'numpy.ndarray'>


TypeError: can't convert np.ndarray of type numpy.complex128. The only supported types are: float64, float32, float16, int64, int32, int16, int8, uint8, and bool.

## DDPG Training

In [None]:
train_episodes = 400
print_every= 100

scores_deque = deque(maxlen=print_every)
scores = []

for i_episode in range(1,train_episodes+1):
    
    # reset the environment
    obs = env.reset()
    agent.reset()
        
    score = 0
    while True:
        action = agent.act(obs)
        env_info = env.step(action)[brain_name]
        next_state = env_info.vector_observations
        reward = env_info.rewards
        done = env_info.local_done[0]
        agent.step(state, action, reward, next_state, done)
        state = next_state
        score+=env_info.rewards
        t_step +=1
        if done:
            #print('t_step: {}'.format(t_step))
            break
            
    scores_deque.append(score[0])
    scores.append(score[0])
    #print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_deque)), end="")
    torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
    torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pth')
    #if i_episode % print_every == 0:
    #    print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_deque)))
    
    
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(1, len(scores)+1), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.show() 