<a href="https://colab.research.google.com/github/sibyjackgrove/gym-SolarPVDER-environment/blob/master/examples/gym_PVDER_environment_import_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Clone the gym-PVDER repository and install it

In [None]:
# Download the environment's source code from GitHub into the current
# working directory (shell escape: runs `git` outside the Python kernel).
!git clone https://github.com/sibyjackgrove/gym-SolarPVDER-environment.git

## Change into the repository directory and install the package with pip

In [None]:
# Enter the cloned repository so the git and pip commands that follow run
# inside it. The explicit `%cd` magic is used instead of a bare `cd` so the
# cell does not depend on IPython's automagic being enabled (or on no
# Python variable named `cd` shadowing it).
%cd gym-SolarPVDER-environment

In [None]:
# Bring the checkout up to date with the remote; this matters when the
# repository directory already existed before the clone cell was run.
!git pull

In [None]:
# Editable install of the package found in the current directory.
# `%pip` is spelled out explicitly so the install targets the running
# kernel's environment and does not rely on IPython's automagic.
%pip install -e .

## Import the necessary modules

In [None]:
import gym
import gym_PVDER
import tensorflow as tf

from tf_agents.agents.dqn import dqn_agent
from tf_agents.agents.dqn import q_network
from tf_agents.drivers import dynamic_episode_driver
from tf_agents.drivers import dynamic_step_driver
from tf_agents.environments import suite_gym
from tf_agents.environments import tf_py_environment
from tf_agents.environments import trajectory
from tf_agents.metrics import metric_utils
from tf_agents.metrics import tf_metrics
from tf_agents.policies import random_tf_policy
from tf_agents.replay_buffers import tf_uniform_replay_buffer
from tf_agents.utils import common

## Hyperparameters


In [None]:
# --- Environment -----------------------------------------------------------
env_name = 'PVDER-v0'  # @param

# --- Training loop ---------------------------------------------------------
num_iterations = 20000  # @param
log_interval = 200  # @param

# --- Experience collection / replay buffer ---------------------------------
initial_collect_steps = 1000  # @param
collect_steps_per_iteration = 1  # @param
replay_buffer_capacity = 100000  # @param

# --- Network and optimisation ----------------------------------------------
# Sizes of the fully connected layers handed to the Q-network.
fc_layer_params = (100,)
batch_size = 64  # @param
learning_rate = 1e-3  # @param

# --- Evaluation ------------------------------------------------------------
num_eval_episodes = 10  # @param
eval_interval = 1000  # @param

## Environment

In [None]:
# Build the environment through the tf-agents loader for Gym environments.
env = suite_gym.load(env_name)
env.render()

# Show the specs the environment reports for observations and actions;
# each label is followed by its value on the next line.
print('Observation Spec:', env.time_step_spec().observation, sep='\n')
print('Action Spec:', env.action_spec(), sep='\n')

In [None]:
# Training environment: a Python environment wrapped as a TF environment.
train_py_env = suite_gym.load(env_name)
train_env = tf_py_environment.TFPyEnvironment(train_py_env)

# Evaluation environment: a second, separately loaded instance wrapped the
# same way.
eval_py_env = suite_gym.load(env_name)
eval_env = tf_py_environment.TFPyEnvironment(eval_py_env)

## Create a DQN agent

In [None]:
# Q-network built from the training environment's observation and action
# specs, with fully connected layers sized by `fc_layer_params`.
q_net = q_network.QNetwork(
    train_env.observation_spec(),
    train_env.action_spec(),
    fc_layer_params=fc_layer_params,
)

# Adam optimizer driven by the `learning_rate` hyperparameter.
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)

# Integer variable handed to the agent as its train-step counter.
train_step_counter = tf.compat.v2.Variable(0)

# DQN agent wired to the network, optimizer, loss function and counter above.
tf_agent = dqn_agent.DqnAgent(
    train_env.time_step_spec(),
    train_env.action_spec(),
    q_network=q_net,
    optimizer=optimizer,
    td_errors_loss_fn=dqn_agent.element_wise_squared_loss,
    train_step_counter=train_step_counter,
)
tf_agent.initialize()

## Create policies from the agent

In [None]:
# Policy that picks random actions; it is built only from the training
# environment's specs and does not involve the agent.
random_policy = random_tf_policy.RandomTFPolicy(
    time_step_spec=train_env.time_step_spec(),
    action_spec=train_env.action_spec(),
)

# Policies exposed by the agent: one for evaluation, one for data collection.
eval_policy = tf_agent.policy
collect_policy = tf_agent.collect_policy

## Evaluate a random policy using a tf-agents episode driver

In [None]:
# Metrics passed to the driver as observers.
average_return = tf_metrics.AverageReturnMetric()
num_episodes = tf_metrics.NumberOfEpisodes()
env_steps = tf_metrics.EnvironmentSteps()
observers = [average_return, num_episodes, env_steps]

# Run the random policy on the evaluation environment for two episodes.
# `dynamic_episode_driver` must be imported from `tf_agents.drivers`.
driver = dynamic_episode_driver.DynamicEpisodeDriver(
    eval_env, random_policy, observers, num_episodes=2)

# Initial driver.run will reset the environment and initialize the policy.
final_time_step, policy_state = driver.run()

print('final_time_step', final_time_step)
print('Number of Steps: ', env_steps.result().numpy())
print('Number of Episodes: ', num_episodes.result().numpy())
# Converted with .numpy() like the other metrics so a plain number is shown
# instead of the tensor repr.
print('Average Return: ', average_return.result().numpy())