# Tensorforce

### Why Tensorforce?
We've seen how to create a model using TensorFlow (Keras). That works well, but it can be done more easily.
This is where Tensorforce comes in. Tensorforce is built on top of TensorFlow, with the goal of making it easier to apply reinforcement learning.

In [None]:
# Imports
import os
import logging

import tensorflow as tf

from tensorforce.agents import Agent
from tensorforce.environments import Environment
from tensorforce.execution import Runner

import numpy as np

In [None]:
# Silence TensorFlow output: the Python-side logger only reports errors,
# and the TF_CPP_MIN_LOG_LEVEL environment variable is set to '3'.
logger = tf.get_logger()
logger.setLevel(logging.ERROR)
# NOTE(review): this variable is assigned after `import tensorflow` has already
# run; it may need to be set before the import to take full effect -- confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

### Assignment
Use the Tensorforce library to create a similar model to the one made in the previous exercise. All needed imports are already there.

In [None]:
# Build the 'CartPole-v1' Gym environment through Tensorforce,
# with visualization switched off for now.
environment = Environment.create(
    environment='gym',
    level='CartPole-v1',
    visualize=False,
)

In [None]:
# Instantiate a PPO agent for the environment created above.
# Every hyperparameter is spelled out explicitly, one per line.
agent = Agent.create(
    agent='ppo',
    environment=environment,
    # Policy network: automatically configured
    network='auto',
    # Optimization settings
    batch_size=10,
    update_frequency=2,
    learning_rate=1e-3,
    subsampling_fraction=0.2,
    optimization_steps=5,
    # Reward estimation settings
    likelihood_ratio_clipping=0.2,
    discount=0.99,
    estimate_terminal=False,
    # Critic: automatically configured network with its own Adam optimizer spec
    critic_network='auto',
    critic_optimizer=dict(optimizer='adam', multi_step=10, learning_rate=1e-3),
    # Preprocessing: none configured
    preprocessing=None,
    # Exploration: both values set to zero
    exploration=0.0,
    variable_noise=0.0,
    # Regularization: both coefficients set to zero
    l2_regularization=0.0,
    entropy_regularization=0.0,
    # TensorFlow / bookkeeping options; everything except the name and the
    # number of parallel interactions is left unset (None)
    name='agent',
    device=None,
    parallel_interactions=4,
    seed=None,
    execution=None,
    saver=None,
    summarizer=None,
    recorder=None,
)

In [None]:
# Tie the agent and the environment together in a Runner
runner = Runner(
    agent=agent,
    environment=environment,
)

In [None]:
# Run 300 episodes with the runner, then close the runner
runner.run(
    num_episodes=300,
)
runner.close()

In [None]:
# Report the number of episodes run and the mean reward over the
# last (at most) 100 recorded episode rewards
recent_rewards = runner.episode_rewards[-100:]
print(
    f"Learning finished. Total episodes: {runner.episodes}. "
    f"Average reward of last 100 episodes: {np.mean(recent_rewards)}"
)

In [None]:
# Evaluate and visualize model: run 100 episodes in evaluation mode with
# visualization switched on, then release all resources.
# NOTE(review): depending on the Tensorforce version, Environment.create may
# return a wrapper object; setting `visualize` on it might not reach the
# underlying Gym environment -- confirm that rendering actually appears.
environment.visualize = True
try:
    runner.run(num_episodes=100, evaluation=True)
finally:
    # The agent and environment were created separately and handed to the
    # runner, so they are closed explicitly here in addition to the runner.
    runner.close()
    agent.close()
    environment.close()