# Preparation

In [None]:
import subprocess
import sys
# Install gym into the kernel's environment only when it cannot be imported,
# so that re-running the notebook does not invoke pip every time.
# NOTE(review): this targets sys.executable, while the chapter scripts are
# launched with PYTHON_PATH -- confirm both refer to the same environment.
import importlib.util
if importlib.util.find_spec('gym') is None:
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'gym'])

import os
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Interpreter used to launch the chapter scripts. The default is the path the
# notebook was written against; set the RL_PYTHON_PATH environment variable
# before starting the kernel to use a different interpreter on this machine.
PYTHON_PATH = os.environ.get('RL_PYTHON_PATH', '~/anaconda3/envs/tensorflow_p36/bin/python3')

def train(chapter_dir_path, main_file_name, max_episode_num):
    """Run a chapter's training script in a child Python process.

    The script is started with the interpreter named by ``PYTHON_PATH``,
    with ``chapter_dir_path`` as its working directory and
    ``TF_CPP_MIN_LOG_LEVEL=2`` added to its environment. The child's combined
    stdout/stderr is echoed line by line through ``print``.

    Args:
        chapter_dir_path: Directory the script is run from.
        main_file_name: Path of the training script to execute.
        max_episode_num: Value passed to the script as its single
            command-line argument.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status.
    """
    # No shell is involved, so paths containing spaces or shell
    # metacharacters reach the child unchanged; "~" must be expanded here.
    command = [
        os.path.expanduser(PYTHON_PATH),
        str(main_file_name),
        str(max_episode_num),
    ]
    child_env = dict(os.environ, TF_CPP_MIN_LOG_LEVEL='2')
    # The child writes to a pipe rather than a terminal; ask Python not to
    # buffer its output so lines show up while the script is still running.
    child_env.setdefault('PYTHONUNBUFFERED', '1')

    with subprocess.Popen(
        command,
        cwd=chapter_dir_path,
        env=child_env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
        errors='replace',
    ) as process:
        try:
            for line in process.stdout:
                print(line, end='')
        except BaseException:
            # Do not leave the child running when the cell is interrupted.
            process.kill()
            raise

    # Surface failures instead of silently continuing with stale results.
    if process.returncode != 0:
        raise subprocess.CalledProcessError(process.returncode, command)

def plot_result(rewards_file_path):
    """Plot the numbers stored one per line in a text file.

    Args:
        rewards_file_path: Path of a text file in which every non-blank line
            parses as a float.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line is not a valid float.
    """
    # The context manager closes the file even when parsing fails; blank
    # lines (for example a trailing empty line) are skipped, not parsed.
    with open(rewards_file_path, 'r') as rewards_file:
        rewards = [float(line) for line in rewards_file if line.strip()]
    plt.plot(rewards)
    plt.show()

def test(chapter_dir_path, load_play_file_name):
    """Run a chapter's load-and-play script in a child Python process.

    The script is started with the interpreter named by ``PYTHON_PATH``,
    with ``chapter_dir_path`` as its working directory and
    ``TF_CPP_MIN_LOG_LEVEL=2`` added to its environment. The child's combined
    stdout/stderr is echoed line by line through ``print``.

    Args:
        chapter_dir_path: Directory the script is run from.
        load_play_file_name: Script to execute; a relative name is resolved
            against ``chapter_dir_path`` because that is the child's working
            directory.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status.
    """
    # No shell is involved, so paths containing spaces or shell
    # metacharacters reach the child unchanged; "~" must be expanded here.
    command = [os.path.expanduser(PYTHON_PATH), str(load_play_file_name)]
    child_env = dict(os.environ, TF_CPP_MIN_LOG_LEVEL='2')
    # The child writes to a pipe rather than a terminal; ask Python not to
    # buffer its output so lines show up while the script is still running.
    child_env.setdefault('PYTHONUNBUFFERED', '1')

    with subprocess.Popen(
        command,
        cwd=chapter_dir_path,
        env=child_env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
        errors='replace',
    ) as process:
        try:
            for line in process.stdout:
                print(line, end='')
        except BaseException:
            # Do not leave the child running when the cell is interrupted.
            process.kill()
            raise

    # Surface failures instead of silently reporting nothing.
    if process.returncode != 0:
        raise subprocess.CalledProcessError(process.returncode, command)

# Chapter 3. Policy Gradient

### Train

In [None]:
# Chapter 3: run pg_main.py from the Chap3 directory, then plot the rewards
# file expected under its save_weights folder (presumably written by the
# training script -- confirm).
chapter_dir_path = os.path.join(os.getcwd(), 'Chap3')
main_file_name = os.path.join(chapter_dir_path, 'pg_main.py')
max_episode_num = 10000  # forwarded to the script as its command-line argument
train(
    chapter_dir_path=chapter_dir_path,
    main_file_name=main_file_name,
    max_episode_num=max_episode_num,
)

rewards_file_path = os.path.join(chapter_dir_path, 'save_weights', 'pendulum_epi_reward.txt')
plot_result(rewards_file_path=rewards_file_path)

### Test

In [None]:
# Run pg_load_play.py from the Chap3 directory. The directory is rebuilt here
# rather than inherited from the training cell, so this cell still targets
# Chap3 when another chapter's cell was executed in between.
chapter_dir_path = os.path.join(os.getcwd(), 'Chap3')
load_play_file_name = 'pg_load_play.py'
test(chapter_dir_path, load_play_file_name)

# Chapter 4. A2C

### Train

In [None]:
# Chapter 4: run a2c_main.py from the Chap4 directory, then plot the rewards
# file expected under its save_weights folder (presumably written by the
# training script -- confirm).
chapter_dir_path = os.path.join(os.getcwd(), 'Chap4')
main_file_name = os.path.join(chapter_dir_path, 'a2c_main.py')
max_episode_num = 1000  # forwarded to the script as its command-line argument
train(
    chapter_dir_path=chapter_dir_path,
    main_file_name=main_file_name,
    max_episode_num=max_episode_num,
)

rewards_file_path = os.path.join(chapter_dir_path, 'save_weights', 'pendulum_epi_reward.txt')
plot_result(rewards_file_path=rewards_file_path)

### Test

In [None]:
# Run a2c_load_play.py from the Chap4 directory. The directory is rebuilt here
# rather than inherited from the training cell, so this cell still targets
# Chap4 when another chapter's cell was executed in between.
chapter_dir_path = os.path.join(os.getcwd(), 'Chap4')
load_play_file_name = 'a2c_load_play.py'
test(chapter_dir_path, load_play_file_name)

# Chapter 5. A3C (Gradient Parallelism)

### Train

In [None]:
# Chapter 5 (gradient parallelism): run a3c_main.py from Chap5/A3CGradient,
# then plot the rewards file expected under its save_weights folder
# (presumably written by the training script -- confirm).
chapter_dir_path = os.path.join(os.getcwd(), 'Chap5', 'A3CGradient')
main_file_name = os.path.join(chapter_dir_path, 'a3c_main.py')
max_episode_num = 1000  # forwarded to the script as its command-line argument
train(
    chapter_dir_path=chapter_dir_path,
    main_file_name=main_file_name,
    max_episode_num=max_episode_num,
)

rewards_file_path = os.path.join(chapter_dir_path, 'save_weights', 'pendulum_epi_reward.txt')
plot_result(rewards_file_path=rewards_file_path)

In [None]:
# Run a3c_load_play.py from Chap5/A3CGradient. The directory is rebuilt here
# rather than inherited from the training cell; both Chapter 5 variants use
# the same script name, so a stale directory would run the wrong variant.
chapter_dir_path = os.path.join(os.getcwd(), 'Chap5', 'A3CGradient')
load_play_file_name = 'a3c_load_play.py'
test(chapter_dir_path, load_play_file_name)

# Chapter 5. A3C (Data Parallelism)

### Train

In [None]:
# Chapter 5 (data parallelism): run a3c_main.py from Chap5/A3CData, then plot
# the rewards file expected under its save_weights folder (presumably written
# by the training script -- confirm).
chapter_dir_path = os.path.join(os.getcwd(), 'Chap5', 'A3CData')
main_file_name = os.path.join(chapter_dir_path, 'a3c_main.py')
max_episode_num = 1000  # forwarded to the script as its command-line argument
train(
    chapter_dir_path=chapter_dir_path,
    main_file_name=main_file_name,
    max_episode_num=max_episode_num,
)

rewards_file_path = os.path.join(chapter_dir_path, 'save_weights', 'pendulum_epi_reward.txt')
plot_result(rewards_file_path=rewards_file_path)

In [None]:
# Run a3c_load_play.py from Chap5/A3CData. The directory is rebuilt here
# rather than inherited from the training cell; both Chapter 5 variants use
# the same script name, so a stale directory would run the wrong variant.
chapter_dir_path = os.path.join(os.getcwd(), 'Chap5', 'A3CData')
load_play_file_name = 'a3c_load_play.py'
test(chapter_dir_path, load_play_file_name)

# Chapter 6. PPO

### Train

In [None]:
# Chapter 6: run ppo_main.py from the Chap6 directory, then plot the rewards
# file expected under its save_weights folder (presumably written by the
# training script -- confirm).
chapter_dir_path = os.path.join(os.getcwd(), 'Chap6')
main_file_name = os.path.join(chapter_dir_path, 'ppo_main.py')
max_episode_num = 1000  # forwarded to the script as its command-line argument
train(
    chapter_dir_path=chapter_dir_path,
    main_file_name=main_file_name,
    max_episode_num=max_episode_num,
)

rewards_file_path = os.path.join(chapter_dir_path, 'save_weights', 'pendulum_epi_reward.txt')
plot_result(rewards_file_path=rewards_file_path)

In [None]:
# Run ppo_load_play.py from the Chap6 directory. The directory is rebuilt here
# rather than inherited from the training cell, so this cell still targets
# Chap6 when another chapter's cell was executed in between.
chapter_dir_path = os.path.join(os.getcwd(), 'Chap6')
load_play_file_name = 'ppo_load_play.py'
test(chapter_dir_path, load_play_file_name)