<a href="https://colab.research.google.com/github/tim108108/Python/blob/master/RLearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab VM; later cells write score files and
# videos under /content/gdrive/MyDrive/Colab Notebooks/scores.
from google.colab import drive
drive.mount('/content/gdrive') 
# %cd /content/gdrive/MyDrive/Colab Notebooks/

Mounted at /content/gdrive




```
!pip install gym pyvirtualdisplay > /dev/null 2>&1
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1
!pip install colabgymrender==1.0.2 -q

import pyvirtualdisplay
import gym
from colabgymrender.recorder import Recorder

env = gym.make("CartPole-v0")
directory = './video'
env = Recorder(env, directory)

observation = env.reset()
terminal = False
while not terminal:
  action = env.action_space.sample()
  observation, reward, terminal, info = env.step(action)

env.play()
```



#### Recorder

In [None]:
!pip install gym pyvirtualdisplay > /dev/null 2>&1
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1

from pyvirtualdisplay import Display
from moviepy.editor import *
# import time
import gym
import cv2
import os

class Recorder:
  """Gym environment wrapper that records each episode to an MP4 file.

  Every call to `reset` starts a new clip named `<directory>/<count>.mp4`.
  `step` appends one rendered frame per environment step and finalises the
  clip when the episode terminates. Attributes the recorder does not define
  itself (for example `action_space`) are forwarded to the wrapped environment.

  Args:
    env: environment exposing `reset`, `step` and `render(mode='rgb_array')`.
    directory: folder that receives the clips; created (with parents) if missing.
    fps: frame rate of the clips. When None, the environment's
      'video.frames_per_second' metadata entry is used, falling back to 30.
  """

  # Names owned by the recorder itself; these are never forwarded to the wrapped env.
  _OWN_ATTRIBUTES = frozenset([
    'env', 'path', 'directory', 'play', 'width', 'height', 'fps', 'writer',
    'reset', 'step', 'paused', 'display', 'count',
  ])

  def __init__(self, env, directory, fps=None):
    # A display has to exist before the first render call below.
    display = Display()
    display.start()
    # makedirs (not mkdir) so nested target folders are created as well.
    os.makedirs(directory, exist_ok=True)
    self.env = env
    self.directory = directory
    self.env.reset()
    # Image arrays are laid out (rows, columns, channels) = (height, width, channels).
    self.height, self.width, _ = self.env.render(mode = 'rgb_array').shape
    if fps is not None:
      self.fps = fps
    else:
      # metadata is a mapping, so the key must be looked up, not tested with hasattr.
      metadata = getattr(self.env, 'metadata', None) or {}
      self.fps = metadata.get('video.frames_per_second', 30)
    self.writer = None
    self.paused = False
    self.display = Display(size = (self.width, self.height))
    self.display.start()
    self.count = 0

  def pause(self):
    """Stop writing frames until `resume` is called."""
    self.paused = True

  def resume(self):
    """Continue writing frames after a `pause`."""
    self.paused = False

  def __getattr__(self, name):
    """Forward unknown attributes to the wrapped environment.

    Python only calls this after normal lookup failed, so a recorder-owned
    name arriving here is genuinely unset (e.g. `path` before the first
    `reset`). Raising AttributeError for it avoids unbounded recursion.
    """
    if name in self._OWN_ATTRIBUTES:
      raise AttributeError(f'{type(self).__name__!r} object has no attribute {name!r}')
    return getattr(self.env, name)

  def _start_clip(self):
    """Finalise any open clip and open a writer for the next numbered file."""
    if self.writer is not None:
      self.writer.release()
    self.count += 1
    self.path = f'{self.directory}/{self.count}.mp4'
    # cv2 expects the frame size as (width, height).
    self.writer = cv2.VideoWriter(self.path, cv2.VideoWriter_fourcc(*'MP4V'), self.fps, (self.width, self.height))

  def _write_frame(self):
    """Render the current state and append it to the open clip (RGB -> BGR for cv2)."""
    self.writer.write(cv2.cvtColor(self.env.render(mode = 'rgb_array'), cv2.COLOR_RGB2BGR))

  def reset(self):
    """Reset the environment, start a new clip and return the first observation."""
    observation = self.env.reset()
    self._start_clip()
    if not self.paused:
      self._write_frame()
    return observation

  def step(self, action):
    """Advance the environment by one step, record the frame and return the step result.

    Returns the same `(observation, reward, terminal, info)` tuple as the
    wrapped environment. The clip is released once `terminal` is true.
    """
    observation, reward, terminal, info = self.env.step(action)
    if self.writer is None:
      # step() called before reset(): the constructor already reset the env,
      # so start recording here instead of failing on a missing writer.
      self._start_clip()
    if not self.paused:
      self._write_frame()
    if terminal:
      self.writer.release()
    return observation, reward, terminal, info

  def play(self):
    """Display the most recently recorded clip inline and return the display object."""
    if not self.display.is_alive():
      self.display.start()

    # Make sure the file is finalised even if the episode never terminated.
    if self.writer is not None:
      self.writer.release()

    with VideoFileClip(self.path) as video:
      return video.ipython_display(width = self.width, height = self.height)

Imageio: 'ffmpeg-linux64-v3.3.1' was not found on your computer; downloading it now.
Try 1. Download from https://github.com/imageio/imageio-binaries/raw/master/ffmpeg/ffmpeg-linux64-v3.3.1 (43.8 MB)
Downloading: 8192/45929032 bytes (0.0%)647168/45929032 bytes (1.4%)3596288/45929032 bytes (7.8%)6717440/45929032 bytes (14.6%)9838592/45929032 bytes (21.4%)12943360/45929032 bytes (28.2%)16302080/45929032 bytes (35.5%)19587072/45929032 bytes (42.6%)22962176/45929032 bytes (50.0%)26116096/45929032 bytes (56.9%)29368320/45929032 bytes (63.9%)32612352/45929032 bytes (71.0%)35930112/45929032 bytes (78.2%)38961

#### Demo CartPole




In [None]:
!pip install gym pyvirtualdisplay > /dev/null 2>&1
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1

# !pip install colabgymrender==1.0.2 -q

import pyvirtualdisplay
import gym
# from colabgymrender.recorder import Recorder

# Demo: record one CartPole-v1 episode driven by random actions and play it back.
env = gym.make("CartPole-v1")
directory = './video'
# directory = '/content/gdrive/MyDrive/Colab Notebooks/scores/video'

# Wrap the environment with the Recorder class defined in the cell above.
env = Recorder(env, directory)

observation = env.reset()
terminal = False
while not terminal:
  # Sample a random action; no learning happens in this demo.
  action = env.action_space.sample()
  observation, reward, terminal, info = env.step(action)
# Last expression of the cell: its value is rendered as the cell output.
env.play()

100%|██████████| 15/15 [00:00<00:00, 188.10it/s]


#### DQN
Source: [gsurma/cartpole on GitHub](https://github.com/gsurma/cartpole)

In [None]:
from statistics import mean
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from collections import deque
import os
import csv
import numpy as np

# Output files written by ScoreLogger; the scores.* pair is deleted when a new
# ScoreLogger is constructed, the solved.* pair is only ever appended to.
SCORES_CSV_PATH = "/content/gdrive/MyDrive/Colab Notebooks/scores/scores.csv"
SCORES_PNG_PATH = "/content/gdrive/MyDrive/Colab Notebooks/scores/scores.png"
SOLVED_CSV_PATH = "/content/gdrive/MyDrive/Colab Notebooks/scores/solved.csv"
SOLVED_PNG_PATH = "/content/gdrive/MyDrive/Colab Notebooks/scores/solved.png"
# Mean score over the rolling window that counts as "solved".
# NOTE(review): 195 looks like the CartPole-v0 threshold while the notebook
# trains CartPole-v1 — confirm this is intended.
AVERAGE_SCORE_TO_SOLVE = 195
# Size of the rolling window of most recent runs used for the average.
CONSECUTIVE_RUNS_TO_SOLVE = 100


class ScoreLogger:
    """Persist per-run scores to CSV, plot them, and detect when training is solved.

    Keeps a rolling window of the last CONSECUTIVE_RUNS_TO_SOLVE scores. Once
    the window is full and its mean reaches AVERAGE_SCORE_TO_SOLVE, the solve
    point is written to the "solved" CSV/PNG pair and the process exits.
    """

    def __init__(self, env_name):
        """Create a logger for `env_name` and delete score files of a previous session."""
        self.scores = deque(maxlen=CONSECUTIVE_RUNS_TO_SOLVE)
        self.env_name = env_name

        # Only the scores.* pair is reset; solved.* accumulates across sessions.
        for stale_path in (SCORES_PNG_PATH, SCORES_CSV_PATH):
            if os.path.exists(stale_path):
                os.remove(stale_path)

    def add_score(self, score, run):
        """Record the score of one finished run and refresh the score plot.

        Args:
            score: score achieved in the run (written to the CSV as-is).
            run: 1-based index of the run, used to report when solving happened.
        """
        self._save_csv(SCORES_CSV_PATH, score)
        self._save_png(input_path=SCORES_CSV_PATH,
                       output_path=SCORES_PNG_PATH,
                       x_label="runs",
                       y_label="scores",
                       average_of_n_last=CONSECUTIVE_RUNS_TO_SOLVE,
                       show_goal=True,
                       show_trend=True,
                       show_legend=True)
        self.scores.append(score)
        mean_score = mean(self.scores)
        print ("Scores: (min: " + str(min(self.scores)) + ", avg: " + str(mean_score) + ", max: " + str(max(self.scores)) + ")\n")
        if mean_score >= AVERAGE_SCORE_TO_SOLVE and len(self.scores) >= CONSECUTIVE_RUNS_TO_SOLVE:
            # Number of runs that preceded the solving window.
            solve_score = run-CONSECUTIVE_RUNS_TO_SOLVE
            print ("Solved in " + str(solve_score) + " runs, " + str(run) + " total runs.")
            self._save_csv(SOLVED_CSV_PATH, solve_score)
            self._save_png(input_path=SOLVED_CSV_PATH,
                           output_path=SOLVED_PNG_PATH,
                           x_label="trials",
                           y_label="steps before solve",
                           average_of_n_last=None,
                           show_goal=False,
                           show_trend=False,
                           show_legend=False)
            # NOTE(review): inside a notebook kernel exit() may shut the kernel
            # down rather than just stop the cell — confirm this is wanted.
            exit()

    def _save_png(self, input_path, output_path, x_label, y_label, average_of_n_last, show_goal, show_trend, show_legend):
        """Plot the first column of the CSV at `input_path` and save it to `output_path`.

        Args:
            input_path: CSV file with one integer value per row.
            output_path: image file to write.
            x_label, y_label: axis labels.
            average_of_n_last: window size for the dashed average line; None
                averages over every row.
            show_goal: draw the AVERAGE_SCORE_TO_SOLVE reference line.
            show_trend: draw a linear fit (skipping the first point).
            show_legend: add a legend in the upper-left corner.
        """
        with open(input_path, "r", newline="") as scores:
            # Blank rows carry no value and would break int(row[0]); skip them.
            y = [int(row[0]) for row in csv.reader(scores) if row]
        x = list(range(len(y)))

        plt.subplots()
        try:
            plt.plot(x, y, label="score per run")

            average_range = average_of_n_last if average_of_n_last is not None else len(x)
            if y:
                # Averaging an empty list would only produce NaN and a warning.
                recent = y[-average_range:]
                plt.plot(x[-average_range:], [np.mean(recent)] * len(recent), linestyle="--", label="last " + str(average_range) + " runs average")

            if show_goal:
                plt.plot(x, [AVERAGE_SCORE_TO_SOLVE] * len(x), linestyle=":", label=str(AVERAGE_SCORE_TO_SOLVE) + " score average goal")

            if show_trend and len(x) > 1:
                trend_x = x[1:]
                z = np.polyfit(np.array(trend_x), np.array(y[1:]), 1)
                p = np.poly1d(z)
                plt.plot(trend_x, p(trend_x), linestyle="-.",  label="trend")

            plt.title(self.env_name)
            plt.xlabel(x_label)
            plt.ylabel(y_label)

            if show_legend:
                plt.legend(loc="upper left")

            plt.savefig(output_path, bbox_inches="tight")
        finally:
            # Always release the figure; this method runs once per training run.
            plt.close()

    def _save_csv(self, path, score):
        """Append `score` as a single-column row to the CSV at `path`.

        The file and its parent directory are created when missing.
        """
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        # Append mode creates the file on demand; newline="" is what the csv
        # module expects so that it controls line endings itself.
        with open(path, "a", newline="") as scores_file:
            csv.writer(scores_file).writerow([score])

In [None]:
!pip install gym pyvirtualdisplay > /dev/null 2>&1
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1
!pip install pyglet
!pip install keras
# !pip install colabgymrender==1.0.2 -q

import pyvirtualdisplay
import random
import gym
import numpy as np
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
# from colabgymrender.recorder import Recorder
# from scores.score_logger import ScoreLogger




# Gym environment id used for training and as the plot title.
ENV_NAME = "CartPole-v1"

# Discount applied to the best predicted next-state Q-value in experience_replay.
GAMMA = 0.95
# Learning rate handed to the Adam optimizer.
LEARNING_RATE = 0.001

# Maximum number of transitions kept in the replay memory (oldest dropped first).
MEMORY_SIZE = 1000000
# Number of transitions sampled from memory on every experience_replay call.
BATCH_SIZE = 20

# Exploration rate: starts at MAX, is multiplied by DECAY after each replay
# call, and is never allowed to drop below MIN.
EXPLORATION_MAX = 1.0
EXPLORATION_MIN = 0.01
EXPLORATION_DECAY = 0.995


class DQNSolver:
    """Deep Q-network agent with an epsilon-greedy policy and a replay memory.

    Args:
        observation_space: number of values in one observation (network input size).
        action_space: number of discrete actions (network output size).
    """

    def __init__(self, observation_space, action_space):
        self.exploration_rate = EXPLORATION_MAX

        self.action_space = action_space
        self.memory = deque(maxlen=MEMORY_SIZE)

        # Two hidden layers of 24 units; one linear output per action (its Q-value).
        self.model = Sequential()
        self.model.add(Dense(24, input_shape=(observation_space,), activation="relu"))
        self.model.add(Dense(24, activation="relu"))
        self.model.add(Dense(self.action_space, activation="linear"))
        # `learning_rate` is the supported keyword; the old `lr` alias is
        # deprecated and rejected by newer Keras releases.
        self.model.compile(loss="mse", optimizer=Adam(learning_rate=LEARNING_RATE))

    def remember(self, state, action, reward, next_state, done):
        """Store one transition in the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action for `state`.

        With probability `exploration_rate` a random action is returned;
        otherwise the action with the highest predicted Q-value.
        """
        if np.random.rand() < self.exploration_rate:
            return random.randrange(self.action_space)
        # verbose=0 keeps Keras from printing a progress bar on every call.
        q_values = self.model.predict(state, verbose=0)
        return np.argmax(q_values[0])

    def experience_replay(self):
        """Train on a random batch from memory, then decay the exploration rate.

        Does nothing until at least BATCH_SIZE transitions have been stored.
        """
        if len(self.memory) < BATCH_SIZE:
            return
        batch = random.sample(self.memory, BATCH_SIZE)
        for state, action, reward, state_next, terminal in batch:
            # Terminal transitions have no future value, so the target is the raw reward.
            q_update = reward
            if not terminal:
                q_update = (reward + GAMMA * np.amax(self.model.predict(state_next, verbose=0)[0]))
            # Only the Q-value of the action actually taken is moved towards the target.
            q_values = self.model.predict(state, verbose=0)
            q_values[0][action] = q_update
            self.model.fit(state, q_values, verbose=0)
        # The decay is applied once per replay call, not once per sample.
        self.exploration_rate *= EXPLORATION_DECAY
        self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate)


def cartpole(episodes=100, video_directory='/content/gdrive/MyDrive/Colab Notebooks/scores/video'):
    """Train a DQN agent on ENV_NAME while recording every episode.

    Args:
        episodes: maximum number of episodes (runs) to play.
        video_directory: folder the Recorder writes one clip per episode to.

    The score of a run is the number of steps it lasted. Each score is passed
    to ScoreLogger, which may end the process once the solve criterion is met.
    """
    env = Recorder(gym.make(ENV_NAME), video_directory)
    score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)
    try:
        for run in range(1, episodes + 1):
            # The network expects a batch dimension, hence the (1, n) reshape.
            state = np.reshape(env.reset(), [1, observation_space])
            step = 0
            while True:
                step += 1
                action = dqn_solver.act(state)
                state_next, reward, terminal, _ = env.step(action)
                # Penalise the transition that ends the episode.
                reward = reward if not terminal else -reward
                state_next = np.reshape(state_next, [1, observation_space])
                dqn_solver.remember(state, action, reward, state_next, terminal)
                state = state_next
                if terminal:
                    print ("Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(step))
                    score_logger.add_score(step, run)
                    break
                # Replay runs after every non-terminal step.
                dqn_solver.experience_replay()
    finally:
        # Release the environment even when training is interrupted.
        env.close()


# Start training when the cell (or the file as a script) is executed directly.
if __name__ == "__main__":
    cartpole()

Run: 1, exploration: 0.9046104802746175, score: 40
Scores: (min: 40, avg: 40, max: 40)

Run: 2, exploration: 0.7183288830986236, score: 47
Scores: (min: 40, avg: 43.5, max: 47)

Run: 3, exploration: 0.6900935609921609, score: 9
Scores: (min: 9, avg: 32, max: 47)

Run: 4, exploration: 0.6401093727576664, score: 16
Scores: (min: 9, avg: 28, max: 47)

Run: 5, exploration: 0.5967292370047992, score: 15
Scores: (min: 9, avg: 25.4, max: 47)

Run: 6, exploration: 0.5732736268885887, score: 9
Scores: (min: 9, avg: 22.666666666666668, max: 47)

Run: 7, exploration: 0.547986285490042, score: 10
Scores: (min: 9, avg: 20.857142857142858, max: 47)

Run: 8, exploration: 0.5238143793828016, score: 10
Scores: (min: 9, avg: 19.5, max: 47)

Run: 9, exploration: 0.49571413690105054, score: 12
Scores: (min: 9, avg: 18.666666666666668, max: 47)

Run: 10, exploration: 0.46677573701590436, score: 13
Scores: (min: 9, avg: 18.1, max: 47)

Run: 11, exploration: 0.3352984938281715, score: 67
Scores: (min: 9, avg

Exception ignored in: <function WeakKeyDictionary.__init__.<locals>.remove at 0x7f9be14de170>
Traceback (most recent call last):
  File "/usr/lib/python3.7/weakref.py", line 358, in remove
    def remove(k, selfref=ref(self)):
KeyboardInterrupt: 
