In [1]:
# 環境確認
!cat /etc/issue
!echo "-----------"
# ファイルサイズ
# !df -h 
# !echo "-----------"
# Memory
# !free -h
# !echo "-----------"
# CPU

# !cat /proc/cpuinfo
# !echo "-----------"
# GPU情報(GPU mode: ONのとき)
!cat /proc/driver/nvidia/gpus/0000:00:04.0/information
!echo "-----" 
!nvcc -v
!echo "-----"
!nvidia-smi
!echo "-----"

Ubuntu 18.04.5 LTS \n \l

-----------
Model: 		 Tesla T4
IRQ:   		 11
GPU UUID: 	 GPU-53d6a56f-960f-9844-29d6-9a8df4035c77
Video BIOS: 	 90.04.96.00.01
Bus Type: 	 PCI
DMA Size: 	 47 bits
DMA Mask: 	 0x7fffffffffff
Bus Location: 	 0000:00:04.0
Device Minor: 	 0
Blacklisted:	 No
-----
nvcc fatal   : No input files specified; use option --help for more information
-----
Tue Mar  9 12:59:44 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.56       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   67C    P8    12W /  70W |    

In [2]:
# Check whether a GPU has been assigned to this runtime.
# The last expression is displayed as the cell output (the GPU device name).
import tensorflow as tf
tf.test.gpu_device_name()

'/device:GPU:0'

# Paper Runner Task

- Testing a deep Q-learning (DQN) model on the "Paper Runner" task
- Based on Section 11 of: https://www.udemy.com/course/ai-master/


In [3]:
# Record the TensorFlow version and the version of the Keras API bundled with it.
print(tf.__version__)
print(tf.keras.__version__)

2.4.1
2.4.0


In [4]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation, rc

from keras.layers import Dense, ReLU
from keras.models import Sequential
from keras.optimizers import RMSprop

# Module-level RMSprop optimizer instance created with default arguments.
optimizer = RMSprop()

class Brain:
    """Small fully-connected Q-network with an epsilon-greedy action policy.

    The network maps a state vector of length ``n_state`` to ``n_action``
    Q-values through two hidden ``Dense`` layers of ``n_mid`` units, each
    followed by ``ReLU``. It is trained with an MSE loss.

    Attributes:
        eps: Current exploration probability (starts at 1.0).
        gamma: Discount factor applied to the best next-state Q-value.
        r: Multiplicative decay applied to ``eps`` on every ``get_action``
            call while ``eps`` is above 0.1. ``r=1.0`` disables the decay.
        model: The compiled Keras ``Sequential`` model.
    """

    def __init__(self, n_state, n_mid, n_action, gamma=0.9, r=0.99):
        """Build and compile the Q-network.

        Args:
            n_state: Number of state features (network input size).
            n_mid: Number of units in each hidden layer.
            n_action: Number of discrete actions (network output size).
            gamma: Discount factor.
            r: Epsilon decay rate (multiplied into ``eps`` per action).
        """
        self.eps = 1.0  # epsilon: probability of taking a random action
        self.gamma = gamma  # discount factor
        self.r = r  # epsilon decay rate

        model = Sequential()
        model.add(Dense(n_mid, input_shape=(n_state, )))
        model.add(ReLU())
        model.add(Dense(n_mid))
        model.add(ReLU())
        model.add(Dense(n_action))
        # Give every Brain its own optimizer so that optimizer state is not
        # shared between independently trained models.
        model.compile(loss="mse", optimizer=RMSprop())
        self.model = model

    def train(self, states, next_states, action, reward, terminal):
        """Run one Q-learning update on a batch of transitions.

        Args:
            states: Array of shape ``(batch, n_state)`` before the action.
            next_states: Array of shape ``(batch, n_state)`` after the action.
            action: Action index per batch row (array of length ``batch``,
                or a scalar applied to every row).
            reward: Reward per batch row (array of length ``batch`` or scalar).
            terminal: Truthy when the episode ended on this transition; the
                target is then the reward alone, without a bootstrap term.
        """
        q = self.model.predict(states)
        t = np.copy(q)
        # Index one (row, action) pair per sample. ``t[:, action]`` would
        # select every listed action column for every row, which is only
        # correct for a batch of one.
        rows = np.arange(t.shape[0])
        if terminal:  # end of episode: no future value to bootstrap from
            t[rows, action] = reward
        else:
            # The next-state prediction is only needed for the bootstrap term.
            next_q = self.model.predict(next_states)
            t[rows, action] = reward + self.gamma * np.max(next_q, axis=1)

        self.model.train_on_batch(states, t)

    def get_action(self, states):
        """Choose an action per batch row with an epsilon-greedy policy.

        With probability ``eps`` every row gets a uniformly random action;
        otherwise each row gets the arg-max of its predicted Q-values.
        After choosing, ``eps`` is multiplied by ``r`` as long as it is still
        above 0.1.

        Args:
            states: Array of shape ``(batch, n_state)``.

        Returns:
            Integer array of length ``batch`` holding the chosen actions.
        """
        q = self.model.predict(states)
        if np.random.rand() < self.eps:
            action = np.random.randint(q.shape[1], size=q.shape[0])
        else:
            action = np.argmax(q, axis=1)
        if self.eps > 0.1:
            # Decay must be multiplicative: adding ``r`` would make epsilon
            # grow on every call and the policy would never become greedy.
            self.eps *= self.r
        return action

class Agent:
    def __init__(self, v_x, v_y_sigma, v_jump, brain):
        self.v_x = v_x
        self.v_y_sigma = v_y_sigma
        self.v_jump = v_jump
        self.brain = brain
        self.reset()

    def reset(self): #初期配置
        self.x = -1 
        self.y = 0
        self.v_y = self.v_y_sigma * np.random.randn()

    def step(self, g):
        states = np.array([[self.y, self.v_y]])
        self.x += self.v_x
        self.y += self.v_y

        reward = 0 #報酬
        terminal = False #終了判定

        # 報酬の設定
        if self.x > 1.0:
            reward = 1
            terminal = True
        elif self.y < -1.0 or self.y > 1.0:
            reward = -1
            terminal = True
        reward = np.array([reward])

        # 行動の決定
        action = self.brain.get_action(states)
        if action[0] == 0:
            self.v_y -= g #自由落下
        else:
            self.v_y = self.v_jump #ジャンプ
        
        next_states = np.array([[self.y, self.v_y]])
        brain.train(states, next_states, action, reward, terminal)

        # 終了判定したら初期配置にもどる
        if terminal:
            self.reset()
    

class Environment:
    """Couples an agent with the constant ``g`` handed to it on every step."""

    def __init__(self, agent, g):
        """Remember the agent to drive and the value passed to ``agent.step``."""
        self.agent, self.g = agent, g

    def step(self):
        """Advance the agent by one step and return its position as ``(x, y)``."""
        actor = self.agent
        actor.step(self.g)
        return actor.x, actor.y

In [15]:
# Mount Google Drive so the rendered videos can be saved there.
from google.colab import drive
drive.mount('/content/drive')

def animate(environment, interval, frames):
    """Build a matplotlib animation that steps ``environment`` once per frame.

    Args:
        environment: Object whose ``step()`` returns the ``(x, y)`` position
            to draw.
        interval: Forwarded to ``FuncAnimation`` as ``interval``.
        frames: Forwarded to ``FuncAnimation`` as ``frames``.

    Returns:
        The ``animation.FuncAnimation`` drawing a single scatter point on
        axes limited to [-1, 1] in both directions.
    """
    fig, ax = plt.subplots()
    # Close the just-created figure right away (presumably so the empty
    # static plot is not rendered as cell output).
    plt.close(fig)
    ax.set_xlim((-1, 1))
    ax.set_ylim((-1, 1))
    marker = ax.scatter([], [])  # scatter artist used to draw the position

    def plot(data):
        # ``data`` is the frame value supplied by FuncAnimation; it is unused.
        pos_x, pos_y = environment.step()
        marker.set_offsets(np.array([[pos_x, pos_y]]))
        return (marker,)

    return animation.FuncAnimation(
        fig, plot, frames=frames, interval=interval, blit=True
    )

Mounted at /content/drive


In [24]:
# Case: random behaviour

# Network dimensions: state size, hidden units, number of actions.
n_state, n_mid, n_action = 2, 32, 2
brain = Brain(n_state, n_mid, n_action, r=1.0)  # r=1.0 is meant to keep epsilon from decaying

# Agent motion parameters.
v_x, v_y_sigma, v_jump = 0.05, 0.1, 0.2
agent = Agent(v_x, v_y_sigma, v_jump, brain)

g = 0.2
environment = Environment(agent, g)

anim = animate(environment, interval=50, frames=1024)
rc('animation', html='jshtml')

anim.save('PaperRunner_Random.mp4', writer="ffmpeg")

# Rendering takes a while to finish, so this is a good moment for a coffee break.
# anim 

In [25]:
# Case: with Q-learning

# Network dimensions: state size, hidden units, number of actions.
n_state, n_mid, n_action = 2, 32, 2
brain = Brain(n_state, n_mid, n_action, r=0.99)  # r=0.99 is meant to enable epsilon decay

# Agent motion parameters.
v_x, v_y_sigma, v_jump = 0.05, 0.1, 0.2
agent = Agent(v_x, v_y_sigma, v_jump, brain)

g = 0.2
environment = Environment(agent, g)

anim = animate(environment, interval=50, frames=1024)
rc('animation', html='jshtml')

anim.save('PaperRunner_DQN.mp4', writer="ffmpeg")

# Rendering takes a while to finish, so this is a good moment for a coffee break.
# anim 

In [23]:
# Copy the rendered videos to Google Drive.
# The destination is relative to the working directory printed by `pwd`,
# so Drive must already be mounted under ./drive.
!pwd
!cp PaperRunner_Random.mp4 ./drive/MyDrive/PrjFX/Colab/
!cp PaperRunner_DQN.mp4 ./drive/MyDrive/PrjFX/Colab/

/content


In [None]:
# TODO: techniques for stabilizing training
# - experience replay
# - fixed target Q-network

In [None]:
# TODO: case: SARSA
# Reference (how it differs from Q-learning): https://qiita.com/triwave33/items/cae48e492769852aa9f1