# Super intelligence

## CartPole

### インポート

In [3]:
import gym
import numpy as np
import pandas as pd
np.random.seed(100)

### CartPole環境設定

In [4]:
# Create the CartPole environment and seed its random number generators so
# that repeated runs start from the same conditions.
env = gym.make('CartPole-v0')
# The action space keeps its own random generator, separate from the
# environment's; without seeding it, env.action_space.sample() differs
# from run to run even though the environment itself is seeded.
env.action_space.seed(100)
# Seeded last so that this call's return value stays the cell output.
env.seed(100)

[100]

### 行動空間のサイズ表示

In [5]:
# Number of discrete actions offered by the environment's action space.
action_size = env.action_space.n
action_size

2

### 行動空間からランダムにサンプル抽出

In [6]:
# Draw ten random actions from the action space to see which values occur.
[env.action_space.sample() for _ in range(10)]

[1, 0, 0, 1, 1, 1, 1, 1, 0, 1]

### 状態空間のサイズ表示

In [7]:
# Length of one observation: first entry of the observation space's shape.
state_size = env.observation_space.shape[0]
state_size

4

### 環境の初期化と表示

In [8]:
# Reset the environment; the value it returns is kept as the current state
# and displayed.
state = env.reset()
state

array([ 0.03349816,  0.0096554 , -0.02111368, -0.04570484], dtype=float32)

### ランダムな行動を取り、環境を次の状態に進める

In [9]:
# Take one randomly sampled action and unpack what the environment returns:
# the next state, the reward, the episode-finished flag and one extra value.
random_action = env.action_space.sample()
state, reward, done, _ = env.step(random_action)
(state, reward, done, _)

(array([ 0.03369127, -0.18515752, -0.02202777,  0.24024247], dtype=float32),
 1.0,
 False,
 {})

### 学習用データセット収集

In [14]:
%%time
# Collect training data from random play: run many episodes with random
# actions and keep only the per-step records of episodes whose total reward
# reaches the threshold, so the data set contains comparatively good play.
N_EPISODES = 25000   # number of random episodes to simulate
MIN_TREWARD = 110    # minimum total reward for an episode to be kept

# Initial reset retained from the original cell; every episode resets again below.
state = env.reset()
length = []           # total reward of each retained episode
episode_frames = []   # one DataFrame of step records per retained episode

for run in range(N_EPISODES):
    done = False
    prev_state = env.reset()
    treward = 1
    results = []
    while not done:
        action = env.action_space.sample()
        state, reward, done, _ = env.step(action)
        # Store the state the action was taken in (not the resulting state),
        # together with the action and the reward it produced.
        results.append({'s1': prev_state[0],
                        's2': prev_state[1],
                        's3': prev_state[2],
                        's4': prev_state[3],
                        'a': action,
                        'r': reward})
        # treward starts at 1 and the reward of the step that ends the
        # episode is not added.
        treward += reward if not done else 0
        prev_state = state
    if treward >= MIN_TREWARD:
        episode_frames.append(pd.DataFrame(results))
        length.append(treward)

# Assemble the data set once. DataFrame.append re-copied the whole frame on
# every call and was removed in pandas 2.0; pd.concat keeps the same
# per-episode row labels. Fall back to an empty frame when nothing qualified.
data = pd.concat(episode_frames) if episode_frames else pd.DataFrame()

CPU times: user 13.1 s, sys: 0 ns, total: 13.1 s
Wall time: 13 s


### lengthの平均

In [16]:
np.array(length).mean()

119.0

### dataの情報

In [17]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 595 entries, 0 to 109
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   s1      595 non-null    float64
 1   s2      595 non-null    float64
 2   s3      595 non-null    float64
 3   s4      595 non-null    float64
 4   a       595 non-null    int64  
 5   r       595 non-null    float64
dtypes: float64(5), int64(1)
memory usage: 48.7 KB


### data の末尾５行

In [18]:
data.tail()

Unnamed: 0,s1,s2,s3,s4,a,r
105,0.499517,0.13251,0.092659,1.081809,0,1.0
106,0.502167,-0.063704,0.114295,1.402071,1,1.0
107,0.500893,0.129827,0.142336,1.147197,0,1.0
108,0.503489,-0.066837,0.16528,1.480916,1,1.0
109,0.502153,0.125928,0.194898,1.244081,0,1.0


### Neural-Network の学習

### 描画設定

In [19]:
# Plotting setup. pyplot is imported from matplotlib directly instead of
# through the pylab convenience namespace; the bound name is still `plt`.
import matplotlib.pyplot as plt

# Matplotlib 3.6 renamed the bundled 'seaborn' style to 'seaborn-v0_8' and
# later releases dropped the old name, so use whichever one is installed.
plot_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(plot_style)
%matplotlib inline

### tensorflow インポート

In [20]:
import tensorflow as tf
from tensorflow import keras
# Fix TensorFlow's global random seed.
tf.random.set_seed(100)
# NOTE(review): this helper is imported from a private TensorFlow module path;
# tf.compat.v1.disable_eager_execution looks like the public equivalent — confirm.
from tensorflow.python.framework.ops import disable_eager_execution
# Switch off eager execution before any model is built.
# NOTE(review): the seed above is set while eager execution is still enabled —
# confirm it still takes effect after this switch.
disable_eager_execution()
# NOTE(review): these come from the standalone `keras` package while the line
# above imports `keras` from tensorflow — confirm both resolve to the same install.
from keras.layers import Dense
from keras.models import Sequential

### model 定義

In [21]:
# Small feed-forward binary classifier: one hidden ReLU layer and a single
# sigmoid output, taking one input per observation component.
n_inputs = env.observation_space.shape[0]
model = Sequential([
    Dense(64, activation='relu', input_dim=n_inputs),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['acc'])

### model 学習

In [22]:
%%time
# Fit the classifier on the collected records: the four state columns are
# the inputs and the action column is the label.
state_columns = ['s1', 's2', 's3', 's4']
fit_options = dict(epochs=25,
                   verbose=False,          # no per-epoch progress output
                   validation_split=0.2)   # fraction of rows held out for validation
model.fit(data[state_columns], data['a'], **fit_options)

2022-05-10 14:38:21.199498: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1036] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-05-10 14:38:21.339305: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1036] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-05-10 14:38:21.339761: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1036] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-05-10 14:38:21.366295: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1036] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-05-10 14:38:21.366966: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1036] could n

CPU times: user 5.03 s, sys: 2.04 s, total: 7.07 s
Wall time: 7.48 s


<keras.callbacks.History at 0x7f70f86bc4f0>

### トレーニング中の metrics の表示

In [23]:
# Put the metrics stored on the model's training history into a DataFrame.
res = pd.DataFrame(model.history.history)
# Show the last three rows.
res.tail(3)

Unnamed: 0,loss,acc,val_loss,val_acc
22,0.645436,0.634454,0.664541,0.613445
23,0.645346,0.634454,0.66634,0.613445
24,0.645015,0.634454,0.667169,0.605042


### 学習した Neural-Network をエージェントとして Cart-Pole をプレイさせる

In [28]:
def epoch():
    """Play one episode with the trained model choosing every action.

    Prints a '|' progress mark, resets the global ``env`` and steps it until
    the episode is done. At each step the global ``model``'s prediction for
    the current state is thresholded at 0.5 to select action 1 or 0.

    Returns:
        The episode's total reward: it starts at 1 and grows by the reward
        of every step except the one that ends the episode.
    """
    print('|', end='')
    state = env.reset()
    treward = 1
    while True:
        # predict is given the state wrapped into a 2-D array (one row).
        prob = model.predict(np.atleast_2d(state))[0][0]
        action = np.where(prob > 0.5, 1, 0)
        state, reward, done, _ = env.step(action)
        if done:
            # The reward of the terminating step is deliberately not counted.
            break
        treward += reward
    return treward

In [29]:
epoch()

|

200.0

In [30]:
%%time
# Play 100 episodes with the trained agent and collect each total reward.
episode_totals = []
for episode_no in range(100):
    episode_totals.append(epoch())
res = np.array(episode_totals)
print()   # end the line of '|' progress marks
res

||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CPU times: user 36.2 s, sys: 11.6 s, total: 47.9 s
Wall time: 43.3 s


array([200., 200., 200., 200., 200., 200., 200., 200., 200., 200., 200.,
       200., 200., 200., 200., 200., 200., 200., 200., 200., 200., 200.,
       200., 200., 200., 200., 200., 200., 200., 200., 200., 200., 200.,
       200., 200., 200., 200., 200., 200., 200., 200., 200., 200., 200.,
       200., 200., 200., 200., 200., 200., 200., 200., 200., 200., 200.,
       200., 200., 200., 200., 200., 200., 200., 200., 200., 200., 200.,
       200., 200., 200., 200., 200., 200., 200., 200., 200., 200., 200.,
       200., 200., 200., 200., 200., 200., 200., 200., 200., 200., 200.,
       200., 200., 200., 200., 200., 200., 200., 200., 200., 200., 200.,
       200.])

In [31]:
res.mean()

200.0