In [12]:
# 載入相關套件
import gym
from gym import envs

## 隨機行動

In [13]:
# Settings
no = 50        # number of episodes to play

# Create the CartPole environment
env = gym.make("CartPole-v0")

all_rewards = []   # total reward collected in each episode
all_steps = []     # number of steps taken in each episode
total_rewards = 0  # running reward of the episode in progress
steps = 0          # running step count of the episode in progress

# Reset to obtain the first observation.
# NOTE: this cell uses the classic Gym API, where reset() returns only the
# observation and step() returns a 4-tuple (observation, reward, done, info).
observation = env.reset()

try:
    while no > 0:   # keep stepping until `no` episodes have finished
        # Random policy: sample any valid action
        action = env.action_space.sample()
        # Advance the simulation by one step
        observation, reward, done, info = env.step(action)
        # Accumulate the statistics of the current episode
        total_rewards += reward
        steps += 1

        # Episode finished: store its statistics and start a new one
        if done:
            all_rewards.append(total_rewards)
            all_steps.append(steps)
            total_rewards = 0
            steps = 0
            no -= 1
            observation = env.reset()
finally:
    # Release the environment even if a step raises
    env.close()

In [15]:
# Show the result of every episode.
# The Win/Loss verdict is derived from the episode's total reward, so this
# cell depends only on `all_rewards`. CartPole pays a reward of +1 per step,
# hence a total reward of 200 means the pole was kept up for 200 steps.
win_threshold = 200  # total reward (== steps survived) required for a win

print('回合\t報酬\t結果')
for i, rewards in enumerate(all_rewards):
    result = 'Win' if rewards >= win_threshold else 'Loss'
    print(f'{i}\t{rewards}\t{result}')

回合	報酬	結果
0	56.0	Loss
1	12.0	Loss
2	37.0	Loss
3	23.0	Loss
4	18.0	Loss
5	23.0	Loss
6	16.0	Loss
7	12.0	Loss
8	16.0	Loss
9	24.0	Loss
10	11.0	Loss
11	58.0	Loss
12	25.0	Loss
13	15.0	Loss
14	12.0	Loss
15	25.0	Loss
16	19.0	Loss
17	36.0	Loss
18	12.0	Loss
19	10.0	Loss
20	13.0	Loss
21	19.0	Loss
22	31.0	Loss
23	49.0	Loss
24	17.0	Loss
25	13.0	Loss
26	15.0	Loss
27	23.0	Loss
28	24.0	Loss
29	77.0	Loss
30	35.0	Loss
31	23.0	Loss
32	17.0	Loss
33	31.0	Loss
34	57.0	Loss
35	26.0	Loss
36	9.0	Loss
37	29.0	Loss
38	35.0	Loss
39	30.0	Loss
40	12.0	Loss
41	18.0	Loss
42	31.0	Loss
43	11.0	Loss
44	19.0	Loss
45	9.0	Loss
46	21.0	Loss
47	20.0	Loss
48	35.0	Loss
49	13.0	Loss


## 傳統解法

In [None]:
import math 

# Settings
left = 0            # action code: move the cart to the left
right = 1           # action code: move the cart to the right
max_angle = 8       # tilt threshold in degrees: at 8 degrees or more to one side, push toward that side

In [28]:
class Agent:
    """Hand-written CartPole policy that looks only at the pole angle.

    Strategy:
    1. While the pole is within +/- ``max_angle`` degrees, alternate between
       the two directions so the cart stays close to the centre.
    2. When the pole leans ``max_angle`` degrees or more to the right, keep
       pushing right until the lean drops below the threshold.
    3. A lean to the left is handled the same way, pushing left.
    """

    def __init__(self):
        """Start with `left` as the current direction and `right` as the previous one."""
        self.last_direction = right
        self.direction = left

    def act(self, observation):
        """Choose the next action and remember it.

        observation: sequence of four values, unpacked as cart position,
            cart velocity, pole angle and pole velocity. Only the pole angle
            is used; it is compared against ``math.radians(max_angle)``.

        Returns the chosen direction, which is also stored in both
        ``self.direction`` and ``self.last_direction``.
        """
        _cart_position, _cart_velocity, pole_angle, _pole_velocity = observation
        tilt_limit = math.radians(max_angle)

        if -tilt_limit < pole_angle < tilt_limit:
            # Inside the tolerated band: take the opposite of the previous move
            chosen = (self.last_direction + 1) % 2
        elif pole_angle >= tilt_limit:
            # Leaning right by at least the threshold
            chosen = right
        else:
            # Leaning left by at least the threshold
            chosen = left

        self.direction = chosen
        self.last_direction = chosen
        return chosen

In [29]:
no = 50        # number of episodes to play

# Create the CartPole environment
env = gym.make("CartPole-v0")

all_rewards = []   # total reward collected in each episode
all_steps = []     # number of steps taken in each episode
total_rewards = 0  # running reward of the episode in progress
steps = 0          # running step count of the episode in progress
agent = Agent()

# Reset to obtain the first observation.
# NOTE: this cell uses the classic Gym API, where reset() returns only the
# observation and step() returns a 4-tuple (observation, reward, done, info).
observation = env.reset()

try:
    while no > 0:   # keep stepping until `no` episodes have finished
        # Let the rule-based agent choose the action from the latest observation
        action = agent.act(observation)
        # Advance the simulation by one step
        observation, reward, done, info = env.step(action)
        # Accumulate the statistics of the current episode
        total_rewards += reward
        steps += 1

        # Episode finished: store its statistics and start a new one
        if done:
            all_rewards.append(total_rewards)
            all_steps.append(steps)
            total_rewards = 0
            steps = 0
            no -= 1
            observation = env.reset()
finally:
    # Release the environment even if a step raises
    env.close()

In [30]:
# Show the result of every episode.
# The Win/Loss verdict is derived from the episode's total reward, so this
# cell depends only on `all_rewards`. CartPole pays a reward of +1 per step,
# hence a total reward of 200 means the pole was kept up for 200 steps.
win_threshold = 200  # total reward (== steps survived) required for a win

print('回合\t報酬\t結果')
for i, rewards in enumerate(all_rewards):
    result = 'Win' if rewards >= win_threshold else 'Loss'
    print(f'{i}\t{rewards}\t{result}')

回合	報酬	結果
0	97.0	Loss
1	71.0	Loss
2	112.0	Loss
3	129.0	Loss
4	96.0	Loss
5	78.0	Loss
6	116.0	Loss
7	82.0	Loss
8	84.0	Loss
9	105.0	Loss
10	80.0	Loss
11	62.0	Loss
12	71.0	Loss
13	145.0	Loss
14	72.0	Loss
15	135.0	Loss
16	127.0	Loss
17	78.0	Loss
18	100.0	Loss
19	87.0	Loss
20	93.0	Loss
21	55.0	Loss
22	95.0	Loss
23	66.0	Loss
24	156.0	Loss
25	79.0	Loss
26	48.0	Loss
27	101.0	Loss
28	77.0	Loss
29	71.0	Loss
30	107.0	Loss
31	128.0	Loss
32	94.0	Loss
33	126.0	Loss
34	56.0	Loss
35	78.0	Loss
36	68.0	Loss
37	43.0	Loss
38	69.0	Loss
39	158.0	Loss
40	77.0	Loss
41	154.0	Loss
42	128.0	Loss
43	51.0	Loss
44	148.0	Loss
45	115.0	Loss
46	93.0	Loss
47	114.0	Loss
48	74.0	Loss
49	155.0	Loss
