In [1]:
from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers import Adam
from keras.utils import to_categorical
import keras.backend as K
import numpy as np

Using TensorFlow backend.


In [2]:
class Agent(object):
    """Monte-Carlo policy-gradient (REINFORCE-style) agent with a softmax policy.

    The agent stores every (state, action, reward) triple of an episode via
    ``store_transition`` and, when ``learn`` is called, performs one gradient
    step on the whole episode using normalised discounted returns as weights.

    Parameters
    ----------
    gamma : float
        Discount rate applied to future rewards.
    input_dims : int
        Length of the state vector fed to the network.
    n_actions : int
        Number of discrete actions; actions are the integers ``0..n_actions-1``.
    lr : float
        Learning rate handed to the Adam optimizer.

    The defaults (8 state dimensions, 4 actions) are the values this notebook
    uses with the ``LunarLander-v2`` environment.
    """

    def __init__(self, gamma=0.99, input_dims=8, n_actions=4, lr=0.001):
        # Discount rate
        self.gamma = gamma
        self.input_dims = input_dims
        self.n_actions = n_actions
        self.lr = lr
        # Per-episode buffers for states, actions and rewards
        self.state_memory = []
        self.action_memory = []
        self.reward_memory = []
        # Action space: one integer id per network output
        self.action_space = list(range(n_actions))
        self.policy, self.model = self.build_policy_network()

    # Build the policy model
    def build_policy_network(self):
        """Create the policy network and its training wrapper.

        Returns
        -------
        (policy, model)
            ``policy`` maps a batch of states to action probabilities and is
            used for acting. ``model`` shares the same layers but takes the
            extra return input ``G`` so that the custom loss can weight each
            sample's log-likelihood by its return.
        """
        # Receives the state
        inputs = Input(shape=(self.input_dims,))
        # Receives the G value (one scalar per sample)
        G = Input(shape=[1])
        # Two hidden layers
        dense1 = Dense(units=128,
                       activation='relu')(inputs)
        dense2 = Dense(units=128,
                       activation='relu')(dense1)
        # Model output: one probability per action
        outputs = Dense(units=self.n_actions,
                        activation='softmax')(dense2)

        # Custom loss function
        def custom_loss(y_true, y_pred):
            # y_true is the one-hot encoded action passed as the training
            # target by learn(); y_pred is the softmax output.
            # Clip so that log() never sees exactly 0 or 1.
            y_pred = K.clip(y_pred, 1e-8, 1-1e-8)
            log_lik = y_true * K.log(y_pred)
            # G is the Input tensor captured from the enclosing scope; it
            # broadcasts over the action axis.
            loss = K.sum(-log_lik * G)
            return loss

        # Policy model used for inference
        policy = Model(inputs, outputs)
        # Model used to train the policy model
        model = Model([inputs, G], outputs)
        model.compile(optimizer=Adam(lr=self.lr),
                      loss=custom_loss,
                      metrics=None)
        return policy, model

    def choose_action(self, state):
        """Sample an action for a single state from the current policy.

        ``state`` may be any 1-D array-like; it is given a leading batch axis
        before being passed to ``self.policy.predict``.
        """
        state = np.asarray(state)[np.newaxis, :]
        # Predict the probability of each action in the current state.
        # Cast to float64 and renormalise so that rounding in the network's
        # output dtype cannot make the probabilities fail numpy's sum check.
        probabilities = np.asarray(self.policy.predict(state)[0], dtype=np.float64)
        probabilities = probabilities / probabilities.sum()
        # Pick one action according to those probabilities
        action = np.random.choice(self.action_space, p=probabilities)
        return action

    # Store a state, action and reward
    def store_transition(self, state, action, reward):
        """Append one step of the current episode to the buffers."""
        self.action_memory.append(action)
        self.state_memory.append(state)
        self.reward_memory.append(reward)

    # Compute the discounted return of every step
    @staticmethod
    def _discounted_returns(rewards, gamma):
        """Return ``G[t] = sum_{k>=t} gamma**(k-t) * rewards[k]`` as float64.

        Uses a single backward pass (``G[t] = r[t] + gamma * G[t+1]``), and
        always produces a float array so integer rewards are not truncated.
        """
        rewards = np.asarray(rewards, dtype=np.float64)
        returns = np.zeros(len(rewards), dtype=np.float64)
        running = 0.0
        for t in range(len(rewards) - 1, -1, -1):
            running = rewards[t] + gamma * running
            returns[t] = running
        return returns

    # Train the policy model
    def learn(self):
        """Run one training step on the stored episode, then clear the buffers.

        Does nothing when no transition has been stored.
        """
        if len(self.reward_memory) == 0:
            return
        state_memory = np.array(self.state_memory)
        action_memory = np.array(self.action_memory)
        reward_memory = np.array(self.reward_memory, dtype=np.float64)
        # One-hot encode the actions
        actions = to_categorical(action_memory, num_classes=self.n_actions)
        # Compute the G values
        G = self._discounted_returns(reward_memory, self.gamma)
        # Standardise the G values. When they are (numerically) constant,
        # e.g. for a one-step episode, the std is 0; dividing by 1 instead
        # yields all-zero weights rather than NaN.
        mean = np.mean(G)
        std = np.std(G)
        if std < 1e-8:
            std = 1.0
        G = (G - mean) / std
        # Train the model
        self.model.train_on_batch([state_memory, G], actions)
        # Clear the state, action and reward buffers
        self.state_memory = []
        self.action_memory = []
        self.reward_memory = []

In [3]:
import gym

# Train the agent on LunarLander for a fixed number of episodes,
# performing one policy update at the end of every episode.
agent = Agent()
env = gym.make('LunarLander-v2')
n_episodes = 2000
try:
    for i in range(n_episodes):
        done = False
        total_reward = 0
        # Newer Gym releases return (observation, info) from reset();
        # older ones return the observation alone.
        reset_result = env.reset()
        state = reset_result[0] if isinstance(reset_result, tuple) else reset_result
        while not done:
            action = agent.choose_action(state)
            step_result = env.step(action)
            if len(step_result) == 5:
                # Newer API: (obs, reward, terminated, truncated, info)
                next_state, reward, terminated, truncated, _ = step_result
                done = terminated or truncated
            else:
                # Legacy API: (obs, reward, done, info)
                next_state, reward, done, _ = step_result
            total_reward += reward
            # Store the state, action and reward of this step of the episode
            agent.store_transition(state, action, reward)
            state = next_state
        # After each episode ends, train the policy model
        agent.learn()
        print(f'Episode {i}/{n_episodes} ---> Total Reward: {total_reward}')
finally:
    # Release the environment even if training is interrupted or fails
    env.close()

Instructions for updating:
Colocations handled automatically by placer.
Episode 0/2000 ---> Total Reward: -286.40589020889695
Episode 1/2000 ---> Total Reward: -216.440491378656
Episode 2/2000 ---> Total Reward: -197.82962700686818
Episode 3/2000 ---> Total Reward: -62.31394943067728
Episode 4/2000 ---> Total Reward: -144.1243506758791
Episode 5/2000 ---> Total Reward: -133.9219626200108
Episode 6/2000 ---> Total Reward: -368.25148820233005
Episode 7/2000 ---> Total Reward: -99.34901055486675
Episode 8/2000 ---> Total Reward: -120.96060436908544
Episode 9/2000 ---> Total Reward: -366.66280200070787
Episode 10/2000 ---> Total Reward: -118.4281931485862
Episode 11/2000 ---> Total Reward: -87.9258258452069
Episode 12/2000 ---> Total Reward: -104.46641126172094
Episode 13/2000 ---> Total Reward: -84.10779191324231
Episode 14/2000 ---> Total Reward: -100.78078206004803
Episode 15/2000 ---> Total Reward: -136.77297842258315
Episode 16/2000 ---> Total Reward: -56.76962048743201
Episode 17/200

Episode 146/2000 ---> Total Reward: -216.8501557297945
Episode 147/2000 ---> Total Reward: -122.17415285006064
Episode 148/2000 ---> Total Reward: -49.25538537891482
Episode 149/2000 ---> Total Reward: -67.04921787911928
Episode 150/2000 ---> Total Reward: -157.43595483199792
Episode 151/2000 ---> Total Reward: -243.13294118672718
Episode 152/2000 ---> Total Reward: -117.6588042382218
Episode 153/2000 ---> Total Reward: -84.67523315535053
Episode 154/2000 ---> Total Reward: -51.842411326202615
Episode 155/2000 ---> Total Reward: -93.71212135783334
Episode 156/2000 ---> Total Reward: -269.0930111266946
Episode 157/2000 ---> Total Reward: -189.38562131909703
Episode 158/2000 ---> Total Reward: -26.383669329608693
Episode 159/2000 ---> Total Reward: -89.78343062041802
Episode 160/2000 ---> Total Reward: -123.96585765342331
Episode 161/2000 ---> Total Reward: -126.77271758256202
Episode 162/2000 ---> Total Reward: -85.80883473117674
Episode 163/2000 ---> Total Reward: -141.61576387540083
E

Episode 294/2000 ---> Total Reward: -29.945461444374843
Episode 295/2000 ---> Total Reward: -24.278156576993965
Episode 296/2000 ---> Total Reward: -62.43221634375934
Episode 297/2000 ---> Total Reward: -80.00394494891341
Episode 298/2000 ---> Total Reward: -58.08552725829753
Episode 299/2000 ---> Total Reward: -62.80218685669515
Episode 300/2000 ---> Total Reward: -19.131890819963516
Episode 301/2000 ---> Total Reward: -91.72900459277551
Episode 302/2000 ---> Total Reward: -7.052467280293413
Episode 303/2000 ---> Total Reward: -21.655496377562045
Episode 304/2000 ---> Total Reward: 12.399636408352308
Episode 305/2000 ---> Total Reward: -28.399419646238
Episode 306/2000 ---> Total Reward: -54.043899401123824
Episode 307/2000 ---> Total Reward: -194.97894880965202
Episode 308/2000 ---> Total Reward: -129.87492122731197
Episode 309/2000 ---> Total Reward: 32.19798703204589
Episode 310/2000 ---> Total Reward: -18.184283702711298
Episode 311/2000 ---> Total Reward: -65.52282024392525
Episo

Episode 443/2000 ---> Total Reward: 66.96772883617912
Episode 444/2000 ---> Total Reward: 23.85097142968074
Episode 445/2000 ---> Total Reward: 76.25370545017776
Episode 446/2000 ---> Total Reward: 29.3922819905016
Episode 447/2000 ---> Total Reward: 33.19299263820748
Episode 448/2000 ---> Total Reward: 28.7894193148081
Episode 449/2000 ---> Total Reward: -12.598698099735003
Episode 450/2000 ---> Total Reward: 36.73579865714501
Episode 451/2000 ---> Total Reward: 38.16059545317178
Episode 452/2000 ---> Total Reward: 54.56789499140142
Episode 453/2000 ---> Total Reward: 4.4943476237710485
Episode 454/2000 ---> Total Reward: 57.38097399327073
Episode 455/2000 ---> Total Reward: -257.3531593499099
Episode 456/2000 ---> Total Reward: 14.241927721458026
Episode 457/2000 ---> Total Reward: 40.271002021932645
Episode 458/2000 ---> Total Reward: 67.76589705943215
Episode 459/2000 ---> Total Reward: -282.59441694476857
Episode 460/2000 ---> Total Reward: 57.97598261511253
Episode 461/2000 ---> 

Episode 593/2000 ---> Total Reward: 24.03021002338778
Episode 594/2000 ---> Total Reward: -73.08733417124262
Episode 595/2000 ---> Total Reward: 230.87382320539575
Episode 596/2000 ---> Total Reward: -75.49971901828908
Episode 597/2000 ---> Total Reward: -0.15451013157765203
Episode 598/2000 ---> Total Reward: 251.16121114067457
Episode 599/2000 ---> Total Reward: -44.116949937832416
Episode 600/2000 ---> Total Reward: 35.59615050756028
Episode 601/2000 ---> Total Reward: -67.46593771969387
Episode 602/2000 ---> Total Reward: 225.0199139217135
Episode 603/2000 ---> Total Reward: -252.8433827402185
Episode 604/2000 ---> Total Reward: 4.4081717586796
Episode 605/2000 ---> Total Reward: 234.81655749149996
Episode 606/2000 ---> Total Reward: 208.53020549960019
Episode 607/2000 ---> Total Reward: -92.76510553541051
Episode 608/2000 ---> Total Reward: -253.54522754693318
Episode 609/2000 ---> Total Reward: -275.5151867945631
Episode 610/2000 ---> Total Reward: -78.41484524557802
Episode 611/

Episode 744/2000 ---> Total Reward: -13.805191472699917
Episode 745/2000 ---> Total Reward: 22.67009045155831
Episode 746/2000 ---> Total Reward: -11.336476230496373
Episode 747/2000 ---> Total Reward: 251.2592327284077
Episode 748/2000 ---> Total Reward: -10.557877851690037
Episode 749/2000 ---> Total Reward: -27.894010245828113
Episode 750/2000 ---> Total Reward: 249.70205047626712
Episode 751/2000 ---> Total Reward: -35.5370302452952
Episode 752/2000 ---> Total Reward: -87.46304970246149
Episode 753/2000 ---> Total Reward: -23.654214065138532
Episode 754/2000 ---> Total Reward: -2.5895402545844775
Episode 755/2000 ---> Total Reward: -19.32077875798896
Episode 756/2000 ---> Total Reward: 244.88263486334833
Episode 757/2000 ---> Total Reward: 5.690501570152165
Episode 758/2000 ---> Total Reward: 8.785739261283979
Episode 759/2000 ---> Total Reward: 15.383844442482541
Episode 760/2000 ---> Total Reward: 33.96403526062073
Episode 761/2000 ---> Total Reward: 54.09865144409005
Episode 762

Episode 894/2000 ---> Total Reward: -82.24057916080002
Episode 895/2000 ---> Total Reward: -66.12249797079615
Episode 896/2000 ---> Total Reward: 279.5165883181391
Episode 897/2000 ---> Total Reward: 249.00447121777628
Episode 898/2000 ---> Total Reward: -77.15776569429251
Episode 899/2000 ---> Total Reward: 251.51503704010582
Episode 900/2000 ---> Total Reward: -11.3457078218851
Episode 901/2000 ---> Total Reward: -109.45719500542816
Episode 902/2000 ---> Total Reward: -35.60727445659208
Episode 903/2000 ---> Total Reward: 206.84805685763047
Episode 904/2000 ---> Total Reward: 254.22713480285265
Episode 905/2000 ---> Total Reward: -98.75801423943867
Episode 906/2000 ---> Total Reward: 182.03106338068784
Episode 907/2000 ---> Total Reward: 266.7755196513466
Episode 908/2000 ---> Total Reward: -21.53344605550589
Episode 909/2000 ---> Total Reward: -47.651949848718075
Episode 910/2000 ---> Total Reward: -169.67745025187278
Episode 911/2000 ---> Total Reward: 265.2907138192145
Episode 912

Episode 1043/2000 ---> Total Reward: 271.9488967285429
Episode 1044/2000 ---> Total Reward: 247.60558508416705
Episode 1045/2000 ---> Total Reward: 228.3416048101135
Episode 1046/2000 ---> Total Reward: 170.80820001548102
Episode 1047/2000 ---> Total Reward: 233.2747072702962
Episode 1048/2000 ---> Total Reward: -15.247182920140062
Episode 1049/2000 ---> Total Reward: 248.57302254403515
Episode 1050/2000 ---> Total Reward: -134.92958206135995
Episode 1051/2000 ---> Total Reward: 209.39054334382743
Episode 1052/2000 ---> Total Reward: -32.10123285354557
Episode 1053/2000 ---> Total Reward: 265.9272596682626
Episode 1054/2000 ---> Total Reward: 25.933073156978537
Episode 1055/2000 ---> Total Reward: 253.02739749137908
Episode 1056/2000 ---> Total Reward: 253.7716930802379
Episode 1057/2000 ---> Total Reward: 242.07812126227566
Episode 1058/2000 ---> Total Reward: 216.59317577026835
Episode 1059/2000 ---> Total Reward: 239.21664649549595
Episode 1060/2000 ---> Total Reward: -116.198188857

Episode 1191/2000 ---> Total Reward: 31.89828880178331
Episode 1192/2000 ---> Total Reward: 46.61433417034007
Episode 1193/2000 ---> Total Reward: 41.6837197638651
Episode 1194/2000 ---> Total Reward: 45.35419819975888
Episode 1195/2000 ---> Total Reward: 278.4533589083019
Episode 1196/2000 ---> Total Reward: 1.7852352827837876
Episode 1197/2000 ---> Total Reward: 287.56980465061855
Episode 1198/2000 ---> Total Reward: 9.48745735890374
Episode 1199/2000 ---> Total Reward: 266.01700978328375
Episode 1200/2000 ---> Total Reward: 33.337036666324366
Episode 1201/2000 ---> Total Reward: 238.06252847467505
Episode 1202/2000 ---> Total Reward: 18.525422559821962
Episode 1203/2000 ---> Total Reward: 288.4851134459971
Episode 1204/2000 ---> Total Reward: 34.70863188728248
Episode 1205/2000 ---> Total Reward: -4.167931509355256
Episode 1206/2000 ---> Total Reward: 255.55609028580957
Episode 1207/2000 ---> Total Reward: 227.19159076225964
Episode 1208/2000 ---> Total Reward: 0.5232893577501017
Ep

Episode 1338/2000 ---> Total Reward: -27.2080430416125
Episode 1339/2000 ---> Total Reward: 57.920615816554175
Episode 1340/2000 ---> Total Reward: -39.029620579747345
Episode 1341/2000 ---> Total Reward: -5.141652567608816
Episode 1342/2000 ---> Total Reward: 16.69156398920694
Episode 1343/2000 ---> Total Reward: 228.4727385969114
Episode 1344/2000 ---> Total Reward: 23.97129042182246
Episode 1345/2000 ---> Total Reward: -47.74249174026269
Episode 1346/2000 ---> Total Reward: 214.01534688812603
Episode 1347/2000 ---> Total Reward: 293.5319089708236
Episode 1348/2000 ---> Total Reward: 12.820385863786498
Episode 1349/2000 ---> Total Reward: 242.73998658984948
Episode 1350/2000 ---> Total Reward: 240.247706235784
Episode 1351/2000 ---> Total Reward: 37.527793750858166
Episode 1352/2000 ---> Total Reward: 226.62814669108144
Episode 1353/2000 ---> Total Reward: 228.21657964373549
Episode 1354/2000 ---> Total Reward: 64.14966247840488
Episode 1355/2000 ---> Total Reward: -46.01489525752481

Episode 1486/2000 ---> Total Reward: 250.93847088419477
Episode 1487/2000 ---> Total Reward: 279.91569231533526
Episode 1488/2000 ---> Total Reward: 267.68991545158616
Episode 1489/2000 ---> Total Reward: 250.56200449134852
Episode 1490/2000 ---> Total Reward: 269.2373108670964
Episode 1491/2000 ---> Total Reward: 233.42008264512447
Episode 1492/2000 ---> Total Reward: 275.36977952340135
Episode 1493/2000 ---> Total Reward: 287.04707120899815
Episode 1494/2000 ---> Total Reward: 235.05072223201773
Episode 1495/2000 ---> Total Reward: 241.8762077502154
Episode 1496/2000 ---> Total Reward: 227.72464024817594
Episode 1497/2000 ---> Total Reward: 273.2192364402465
Episode 1498/2000 ---> Total Reward: 257.1012306039183
Episode 1499/2000 ---> Total Reward: 7.802363731917509
Episode 1500/2000 ---> Total Reward: 252.77786524009457
Episode 1501/2000 ---> Total Reward: 40.39438450147799
Episode 1502/2000 ---> Total Reward: 247.61756671440548
Episode 1503/2000 ---> Total Reward: -61.6893399940330

Episode 1634/2000 ---> Total Reward: 193.67790922106144
Episode 1635/2000 ---> Total Reward: -119.7395297737094
Episode 1636/2000 ---> Total Reward: 22.401705542000485
Episode 1637/2000 ---> Total Reward: 250.34045575204888
Episode 1638/2000 ---> Total Reward: -5.989640116427054
Episode 1639/2000 ---> Total Reward: 255.40582189548005
Episode 1640/2000 ---> Total Reward: 232.29637853042288
Episode 1641/2000 ---> Total Reward: -11.23679116589257
Episode 1642/2000 ---> Total Reward: 23.272417554858976
Episode 1643/2000 ---> Total Reward: 70.65615167282289
Episode 1644/2000 ---> Total Reward: 38.426941646096935
Episode 1645/2000 ---> Total Reward: 183.60618476811476
Episode 1646/2000 ---> Total Reward: 40.0995489620976
Episode 1647/2000 ---> Total Reward: 245.55691721394973
Episode 1648/2000 ---> Total Reward: 257.53049880610354
Episode 1649/2000 ---> Total Reward: 204.61986238912846
Episode 1650/2000 ---> Total Reward: 273.0587151716667
Episode 1651/2000 ---> Total Reward: 52.937520800429

Episode 1782/2000 ---> Total Reward: 207.61626584862324
Episode 1783/2000 ---> Total Reward: 83.25710001203063
Episode 1784/2000 ---> Total Reward: 287.7806066806389
Episode 1785/2000 ---> Total Reward: -126.5388869005344
Episode 1786/2000 ---> Total Reward: 257.3793937726649
Episode 1787/2000 ---> Total Reward: 42.41119203459425
Episode 1788/2000 ---> Total Reward: -5.397663244838512
Episode 1789/2000 ---> Total Reward: 292.2734833707624
Episode 1790/2000 ---> Total Reward: 227.57329081675272
Episode 1791/2000 ---> Total Reward: -45.739504394516004
Episode 1792/2000 ---> Total Reward: 14.712861846680411
Episode 1793/2000 ---> Total Reward: 261.4904082257999
Episode 1794/2000 ---> Total Reward: 283.14853471124707
Episode 1795/2000 ---> Total Reward: 41.235437984585474
Episode 1796/2000 ---> Total Reward: -6.654089867002085
Episode 1797/2000 ---> Total Reward: 253.20826826170665
Episode 1798/2000 ---> Total Reward: 205.59237643451843
Episode 1799/2000 ---> Total Reward: 19.7691089684709

Episode 1930/2000 ---> Total Reward: 254.52731649827388
Episode 1931/2000 ---> Total Reward: 257.1591431888403
Episode 1932/2000 ---> Total Reward: 16.462671319733417
Episode 1933/2000 ---> Total Reward: -4.423559066423252
Episode 1934/2000 ---> Total Reward: 58.9795520523044
Episode 1935/2000 ---> Total Reward: 282.15973166746136
Episode 1936/2000 ---> Total Reward: 245.36220277081867
Episode 1937/2000 ---> Total Reward: 250.79302125568537
Episode 1938/2000 ---> Total Reward: 23.907794400975888
Episode 1939/2000 ---> Total Reward: 21.156293659720447
Episode 1940/2000 ---> Total Reward: 0.47254974268658145
Episode 1941/2000 ---> Total Reward: 270.7565369740322
Episode 1942/2000 ---> Total Reward: 270.8766408268464
Episode 1943/2000 ---> Total Reward: 251.27740599735625
Episode 1944/2000 ---> Total Reward: 27.562464099921257
Episode 1945/2000 ---> Total Reward: -34.933811321673645
Episode 1946/2000 ---> Total Reward: -37.53441368372219
Episode 1947/2000 ---> Total Reward: 29.62797513837