In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, BatchNormalization, ReLU, Dropout
from tensorflow.keras.models import Model, clone_model
from itertools import product

import functions as fs
from functions import make_state, run, get_optimal_value, get_optimal_action, get_optimal_actions, get_model_actions, test_model, test_model_accuracy, one_batch_supervised, train_supervised, create_model


从原理上讲，退火（即训练过程中通过 temperature_decay 逐步降低采样温度 temperature）是错误做法。

In [2]:
def one_batch(model, batch_size, temperature=1, n_test_rounds=0):
    """Sample one batch of states, pick one action per state, and fit the model once.

    Each row of the model output is treated as a vector of non-negative
    action weights. The weights are raised to the power ``1 / temperature``,
    normalized, and used to sample a single action per state. The fit target
    for a state is zero everywhere except at the sampled action, where it
    holds the value returned by ``run(state, action)``.

    Args:
        model: Callable Keras-style model; ``model(states)`` must return a
            2-D tensor (one row per state, one column per action) exposing
            ``.numpy()``, and the model must support ``fit``.
        batch_size: Number of states generated with ``make_state()``.
        temperature: Sampling temperature. Values below 1 sharpen the
            distribution, values above 1 flatten it.
        n_test_rounds: When greater than 0, the model is evaluated with
            ``test_model(model, n_test_rounds)`` after fitting.

    Returns:
        The value returned by ``test_model`` when ``n_test_rounds > 0``,
        otherwise ``None``.
    """
    state_list = np.array([make_state() for _ in range(batch_size)])
    # Single forward pass; the number of actions comes from the output width
    # instead of being hard-coded.
    prob_list = model(state_list).numpy()
    n_actions = prob_list.shape[1]

    # p ** (1 / T) evaluated as exp(log(p) / T) in float64, shifted by the
    # row maximum. This is the same distribution after normalization, but it
    # does not underflow to an all-zero row when the temperature gets small.
    inv_temperature = 1 / temperature
    with np.errstate(divide="ignore"):  # log(0) -> -inf, i.e. weight 0
        log_weights = np.log(prob_list.astype(np.float64)) * inv_temperature
    log_weights -= log_weights.max(axis=1, keepdims=True)
    sample_probs = np.exp(log_weights)
    sample_probs /= sample_probs.sum(axis=1, keepdims=True)

    y_target = np.zeros((len(state_list), n_actions))
    for i, row_probs in enumerate(sample_probs):
        action = np.random.choice(n_actions, p=row_probs)
        # Only the sampled action gets a non-zero target.
        y_target[i, action] = run(state_list[i], action)

    model.fit(state_list, y_target, verbose=0)
    if n_test_rounds > 0:
        return test_model(model, n_test_rounds)
    return None

def train(model, max_batch=200, batch_size=128, temperature=1, temperature_decay=1, n_test_rounds=10000, verbose=0):
    """Train ``model`` for ``max_batch`` batches and remember the best-scoring weights.

    Batch ``i`` is run through ``one_batch`` with sampling temperature
    ``temperature * temperature_decay ** i``. Whenever the score returned for
    a batch exceeds the best score so far (starting from 0), the model
    weights are snapshotted.

    Args:
        model: Model passed through to ``one_batch``; must support
            ``get_weights()``.
        max_batch: Number of batches to run.
        batch_size: Number of states per batch.
        temperature: Initial sampling temperature.
        temperature_decay: Per-batch multiplicative factor applied to the
            temperature (1 keeps it constant).
        n_test_rounds: Evaluation rounds per batch, forwarded to
            ``one_batch``. With 0 no evaluation happens and no score is
            tracked.
        verbose: When 1, print the batch index and its score.

    Returns:
        Tuple ``(best_idx, best_score, best_weights)``. If no batch ever
        scored above 0 (including the case where evaluation is disabled),
        ``best_idx`` and ``best_score`` are 0 and ``best_weights`` holds the
        model's weights at the end of training.
    """
    best_weights = []
    best_idx = 0
    best_score = 0
    for i in range(max_batch):
        score = one_batch(model, batch_size, temperature * temperature_decay ** i, n_test_rounds)
        # one_batch returns None when evaluation is disabled; comparing None
        # with a number would raise TypeError.
        if score is not None and best_score < score:
            best_score = score
            best_idx = i
            best_weights = model.get_weights()
        if verbose == 1:
            print(i, score)
    if not best_weights:
        # No snapshot was taken: hand back the current weights so the result
        # can always be passed to set_weights().
        best_weights = model.get_weights()
    return best_idx, best_score, best_weights

## 试验场

In [15]:
# Fresh model for the run with temperature decay.
model = create_model(
    n_hidden_layers=1,
    n_dense_units=512,
    ratio_dropout=0.5,
    optimizer='adam',
)

In [16]:
# Annealed run: the sampling temperature starts at 1 and is multiplied by
# 0.99 after every batch.
best_idx, best_score, best_weights = train(
    model,
    max_batch=200,
    temperature=1,
    temperature_decay=0.99,
    verbose=1,
)

0 0.3284540059023309
1 0.4022597860125601
2 0.45626623391189625
3 0.5206979608814543
4 0.5884860416710926
5 0.6468177393013881
6 0.682235846508349
7 0.686716036526123
8 0.7173011441218551
9 0.7427781225648772
10 0.7338219146320922
11 0.720679048890288
12 0.743526187754808
13 0.7480147719199366
14 0.7305075220655389
15 0.7193883371208767
16 0.7526526513997959
17 0.7577204666473379
18 0.7789776038705098
19 0.8000758831170246
20 0.7898571265549253
21 0.7894862465403368
22 0.8029533611084658
23 0.8224547572691916
24 0.8262668243773243
25 0.8196763331798106
26 0.832445669073789
27 0.8228846959715383
28 0.8152031095123063
29 0.7989919900971613
30 0.7811476671064066
31 0.7674382677789535
32 0.7571176007343585
33 0.7672342627939801
34 0.8099320579563648
35 0.8156895591087203
36 0.8195363252437792
37 0.8159636668697668
38 0.801079007467903
39 0.8185260018499843
40 0.7886941568321932
41 0.7639692807137606
42 0.7934787314282901
43 0.8211074455247174
44 0.810289280049785
45 0.823784638632082
46 0.

In [17]:
# clone_model builds a new model with the same architecture but newly
# created weights, so the best snapshot has to be loaded explicitly.
best_model = clone_model(model)
best_model.set_weights(best_weights)

In [18]:
# Score the best-weights snapshot (second argument is the number of test rounds).
test_model(best_model, 10000)

np.float64(0.8623330038084214)

In [19]:
# Score the model as it stands after the last training batch, for comparison
# with the best snapshot.
test_model(model, 10000)

np.float64(0.6843864446513317)

In [20]:
# Inspect the first 50 entries of row 0 of the model's first weight tensor.
model.weights[0][0][0:50]

<tf.Tensor: shape=(50,), dtype=float32, numpy=
array([-0.10846302, -0.09363967, -0.10256259, -0.12220611, -0.10043977,
       -0.12048358, -0.1143779 , -0.11061859, -0.0819042 , -0.07944234,
       -0.08579838, -0.11133981, -0.110672  , -0.09598734, -0.11081921,
       -0.11012349, -0.08634783, -0.10418039, -0.08456437, -0.09229805,
       -0.10732491, -0.11185248, -0.11535256, -0.10148907, -0.09869622,
       -0.11056221, -0.09000882, -0.09208713, -0.0598283 , -0.08720026,
       -0.12041785, -0.08431211, -0.09904582, -0.11579016, -0.0793704 ,
       -0.14844064, -0.08551419, -0.09759018, -0.068274  , -0.04889043,
       -0.08643989, -0.11690804, -0.11649935, -0.09219953, -0.09040482,
       -0.10047588, -0.06579048, -0.07924906, -0.0832098 , -0.11246671],
      dtype=float32)>

In [21]:
# Fresh model for the baseline run without temperature decay.
model = create_model(
    n_hidden_layers=1,
    n_dense_units=512,
    ratio_dropout=0.5,
    optimizer='adam',
)

In [22]:
# Baseline run: temperature and temperature_decay keep their defaults of 1,
# so the sampling temperature stays constant.
best_idx, best_score, best_weights = train(
    model,
    max_batch=200,
    verbose=1,
)

0 0.5384590436273902
1 0.5802635910321251
2 0.621978251992235
3 0.6276948273397991
4 0.6201411553365314
5 0.609711337373402
6 0.6275114149421601
7 0.6567337131945155
8 0.6817516983119097
9 0.6652173237828999
10 0.6713429487339408
11 0.6725951244218431
12 0.6771233117106743
13 0.7039295493171229
14 0.7086510551725765
15 0.722676334719705
16 0.7223673563920436
17 0.7266063197369507
18 0.7427527924488475
19 0.7466812674037772
20 0.7524916429584869
21 0.7619961980648793
22 0.7670574198074354
23 0.7705762953094916
24 0.7654734384561419
25 0.7465809346063639
26 0.7357507957453797
27 0.7079083583262262
28 0.7293564258579629
29 0.7385657239832059
30 0.7587864404239651
31 0.7725162060605284
32 0.7808285903241933
33 0.78389041610854
34 0.769716099446532
35 0.7588301475199908
36 0.753411480510833
37 0.7912128948844114
38 0.7812121012974997
39 0.7834453888504027
40 0.7918727382270087
41 0.7942026738951611
42 0.8090397979623203
43 0.8228972351756485
44 0.8371817517215754
45 0.8079173262752348
46 0.

In [23]:
# clone_model builds a new model with the same architecture but newly
# created weights, so the best snapshot has to be loaded explicitly.
best_model = clone_model(model)
best_model.set_weights(best_weights)

In [24]:
# Score the best-weights snapshot (second argument is the number of test rounds).
test_model(best_model, 10000)

np.float64(0.8896954295441154)

In [25]:
# Score the model as it stands after the last training batch, for comparison
# with the best snapshot.
test_model(model, 10000)

np.float64(0.7700434048465293)

In [26]:
# Inspect the first 50 entries of row 0 of the model's first weight tensor.
model.weights[0][0][0:50]

<tf.Tensor: shape=(50,), dtype=float32, numpy=
array([-0.13171698, -0.07805236, -0.19163772, -0.16469546, -0.13337971,
       -0.15413234, -0.18378848, -0.13037446, -0.20202072, -0.10383993,
       -0.11229815, -0.14827262, -0.12645932, -0.12167479, -0.06546519,
       -0.03052763, -0.16950527, -0.14042017, -0.03947781, -0.03243637,
       -0.14985435, -0.07508776, -0.20118535, -0.15045352, -0.1472116 ,
       -0.06761225, -0.13061437, -0.20624864, -0.0611332 , -0.09138539,
       -0.109717  , -0.1518251 , -0.13589486, -0.08704136, -0.12082373,
       -0.14995103, -0.14566788, -0.22887288, -0.01483672, -0.16935329,
       -0.05852098, -0.15049323, -0.1335745 , -0.10824125, -0.10736649,
       -0.16560148, -0.1655342 , -0.14645581, -0.15772112, -0.08011957],
      dtype=float32)>

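看上去退火确实使模型表现变差了：最佳权重的测试得分为 0.862（退火）对 0.890（不退火），训练结束时权重的测试得分为 0.684 对 0.770。不过两组实验都只运行了一次，且本 notebook 中未见固定随机种子，结论仍需多次重复实验来确认。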

In [44]:
# Count the strictly positive entries in the model's second weight tensor
# (presumably the first layer's bias — TODO confirm).
np.sum(model.weights[1].numpy()>0)

np.int64(246)

In [51]:
# Count the entries of the model's second weight tensor whose magnitude
# exceeds 0.05.
np.sum(np.abs(model.weights[1].numpy()) > 0.05)

np.int64(1)