In [1]:
import time
import flappy_bird_gym
import gymnasium
# Build the Flappy Bird environment registered under this id; every later
# cell (training, evaluation) shares this single instance.
env_id = "FlappyBird-12-v0"
env = flappy_bird_gym.make(env_id)

In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

def build_model(obs, actions):
    """Create the fully connected Q-network and print its summary.

    Parameters
    ----------
    obs : int
        Size of the observation vector; the network input shape is ``(1, obs)``.
        (The leading 1 presumably mirrors the replay memory's window_length=1 --
        confirm against the agent configuration.)
    actions : int
        Number of units in the final linear layer (one output per action).

    Returns
    -------
    The assembled ``Sequential`` model (not compiled here).
    """
    # Input layer first, then the remaining ReLU hidden layers in order.
    stack = [Dense(64, activation='relu', input_shape=(1, obs))]
    for width in (128, 256, 64, 128):
        stack.append(Dense(width, activation='relu'))

    # Collapse the (1, 128) activations to a flat vector before the output head.
    stack.append(Flatten())
    stack.append(Dense(actions, activation='linear'))

    model = Sequential(stack)
    model.summary()
    return model

In [3]:
# obs: length of the first axis of the observation space's shape.
# actions: the action space's `n` attribute (its number of discrete actions).
obs, actions = env.observation_space.shape[0], env.action_space.n

In [4]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [5]:
def build_agent(model, actions):
    """Assemble a dueling DQN agent around ``model``.

    Parameters
    ----------
    model
        The Q-network handed to ``DQNAgent``.
    actions
        Forwarded to ``DQNAgent`` as ``nb_actions``.

    Returns
    -------
    The constructed ``DQNAgent``; it is not compiled inside this function.
    """
    # Epsilon-greedy exploration whose `eps` attribute is annealed between
    # value_max and value_min over nb_steps; value_test is the eps for testing.
    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=0.1,
        value_min=.0001,
        value_test=.0,
        nb_steps=4000000,
    )
    # Replay memory capped at 100000 entries, one observation per state window.
    replay_buffer = SequentialMemory(limit=100000, window_length=1)

    return DQNAgent(
        model=model,
        memory=replay_buffer,
        policy=exploration,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=actions,
        nb_steps_warmup=500,
    )

In [6]:
# Build the Q-network (this also prints its layer summary), then wrap it in
# the DQN agent that is compiled and trained in the following cells.
model = build_model(obs=obs, actions=actions)
dqn = build_agent(model=model, actions=actions)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1, 64)             832       
                                                                 
 dense_1 (Dense)             (None, 1, 128)            8320      
                                                                 
 dense_2 (Dense)             (None, 1, 256)            33024     
                                                                 
 dense_3 (Dense)             (None, 1, 64)             16448     
                                                                 
 dense_4 (Dense)             (None, 1, 128)            8320      
                                                                 
 flatten (Flatten)           (None, 128)               0         
                                                                 
 dense_5 (Dense)             (None, 2)                 2

In [7]:
import warnings
warnings.simplefilter("ignore")

In [8]:
from tensorflow.keras.optimizers.legacy import Adam
# Compile the agent with an Adam optimizer (learning rate 1e-4).
# Compilation only prepares the agent; the training run is the dqn.fit call.
optimizer = Adam(learning_rate=0.0001)
dqn.compile(optimizer)

In [9]:
# Train the agent on the environment for 8 million steps without rendering.
total_training_steps = 8000000
dqn.fit(env, nb_steps=total_training_steps, visualize=False, verbose=1)

Training for 8000000 steps ...
Interval 1 (0 steps performed)
103 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 4730.556 - mean_q: -9.884 - mean_eps: 0.100 - score: 0.000

Interval 2 (10000 steps performed)
99 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 1869.106 - mean_q: -20.883 - mean_eps: 0.100 - score: 0.000

Interval 3 (20000 steps performed)
99 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 388.150 - mean_q: -29.599 - mean_eps: 0.099 - score: 0.000

Interval 4 (30000 steps performed)
100 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 100.680 - mean_q: -37.899 - mean_eps: 0.099 - score: 0.000

Interval 5 (40000 steps performed)
99 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 74.232 - mean_q: -49.085 - mean_eps: 0.099 - score: 0.000

Interval 6 (50000 steps performed)
99 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 62.137 - mean_q: -57.860 - mean_eps

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



78 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 294.673 - mean_q: -470.085 - mean_eps: 0.069 - score: 0.231

Interval 127 (1260000 steps performed)
81 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 306.240 - mean_q: -466.172 - mean_eps: 0.068 - score: 0.207

Interval 128 (1270000 steps performed)
82 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 280.941 - mean_q: -464.115 - mean_eps: 0.068 - score: 0.170

Interval 129 (1280000 steps performed)
75 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 273.791 - mean_q: -463.444 - mean_eps: 0.068 - score: 0.298

Interval 130 (1290000 steps performed)
75 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 264.585 - mean_q: -461.993 - mean_eps: 0.068 - score: 0.329

Interval 131 (1300000 steps performed)
75 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 261.792 - mean_q: -460.708 - mean_eps: 0.067 - score: 0.377

Interval 132 

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



83 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 158.554 - mean_q: -396.524 - mean_eps: 0.059 - score: 0.201

Interval 164 (1630000 steps performed)
80 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 151.463 - mean_q: -400.513 - mean_eps: 0.059 - score: 0.274

Interval 165 (1640000 steps performed)
83 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 158.737 - mean_q: -402.610 - mean_eps: 0.059 - score: 0.185

Interval 166 (1650000 steps performed)
80 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 149.376 - mean_q: -403.531 - mean_eps: 0.059 - score: 0.209

Interval 167 (1660000 steps performed)
83 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 141.368 - mean_q: -403.680 - mean_eps: 0.058 - score: 0.234

Interval 168 (1670000 steps performed)
82 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 138.695 - mean_q: -399.760 - mean_eps: 0.058 - score: 0.249

Interval 169 

77 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 242.056 - mean_q: -343.329 - mean_eps: 0.051 - score: 0.319

Interval 197 (1960000 steps performed)
64 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 222.746 - mean_q: -341.653 - mean_eps: 0.051 - score: 0.769

Interval 198 (1970000 steps performed)
62 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 210.530 - mean_q: -339.478 - mean_eps: 0.051 - score: 0.726

Interval 199 (1980000 steps performed)
73 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 211.806 - mean_q: -338.442 - mean_eps: 0.050 - score: 0.417

Interval 200 (1990000 steps performed)
72 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 205.983 - mean_q: -335.472 - mean_eps: 0.050 - score: 0.525

Interval 201 (2000000 steps performed)
65 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 217.539 - mean_q: -333.790 - mean_eps: 0.050 - score: 0.688

Interval 202 

65 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 173.870 - mean_q: -273.459 - mean_eps: 0.043 - score: 0.582

Interval 230 (2290000 steps performed)
60 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 183.085 - mean_q: -266.782 - mean_eps: 0.043 - score: 0.845

Interval 231 (2300000 steps performed)
64 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 177.881 - mean_q: -263.674 - mean_eps: 0.042 - score: 0.837

Interval 232 (2310000 steps performed)
60 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 178.729 - mean_q: -259.130 - mean_eps: 0.042 - score: 0.823

Interval 233 (2320000 steps performed)
63 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 176.167 - mean_q: -257.499 - mean_eps: 0.042 - score: 0.802

Interval 234 (2330000 steps performed)
58 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 161.626 - mean_q: -251.356 - mean_eps: 0.042 - score: 1.015

Interval 235 

56 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 174.334 - mean_q: -204.250 - mean_eps: 0.035 - score: 1.429

Interval 263 (2620000 steps performed)
59 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 173.686 - mean_q: -203.495 - mean_eps: 0.034 - score: 1.136

Interval 264 (2630000 steps performed)
54 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 160.678 - mean_q: -201.502 - mean_eps: 0.034 - score: 1.261

Interval 265 (2640000 steps performed)
55 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 162.727 - mean_q: -200.650 - mean_eps: 0.034 - score: 1.217

Interval 266 (2650000 steps performed)
54 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 151.344 - mean_q: -196.924 - mean_eps: 0.034 - score: 1.109

Interval 267 (2660000 steps performed)
48 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 141.288 - mean_q: -192.735 - mean_eps: 0.033 - score: 1.631

Interval 268 

57 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 221.905 - mean_q: -138.321 - mean_eps: 0.026 - score: 1.388

Interval 296 (2950000 steps performed)
50 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 221.887 - mean_q: -138.668 - mean_eps: 0.026 - score: 1.735

Interval 297 (2960000 steps performed)
46 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 222.671 - mean_q: -136.545 - mean_eps: 0.026 - score: 1.872

Interval 298 (2970000 steps performed)
44 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 220.174 - mean_q: -136.039 - mean_eps: 0.026 - score: 1.809

Interval 299 (2980000 steps performed)
49 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 200.907 - mean_q: -136.418 - mean_eps: 0.025 - score: 1.468

Interval 300 (2990000 steps performed)
40 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 190.672 - mean_q: -134.052 - mean_eps: 0.025 - score: 3.087

Interval 301 

39 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 270.812 - mean_q: -93.662 - mean_eps: 0.018 - score: 2.439

Interval 329 (3280000 steps performed)
38 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 275.000 - mean_q: -92.586 - mean_eps: 0.018 - score: 3.153

Interval 330 (3290000 steps performed)
31 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 256.877 - mean_q: -92.147 - mean_eps: 0.018 - score: 4.958

Interval 331 (3300000 steps performed)
45 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 280.270 - mean_q: -90.696 - mean_eps: 0.017 - score: 2.261

Interval 332 (3310000 steps performed)
39 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 291.291 - mean_q: -90.992 - mean_eps: 0.017 - score: 2.905

Interval 333 (3320000 steps performed)
36 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 266.454 - mean_q: -90.134 - mean_eps: 0.017 - score: 3.878

Interval 334 (33300

4 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 280.669 - mean_q: -48.945 - mean_eps: 0.002 - score: 36.922

Interval 395 (3940000 steps performed)
12 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 242.341 - mean_q: -48.263 - mean_eps: 0.001 - score: 14.806

Interval 396 (3950000 steps performed)
12 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 220.471 - mean_q: -47.041 - mean_eps: 0.001 - score: 22.562

Interval 397 (3960000 steps performed)
Interval 398 (3970000 steps performed)
Interval 399 (3980000 steps performed)
2 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 189.684 - mean_q: -44.541 - mean_eps: 0.000 - score: 151.053

Interval 400 (3990000 steps performed)
3 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 174.486 - mean_q: -43.509 - mean_eps: 0.000 - score: 314.339

Interval 401 (4000000 steps performed)
22 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - los

7 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 322.316 - mean_q: -36.828 - mean_eps: 0.000 - score: 45.043

Interval 429 (4280000 steps performed)
6 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 285.728 - mean_q: -35.583 - mean_eps: 0.000 - score: 28.752

Interval 430 (4290000 steps performed)
13 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 295.377 - mean_q: -35.413 - mean_eps: 0.000 - score: 23.183

Interval 431 (4300000 steps performed)
11 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 258.146 - mean_q: -38.424 - mean_eps: 0.000 - score: 22.590

Interval 432 (4310000 steps performed)
9 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 236.909 - mean_q: -38.347 - mean_eps: 0.000 - score: 34.269

Interval 433 (4320000 steps performed)
14 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 277.814 - mean_q: -37.087 - mean_eps: 0.000 - score: 12.101

Interval 434 (43

In [10]:
# Save the trained network weights (currently disabled; uncomment the line below to write the checkpoint that the next cell loads)
#dqn.save_weights("weights/flappy_bird_solution_8million.h5")

In [11]:
# Restore network weights from the stored checkpoint file.
# NOTE(review): the matching save_weights call is commented out in this
# notebook, so this file must already exist on disk -- confirm before a fresh run.
checkpoint_path = "weights/flappy_bird_solution_8million.h5"
dqn.load_weights(checkpoint_path)

In [None]:
# Evaluate the agent for 100 episodes without rendering (verbose=0); the
# returned history object is kept for the statistics computed afterwards.
evaluation_episodes = 100
results = dqn.test(env, visualize=False, nb_episodes=evaluation_episodes, verbose=0)

In [28]:
import numpy as np
# Per-episode step counts taken from the evaluation history.
episode_steps = np.asarray(results.history['nb_steps'], dtype=float)

# Mean episode length.
print(np.mean(episode_steps))

# Sample standard deviation (ddof=1). This is the quantity the earlier
# np.sqrt(np.cov(...)) expression produced for a 1-D sequence, written with
# the direct NumPy call so the intent is obvious to the reader.
print(np.std(episode_steps, ddof=1))

6081.77
5665.289834259287
