In [1]:
import time
import flappy_bird_gym
import gymnasium
# Instantiate the Flappy Bird environment that the agent is trained and tested on.
ENV_ID = "FlappyBird-v0"
env = flappy_bird_gym.make(ENV_ID)

In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

def build_model(obs, actions, hidden_units=(64, 128, 256, 64, 128), show_summary=True):
    """Build the fully connected network used to estimate action values.

    The network is a stack of ReLU ``Dense`` layers, followed by ``Flatten``
    and a linear ``Dense`` output layer with one unit per action.

    Args:
        obs: Size of the last input axis (number of observation features).
        actions: Number of output units (one per discrete action).
        hidden_units: Widths of the ReLU hidden layers, in order. Must contain
            at least one entry. The default reproduces the original
            64-128-256-64-128 stack.
        show_summary: When true (the default), print ``model.summary()``
            before returning.

    Returns:
        The assembled, uncompiled ``Sequential`` model.

    Raises:
        ValueError: If ``hidden_units`` is empty.
    """
    hidden_units = tuple(hidden_units)
    if not hidden_units:
        raise ValueError("hidden_units must contain at least one layer width")

    first_width, *remaining_widths = hidden_units

    model = Sequential()

    # Input is declared as (1, obs); the leading axis of length 1 presumably
    # corresponds to the window_length=1 used by the agent's replay memory.
    model.add(Dense(first_width, activation='relu', input_shape=(1, obs)))
    for width in remaining_widths:
        model.add(Dense(width, activation='relu'))

    # Collapse the length-1 window axis so the output is (batch, actions).
    model.add(Flatten())
    model.add(Dense(actions, activation='linear'))

    if show_summary:
        model.summary()
    return model

In [3]:
# Dimensions the network is sized from: the number of discrete actions and
# the length of the first axis of the observation space.
actions = env.action_space.n
obs = env.observation_space.shape[0]

In [4]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [5]:
def build_agent(model, actions, anneal_steps=1000000, memory_limit=100000,
                warmup_steps=500):
    """Assemble a dueling DQN agent around ``model``.

    Args:
        model: Keras model producing one value per action (for example the
            network returned by ``build_model``).
        actions: Number of discrete actions, forwarded as ``nb_actions``.
        anneal_steps: Forwarded as ``nb_steps`` to the annealed policy, i.e.
            the horizon over which ``eps`` moves from 0.1 to 0.0001. This
            should normally match the ``nb_steps`` later passed to ``fit``.
        memory_limit: Forwarded as ``limit`` to ``SequentialMemory``.
        warmup_steps: Forwarded as ``nb_steps_warmup`` to ``DQNAgent``.

    Returns:
        A ``DQNAgent`` that still has to be compiled before training.
    """
    # Epsilon-greedy exploration whose `eps` attribute is annealed between
    # value_max and value_min; value_test is the setting supplied for testing.
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=0.1,
        value_min=.0001,
        value_test=.0,
        nb_steps=anneal_steps,
    )

    # window_length=1 pairs with the (1, obs) input shape the model declares.
    memory = SequentialMemory(limit=memory_limit, window_length=1)

    return DQNAgent(
        model=model,
        memory=memory,
        policy=policy,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=actions,
        nb_steps_warmup=warmup_steps,
    )

In [6]:
# Build the network (this also prints its summary) and wrap it in the DQN agent.
model = build_model(obs=obs, actions=actions)
dqn = build_agent(model=model, actions=actions)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1, 64)             192       
                                                                 
 dense_1 (Dense)             (None, 1, 128)            8320      
                                                                 
 dense_2 (Dense)             (None, 1, 256)            33024     
                                                                 
 dense_3 (Dense)             (None, 1, 64)             16448     
                                                                 
 dense_4 (Dense)             (None, 1, 128)            8320      
                                                                 
 flatten (Flatten)           (None, 128)               0         
                                                                 
 dense_5 (Dense)             (None, 2)                 258       

In [7]:
import warnings
warnings.simplefilter("ignore")

In [8]:
from tensorflow.keras.optimizers.legacy import Adam
# Compile the agent with an Adam optimizer (learning rate 2.5e-4).
# No training happens here; that is done by the subsequent fit() call.
optimizer = Adam(learning_rate=2.5e-4)
dqn.compile(optimizer)

In [9]:
# Train the agent for one million environment steps without rendering.
# The call is deliberately left as the cell's last expression so the returned
# History object is displayed.
dqn.fit(
    env,
    nb_steps=1_000_000,
    verbose=1,
    visualize=False,
)

Training for 1000000 steps ...
Interval 1 (0 steps performed)
101 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 3028.011 - mean_q: -7.578 - mean_eps: 0.099 - score: 0.000

Interval 2 (10000 steps performed)
99 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 248.466 - mean_q: -18.711 - mean_eps: 0.099 - score: 0.000

Interval 3 (20000 steps performed)
99 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 148.769 - mean_q: -26.409 - mean_eps: 0.098 - score: 0.000

Interval 4 (30000 steps performed)
99 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 122.113 - mean_q: -36.216 - mean_eps: 0.097 - score: 0.000

Interval 5 (40000 steps performed)
99 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 108.734 - mean_q: -42.483 - mean_eps: 0.096 - score: 0.000

Interval 6 (50000 steps performed)
99 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 69.600 - mean_q: -51.369 - mean_eps:

93 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 30.279 - mean_q: -473.952 - mean_eps: 0.033 - score: 0.046

Interval 69 (680000 steps performed)
93 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 27.180 - mean_q: -480.868 - mean_eps: 0.032 - score: 0.036

Interval 70 (690000 steps performed)
93 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 33.487 - mean_q: -482.686 - mean_eps: 0.031 - score: 0.056

Interval 71 (700000 steps performed)
86 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 35.233 - mean_q: -490.804 - mean_eps: 0.030 - score: 0.170

Interval 72 (710000 steps performed)
78 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 38.037 - mean_q: -499.718 - mean_eps: 0.029 - score: 0.311

Interval 73 (720000 steps performed)
82 episodes - episode_reward: -1000.000 [-1000.000, -1000.000] - loss: 46.988 - mean_q: -506.484 - mean_eps: 0.028 - score: 0.291

Interval 74 (730000 steps per

<keras.callbacks.History at 0x1fee4f39780>

In [10]:
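# Save the weights of the trained network.
# Left disabled: uncomment after a fresh training run to write the checkpoint
# file that the following cell loads.
#dqn.save_weights("weights/flappy_bird_solution_simple.h5")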

In [9]:
# Restore the agent's weights from the saved checkpoint.
# NOTE(review): this presumably replaces whatever weights the agent holds at
# this point, including any produced by a fit() run earlier in the session.
weights_path = "weights/flappy_bird_solution_simple.h5"
dqn.load_weights(weights_path)

In [10]:
# Evaluate the agent over 100 episodes with rendering and logging turned off;
# the returned object's `history` dict is summarised in the next cell.
results = dqn.test(
    env,
    nb_episodes=100,
    visualize=False,
    verbose=0,
)

In [11]:
import numpy as np
# Per-episode step counts recorded by dqn.test().
episode_steps = np.asarray(results.history['nb_steps'])

# Mean number of steps per test episode.
print(np.mean(episode_steps))

# Sample standard deviation of the step counts (ddof=1, i.e. N-1
# normalisation), expressed directly rather than as sqrt(cov(x)).
print(np.std(episode_steps, ddof=1))

108.65
48.39387535336648
