In [None]:
# environment: flappy bird
    # 4 states (normalized), 2 actions
    #  reward: -5 die, +0.05 survive, +5 pass pipe

# framework: tf.keras
# model: RL A2C (2 NN)
    # method: Advantage = Q - value where Q = R + discount * Q'
    # Actor: 4 input | 64 relu | 64 relu | softmax -> policy. loss: mean (log policy * advantage)
    # Critic: 4 input | 64 relu | 64 relu | linear -> value. loss: mean square (advantage)
    # hyperparams: discount 0.999, optimizer adam-learning rate 0.001

# result: moving-average score of about 1.5 at episode 1000
    # could not get the moving average past 2.0 even after episode 5000; the peak score of 17 was luck (training may be stuck in a local minimum of the loss)

from IPython import display
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import pygame
import sys
from enviroment import FlappyBird
from modelA2C import A2C

In [None]:
# Train the A2C agent on Flappy Bird, recording the score and reward of every episode.

# Training configuration.
DISCOUNT = 0.999       # passed to A2C(); the notebook header lists "discount 0.999"
NUM_EPISODES = 10000   # number of training_loop() calls
LOOP_LIMIT = 10000     # second argument of training_loop(); presumably a per-episode step cap — TODO confirm in modelA2C

env = FlappyBird()
scores, rewards = [], []

model = A2C(DISCOUNT)
# Use the actor/critic networks stored on disk.
model.actor = tf.keras.models.load_model('models/A2Cactor.keras')
model.critic = tf.keras.models.load_model('models/A2Ccritic.keras')

try:
    for episode in range(NUM_EPISODES):
        model.training_loop(env, LOOP_LIMIT)
        scores.append(model.score)
        rewards.append(model.reward)

        # Overwrite the previous status lines instead of flooding the cell output.
        display.clear_output(wait=True)
        print(f"Episode {episode}:")
        print(f"current score: {model.score:.2f}, highest score: {max(scores):.2f}, avg score: {np.mean(scores):.2f}")
        print(f"current reward: {model.reward:.2f}, highest reward: {max(rewards):.2f}, avg reward: {np.mean(rewards):.2f}")
finally:
    # Shut pygame down even when training is interrupted or fails; the scores and
    # rewards collected so far stay available to the cells below.
    pygame.quit()

# No sys.exit() here: in a notebook it raises SystemExit, which marks this cell as
# failed and stops "Run All" before the plotting and saving cells are reached.

In [None]:
# Reward EMA
# Exponential moving average of the per-episode rewards, plotted over the raw values.

# Smoothing factor for a 50-sample span: 2 / (span + 1).
factor = 2 / (1+50)

R_EMA = []
# None marks "no sample seen yet". Using 0 as that marker would discard the smoothing
# whenever the running average is exactly 0 (e.g. after leading zero values).
previous_reward = None
for reward in rewards:
    if previous_reward is None:
        # Seed the average with the first sample.
        new_reward = reward
    else:
        new_reward = previous_reward * (1 - factor) + reward * factor
    previous_reward = new_reward

    R_EMA.append(new_reward)

# With no explicit x values, plot() uses the sample index (0, 1, 2, ...) as x.
plt.plot(rewards, alpha=0.5, label="Reward")
plt.plot(R_EMA, label="Exponential Moving Average")
plt.xlabel("Episode")
plt.ylabel("Reward")
plt.legend();

In [None]:
# Score EMA
# Exponential moving average of the per-episode scores, plotted over the raw values.

# Smoothing factor for a 50-sample span: 2 / (span + 1).
factor = 2 / (1+50)

S_EMA = []
# None marks "no sample seen yet". Using 0 as that marker breaks as soon as the
# running average is exactly 0: a run of zero scores followed by a 3 would make the
# average jump straight to 3 instead of rising to 3 * factor.
previous_score = None
for score in scores:
    if previous_score is None:
        # Seed the average with the first sample.
        new_score = score
    else:
        new_score = previous_score * (1 - factor) + score * factor
    previous_score = new_score

    S_EMA.append(new_score)

# With no explicit x values, plot() uses the sample index (0, 1, 2, ...) as x.
plt.plot(scores, alpha=0.5, label="Score")
plt.plot(S_EMA, label="Exponential Moving Average")
plt.xlabel("Episode")
plt.ylabel("Score")
plt.legend();

In [None]:
# Persist both networks of the agent to the current working directory.
# NOTE(review): the training cell loads from 'models/A2Cactor.keras' and
# 'models/A2Ccritic.keras', while these files are written without the 'models/'
# prefix — confirm whether keeping the loaded checkpoints untouched is intended.
for network, filename in (
    (model.actor, 'A2Cactor.keras'),
    (model.critic, 'A2Ccritic.keras'),
):
    network.save(filename)