# Module Five Assignment: Cartpole Problem
Review the code in this notebook and in the score_logger.py file in the *scores* folder (directory). Once you have reviewed the code, return to this notebook and select **Cell** and then **Run All** from the menu bar to run this code. The code takes several minutes to run.

In [2]:
import random  
import gym  
import numpy as np  
from collections import deque  
from keras.models import Sequential  
from keras.layers import Dense  
from keras.optimizers import Adam  
  
  
from scores.score_logger import ScoreLogger  
  
# Identifier passed to gym.make() and to the ScoreLogger.
ENV_NAME = "CartPole-v1"

# Q-learning target: discount applied to the best predicted next-state Q-value.
GAMMA = 0.9
# Step size handed to the Adam optimizer when the network is compiled.
LEARNING_RATE = 0.005

# Replay memory: maximum number of stored transitions, and how many are
# sampled on each experience_replay() call.
MEMORY_SIZE = 10 ** 6
BATCH_SIZE = 20

# Epsilon-greedy schedule: the exploration rate starts at EXPLORATION_MAX, is
# multiplied by EXPLORATION_DECAY after every replay pass, and is never allowed
# to drop below EXPLORATION_MIN.
EXPLORATION_MAX = 0.9
EXPLORATION_MIN = 0.02
EXPLORATION_DECAY = 0.99
  
  
class DQNSolver:
    """Epsilon-greedy deep Q-learning agent with a replay memory.

    The agent owns a small fully connected Keras network that maps one
    observation to one Q-value per action, a bounded deque of past
    transitions, and an exploration rate that decays as training proceeds.
    """

    def __init__(self, observation_space, action_space):
        """Create the replay memory and compile the Q-network.

        Args:
            observation_space: Number of values in a single observation; used
                as the input width of the first layer.
            action_space: Number of discrete actions; used as the width of the
                output layer and as the range for random action selection.
        """
        self.exploration_rate = EXPLORATION_MAX
        self.action_space = action_space
        # Bounded buffer: once MEMORY_SIZE is reached the oldest entries drop off.
        self.memory = deque(maxlen=MEMORY_SIZE)

        # Two hidden ReLU layers of 24 units, then one linear output per action.
        network = Sequential()
        network.add(Dense(24, input_shape=(observation_space,), activation="relu"))
        network.add(Dense(24, activation="relu"))
        network.add(Dense(self.action_space, activation="linear"))
        # NOTE(review): `lr` is the legacy keyword; recent Keras releases name it
        # `learning_rate` -- confirm against the installed Keras version.
        network.compile(loss="mse", optimizer=Adam(lr=LEARNING_RATE))
        self.model = network

    def remember(self, state, action, reward, next_state, done):
        """Append one transition tuple to the replay memory."""
        transition = (state, action, reward, next_state, done)
        self.memory.append(transition)

    def act(self, state):
        """Choose an action for ``state`` using the epsilon-greedy rule.

        With probability ``exploration_rate`` a uniformly random action index
        is returned; otherwise the index of the largest Q-value in the first
        row of the network's prediction is returned.
        """
        explore = np.random.rand() < self.exploration_rate
        if explore:
            return random.randrange(self.action_space)
        return np.argmax(self.model.predict(state)[0])

    def experience_replay(self):
        """Fit the network on a random batch of remembered transitions.

        Does nothing until at least BATCH_SIZE transitions are stored. For
        each sampled transition the target for the taken action is the reward
        alone when the transition was terminal, and otherwise the reward plus
        GAMMA times the best predicted Q-value of the next state. The network
        is fitted one transition at a time, and the exploration rate is
        decayed once per call (floored at EXPLORATION_MIN).
        """
        if len(self.memory) < BATCH_SIZE:
            return
        for state, action, reward, state_next, terminal in random.sample(self.memory, BATCH_SIZE):
            if terminal:
                target = reward
            else:
                best_next = np.amax(self.model.predict(state_next)[0])
                target = (reward + GAMMA * best_next)
            # Only the taken action's Q-value is overwritten; the others keep
            # the network's own predictions.
            q_values = self.model.predict(state)
            q_values[0][action] = target
            self.model.fit(state, q_values, verbose=0)
        self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate * EXPLORATION_DECAY)
  
  
def cartpole():
    """Train a DQNSolver on the ENV_NAME environment, one episode after another.

    Each episode resets the environment, then repeatedly asks the agent for an
    action, applies it, stores the transition and runs one experience-replay
    pass. When an episode ends, its length in steps is printed and passed to
    the ScoreLogger as the score.

    The outer loop has no exit condition of its own, so this function does not
    return. NOTE(review): presumably ScoreLogger ends the process once its
    "solved" criterion is met -- confirm in scores/score_logger.py.

    Both environment API generations are accepted: the legacy one
    (``reset() -> obs``, ``step() -> (obs, reward, done, info)``) and the newer
    one (``reset() -> (obs, info)``,
    ``step() -> (obs, reward, terminated, truncated, info)``).
    """
    env = gym.make(ENV_NAME)
    score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)
    run = 0
    while True:
        run += 1
        reset_result = env.reset()
        # Newer gym/gymnasium return (observation, info); legacy gym returns
        # the observation array on its own.
        state = reset_result[0] if isinstance(reset_result, tuple) else reset_result
        # The network expects a batch dimension, hence shape (1, observation_space).
        state = np.reshape(state, [1, observation_space])
        step = 0
        while True:
            step += 1
            # Call env.render() here to watch the agent while it trains.
            action = dqn_solver.act(state)
            step_result = env.step(action)
            if len(step_result) == 5:
                state_next, reward, terminated, truncated, _ = step_result
                # The legacy `done` flag covered both failure and the time
                # limit, so either signal ends the episode here as well.
                terminal = terminated or truncated
            else:
                state_next, reward, terminal, _ = step_result
            # Flip the sign of the reward on the step that ends the episode.
            reward = reward if not terminal else -reward
            state_next = np.reshape(state_next, [1, observation_space])
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            if terminal:
                print("Run: {}, exploration: {}, score: {}".format(run, dqn_solver.exploration_rate, step))
                score_logger.add_score(step, run)
                break
            # Replay runs after every non-terminal step only.
            dqn_solver.experience_replay()



In [None]:
# Start training. cartpole() loops over episodes with no exit condition of its
# own, so this cell keeps producing output until the process is stopped.
cartpole()

Run: 1, exploration: 0.9, score: 17
Scores: (min: 17, avg: 17, max: 17)

Run: 2, exploration: 0.6657303360494523, score: 33
Scores: (min: 17, avg: 25, max: 33)

Run: 3, exploration: 0.5611728554020801, score: 18
Scores: (min: 17, avg: 22.666666666666668, max: 33)

Run: 4, exploration: 0.5024395246930774, score: 12
Scores: (min: 12, avg: 20, max: 33)

Run: 5, exploration: 0.41094877296952315, score: 21
Scores: (min: 12, avg: 20.2, max: 33)

Run: 6, exploration: 0.3792007999392913, score: 9
Scores: (min: 9, avg: 18.333333333333332, max: 33)

Run: 7, exploration: 0.3464064710041361, score: 10
Scores: (min: 9, avg: 17.142857142857142, max: 33)

Run: 8, exploration: 0.3101509650127659, score: 12
Scores: (min: 9, avg: 16.5, max: 33)

Run: 9, exploration: 0.27216399210211945, score: 14
Scores: (min: 9, avg: 16.22222222222222, max: 33)

Run: 10, exploration: 0.2486265009292875, score: 10
Scores: (min: 9, avg: 15.6, max: 33)

Run: 11, exploration: 0.2226048173044814, score: 12
Scores: (min: 9, 

Run: 90, exploration: 0.02, score: 11
Scores: (min: 8, avg: 10.866666666666667, max: 33)

Run: 91, exploration: 0.02, score: 9
Scores: (min: 8, avg: 10.846153846153847, max: 33)

Run: 92, exploration: 0.02, score: 14
Scores: (min: 8, avg: 10.880434782608695, max: 33)

Run: 93, exploration: 0.02, score: 9
Scores: (min: 8, avg: 10.86021505376344, max: 33)

Run: 94, exploration: 0.02, score: 22
Scores: (min: 8, avg: 10.97872340425532, max: 33)

Run: 95, exploration: 0.02, score: 12
Scores: (min: 8, avg: 10.989473684210527, max: 33)

Run: 96, exploration: 0.02, score: 9
Scores: (min: 8, avg: 10.96875, max: 33)

Run: 97, exploration: 0.02, score: 10
Scores: (min: 8, avg: 10.958762886597938, max: 33)

Run: 98, exploration: 0.02, score: 10
Scores: (min: 8, avg: 10.948979591836734, max: 33)

Run: 99, exploration: 0.02, score: 14
Scores: (min: 8, avg: 10.97979797979798, max: 33)

Run: 100, exploration: 0.02, score: 13
Scores: (min: 8, avg: 11, max: 33)

Run: 101, exploration: 0.02, score: 11
Sc

Run: 194, exploration: 0.02, score: 10
Scores: (min: 8, avg: 29.34, max: 119)

Run: 195, exploration: 0.02, score: 10
Scores: (min: 8, avg: 29.32, max: 119)

Run: 196, exploration: 0.02, score: 9
Scores: (min: 8, avg: 29.32, max: 119)

Run: 197, exploration: 0.02, score: 9
Scores: (min: 8, avg: 29.31, max: 119)

Run: 198, exploration: 0.02, score: 10
Scores: (min: 8, avg: 29.31, max: 119)

Run: 199, exploration: 0.02, score: 8
Scores: (min: 8, avg: 29.25, max: 119)

Run: 200, exploration: 0.02, score: 10
Scores: (min: 8, avg: 29.22, max: 119)

Run: 201, exploration: 0.02, score: 10
Scores: (min: 8, avg: 29.21, max: 119)

Run: 202, exploration: 0.02, score: 9
Scores: (min: 8, avg: 29.12, max: 119)

Run: 203, exploration: 0.02, score: 9
Scores: (min: 8, avg: 28.7, max: 119)

Run: 204, exploration: 0.02, score: 10
Scores: (min: 8, avg: 28.68, max: 119)

Run: 205, exploration: 0.02, score: 9
Scores: (min: 8, avg: 28.66, max: 119)

Run: 206, exploration: 0.02, score: 9
Scores: (min: 8, avg:

Run: 299, exploration: 0.02, score: 22
Scores: (min: 8, avg: 19.86, max: 126)

Run: 300, exploration: 0.02, score: 15
Scores: (min: 8, avg: 19.91, max: 126)

Run: 301, exploration: 0.02, score: 42
Scores: (min: 8, avg: 20.23, max: 126)

Run: 302, exploration: 0.02, score: 14
Scores: (min: 8, avg: 20.28, max: 126)

Run: 303, exploration: 0.02, score: 65
Scores: (min: 8, avg: 20.84, max: 126)

Run: 304, exploration: 0.02, score: 36
Scores: (min: 8, avg: 21.1, max: 126)

Run: 305, exploration: 0.02, score: 20
Scores: (min: 8, avg: 21.21, max: 126)

Run: 306, exploration: 0.02, score: 23
Scores: (min: 8, avg: 21.35, max: 126)

Run: 307, exploration: 0.02, score: 26
Scores: (min: 8, avg: 21.51, max: 126)

Run: 308, exploration: 0.02, score: 50
Scores: (min: 8, avg: 21.92, max: 126)

Run: 309, exploration: 0.02, score: 22
Scores: (min: 8, avg: 22.05, max: 126)

Run: 310, exploration: 0.02, score: 14
Scores: (min: 8, avg: 22.09, max: 126)

Run: 311, exploration: 0.02, score: 27
Scores: (min: 

Run: 403, exploration: 0.02, score: 116
Scores: (min: 9, avg: 35.16, max: 119)

Run: 404, exploration: 0.02, score: 88
Scores: (min: 9, avg: 35.68, max: 119)

Run: 405, exploration: 0.02, score: 101
Scores: (min: 9, avg: 36.49, max: 119)

Run: 406, exploration: 0.02, score: 15
Scores: (min: 9, avg: 36.41, max: 119)

Run: 407, exploration: 0.02, score: 151
Scores: (min: 9, avg: 37.66, max: 151)

Run: 408, exploration: 0.02, score: 111
Scores: (min: 9, avg: 38.27, max: 151)

Run: 409, exploration: 0.02, score: 152
Scores: (min: 9, avg: 39.57, max: 152)

Run: 410, exploration: 0.02, score: 170
Scores: (min: 9, avg: 41.13, max: 170)

Run: 411, exploration: 0.02, score: 10
Scores: (min: 9, avg: 40.96, max: 170)

Run: 412, exploration: 0.02, score: 14
Scores: (min: 9, avg: 41, max: 170)

Run: 413, exploration: 0.02, score: 20
Scores: (min: 9, avg: 40.45, max: 170)

Run: 414, exploration: 0.02, score: 14
Scores: (min: 9, avg: 40.45, max: 170)

Run: 415, exploration: 0.02, score: 10
Scores: (m

Run: 506, exploration: 0.02, score: 65
Scores: (min: 9, avg: 156.04, max: 500)

Run: 507, exploration: 0.02, score: 124
Scores: (min: 9, avg: 155.77, max: 500)

Run: 508, exploration: 0.02, score: 192
Scores: (min: 9, avg: 156.58, max: 500)

Run: 509, exploration: 0.02, score: 141
Scores: (min: 9, avg: 156.47, max: 500)

Run: 510, exploration: 0.02, score: 199
Scores: (min: 9, avg: 156.76, max: 500)

Run: 511, exploration: 0.02, score: 238
Scores: (min: 9, avg: 159.04, max: 500)

Run: 512, exploration: 0.02, score: 152
Scores: (min: 9, avg: 160.42, max: 500)

Run: 513, exploration: 0.02, score: 151
Scores: (min: 9, avg: 161.73, max: 500)

Run: 514, exploration: 0.02, score: 217
Scores: (min: 9, avg: 163.76, max: 500)

Run: 515, exploration: 0.02, score: 201
Scores: (min: 9, avg: 165.67, max: 500)

Run: 516, exploration: 0.02, score: 125
Scores: (min: 9, avg: 166.81, max: 500)

Run: 517, exploration: 0.02, score: 253
Scores: (min: 12, avg: 169.25, max: 500)

Run: 518, exploration: 0.02,

Run: 607, exploration: 0.02, score: 500
Scores: (min: 11, avg: 171.13, max: 500)

Run: 608, exploration: 0.02, score: 179
Scores: (min: 11, avg: 171, max: 500)

Run: 609, exploration: 0.02, score: 141
Scores: (min: 11, avg: 171, max: 500)

Run: 610, exploration: 0.02, score: 176
Scores: (min: 11, avg: 170.77, max: 500)

Run: 611, exploration: 0.02, score: 19
Scores: (min: 11, avg: 168.58, max: 500)

Run: 612, exploration: 0.02, score: 16
Scores: (min: 11, avg: 167.22, max: 500)

Run: 613, exploration: 0.02, score: 12
Scores: (min: 11, avg: 165.83, max: 500)

Run: 614, exploration: 0.02, score: 143
Scores: (min: 11, avg: 165.09, max: 500)

Run: 615, exploration: 0.02, score: 75
Scores: (min: 11, avg: 163.83, max: 500)

Run: 616, exploration: 0.02, score: 115
Scores: (min: 11, avg: 163.73, max: 500)

Run: 617, exploration: 0.02, score: 95
Scores: (min: 11, avg: 162.15, max: 500)

Run: 618, exploration: 0.02, score: 114
Scores: (min: 11, avg: 161.49, max: 500)

Run: 619, exploration: 0.02

Run: 711, exploration: 0.02, score: 10
Scores: (min: 8, avg: 63.93, max: 422)

Run: 712, exploration: 0.02, score: 11
Scores: (min: 8, avg: 63.88, max: 422)

Run: 713, exploration: 0.02, score: 9
Scores: (min: 8, avg: 63.85, max: 422)

Run: 714, exploration: 0.02, score: 9
Scores: (min: 8, avg: 62.51, max: 422)

Run: 715, exploration: 0.02, score: 10
Scores: (min: 8, avg: 61.86, max: 422)

Run: 716, exploration: 0.02, score: 8
Scores: (min: 8, avg: 60.79, max: 422)

Run: 717, exploration: 0.02, score: 10
Scores: (min: 8, avg: 59.94, max: 422)

Run: 718, exploration: 0.02, score: 10
Scores: (min: 8, avg: 58.9, max: 422)

Run: 719, exploration: 0.02, score: 9
Scores: (min: 8, avg: 56.03, max: 422)

Run: 720, exploration: 0.02, score: 10
Scores: (min: 8, avg: 55.94, max: 422)

Run: 721, exploration: 0.02, score: 9
Scores: (min: 8, avg: 55.83, max: 422)

Run: 722, exploration: 0.02, score: 9
Scores: (min: 8, avg: 54.03, max: 422)

Run: 723, exploration: 0.02, score: 10
Scores: (min: 8, avg

Run: 815, exploration: 0.02, score: 42
Scores: (min: 8, avg: 103.11, max: 500)

Run: 816, exploration: 0.02, score: 19
Scores: (min: 8, avg: 103.22, max: 500)

Run: 817, exploration: 0.02, score: 55
Scores: (min: 8, avg: 103.67, max: 500)

Run: 818, exploration: 0.02, score: 46
Scores: (min: 8, avg: 104.03, max: 500)

Run: 819, exploration: 0.02, score: 44
Scores: (min: 8, avg: 104.38, max: 500)

Run: 820, exploration: 0.02, score: 13
Scores: (min: 8, avg: 104.41, max: 500)

Run: 821, exploration: 0.02, score: 15
Scores: (min: 8, avg: 104.47, max: 500)

Run: 822, exploration: 0.02, score: 130
Scores: (min: 8, avg: 105.68, max: 500)

Run: 823, exploration: 0.02, score: 270
Scores: (min: 8, avg: 108.28, max: 500)

Run: 824, exploration: 0.02, score: 84
Scores: (min: 8, avg: 109.03, max: 500)

Run: 825, exploration: 0.02, score: 135
Scores: (min: 8, avg: 110.29, max: 500)

Run: 826, exploration: 0.02, score: 181
Scores: (min: 8, avg: 112.01, max: 500)

Run: 827, exploration: 0.02, score: 

Run: 917, exploration: 0.02, score: 9
Scores: (min: 8, avg: 115.43, max: 500)

Run: 918, exploration: 0.02, score: 10
Scores: (min: 8, avg: 115.07, max: 500)

Run: 919, exploration: 0.02, score: 10
Scores: (min: 8, avg: 114.73, max: 500)

Run: 920, exploration: 0.02, score: 10
Scores: (min: 8, avg: 114.7, max: 500)

Run: 921, exploration: 0.02, score: 9
Scores: (min: 8, avg: 114.64, max: 500)

Run: 922, exploration: 0.02, score: 10
Scores: (min: 8, avg: 113.44, max: 500)

Run: 923, exploration: 0.02, score: 9
Scores: (min: 8, avg: 110.83, max: 500)

Run: 924, exploration: 0.02, score: 10
Scores: (min: 8, avg: 110.09, max: 500)

Run: 925, exploration: 0.02, score: 9
Scores: (min: 8, avg: 108.83, max: 500)

Run: 926, exploration: 0.02, score: 10
Scores: (min: 8, avg: 107.12, max: 500)

Run: 927, exploration: 0.02, score: 9
Scores: (min: 8, avg: 105.29, max: 500)

Run: 928, exploration: 0.02, score: 9
Scores: (min: 8, avg: 103.66, max: 500)

Run: 929, exploration: 0.02, score: 10
Scores: 

Run: 1022, exploration: 0.02, score: 9
Scores: (min: 8, avg: 11.1, max: 58)

Run: 1023, exploration: 0.02, score: 26
Scores: (min: 8, avg: 11.27, max: 58)

Run: 1024, exploration: 0.02, score: 11
Scores: (min: 8, avg: 11.28, max: 58)

Run: 1025, exploration: 0.02, score: 10
Scores: (min: 8, avg: 11.29, max: 58)

Run: 1026, exploration: 0.02, score: 67
Scores: (min: 8, avg: 11.86, max: 67)

Run: 1027, exploration: 0.02, score: 11
Scores: (min: 8, avg: 11.88, max: 67)

Run: 1028, exploration: 0.02, score: 10
Scores: (min: 8, avg: 11.89, max: 67)

Run: 1029, exploration: 0.02, score: 16
Scores: (min: 8, avg: 11.95, max: 67)

Run: 1030, exploration: 0.02, score: 13
Scores: (min: 8, avg: 11.98, max: 67)

Run: 1031, exploration: 0.02, score: 11
Scores: (min: 8, avg: 11.99, max: 67)

Run: 1032, exploration: 0.02, score: 21
Scores: (min: 8, avg: 12.1, max: 67)

Run: 1033, exploration: 0.02, score: 10
Scores: (min: 8, avg: 12.1, max: 67)

Run: 1034, exploration: 0.02, score: 24
Scores: (min: 8,

Run: 1125, exploration: 0.02, score: 115
Scores: (min: 8, avg: 103.94, max: 404)

Run: 1126, exploration: 0.02, score: 244
Scores: (min: 8, avg: 105.71, max: 404)

Run: 1127, exploration: 0.02, score: 86
Scores: (min: 8, avg: 106.46, max: 404)

Run: 1128, exploration: 0.02, score: 141
Scores: (min: 8, avg: 107.77, max: 404)

Run: 1129, exploration: 0.02, score: 268
Scores: (min: 8, avg: 110.29, max: 404)

Run: 1130, exploration: 0.02, score: 238
Scores: (min: 8, avg: 112.54, max: 404)

Run: 1131, exploration: 0.02, score: 230
Scores: (min: 8, avg: 114.73, max: 404)

Run: 1132, exploration: 0.02, score: 464
Scores: (min: 8, avg: 119.16, max: 464)

Run: 1133, exploration: 0.02, score: 116
Scores: (min: 8, avg: 120.22, max: 464)

Run: 1134, exploration: 0.02, score: 34
Scores: (min: 8, avg: 120.32, max: 464)

Run: 1135, exploration: 0.02, score: 149
Scores: (min: 8, avg: 121.64, max: 464)

Run: 1136, exploration: 0.02, score: 500
Scores: (min: 8, avg: 126.54, max: 500)

Run: 1137, explora

Run: 1225, exploration: 0.02, score: 118
Scores: (min: 10, avg: 164.65, max: 500)

Run: 1226, exploration: 0.02, score: 67
Scores: (min: 10, avg: 162.88, max: 500)

Run: 1227, exploration: 0.02, score: 299
Scores: (min: 10, avg: 165.01, max: 500)

Run: 1228, exploration: 0.02, score: 172
Scores: (min: 10, avg: 165.32, max: 500)

Run: 1229, exploration: 0.02, score: 220
Scores: (min: 10, avg: 164.84, max: 500)

Run: 1230, exploration: 0.02, score: 57
Scores: (min: 10, avg: 163.03, max: 500)

Run: 1231, exploration: 0.02, score: 13
Scores: (min: 10, avg: 160.86, max: 500)

Run: 1232, exploration: 0.02, score: 102
Scores: (min: 10, avg: 157.24, max: 500)

Run: 1233, exploration: 0.02, score: 104
Scores: (min: 10, avg: 157.12, max: 500)

Run: 1234, exploration: 0.02, score: 283
Scores: (min: 10, avg: 159.61, max: 500)

Run: 1235, exploration: 0.02, score: 408
Scores: (min: 10, avg: 162.2, max: 500)

Run: 1236, exploration: 0.02, score: 259
Scores: (min: 10, avg: 159.79, max: 408)

Run: 123

Note: If the code is running properly, you should begin to see output appearing above this code block. It will take several minutes, so it is recommended that you let this code run in the background while completing other work. When the code has finished, it will print output saying, "Solved in _ runs, _ total runs."

You may see an error about not having an exit command. This error does not affect the program's functionality and results from the steps taken to convert the code from Python 2.x to Python 3. Please disregard this error.

## Analysis:

   #### Explain how reinforcement learning concepts apply to the cartpole problem.
   
   * ###### What is the goal of the agent in this case?
   
   The goal of the agent in this case is to balance the pole for as long as possible, by moving 
the cart left or right (Surma 2019). The agent receives a reward for each time-step that the pole 
remains balanced.


   * ###### What are the various state values?
   
   The state values here are the cart position, the cart velocity, the pole angle, and the 
pole's angular velocity. These four values represent the current state of the system, which 
the agent must use to make decisions.


   * ###### What are the possible actions that can be performed?
   
   The possible actions that can be performed by the agent are moving the cart left or right. 
These actions influence the state of the system, and the agent must choose the action that 
maximizes the reward over time.


   * ###### What reinforcement algorithm is used for this problem?
   
   The reinforcement algorithm used for this problem is the deep Q-Learning algorithm or 
DQN (Surma 2019), which is a model-free reinforcement learning algorithm. The DQN updates 
its estimates of the expected reward for each state-action pair over time, based on the 
observed rewards and the expected rewards for the next state. The agent chooses the action 
that maximizes the expected reward for each state, until it reaches a solution.
   
#### Analyze how experience replay is applied to the cartpole problem.
   
   
   * ###### How does experience replay work in this algorithm?
   
   Experience replay is a key component of DQN agents, including the one applied to the Cartpole 
problem. In the Cartpole problem, experience replay works by storing each of the agent's 
experiences (the state, action, reward, next state, and whether the episode ended) in a replay 
memory. During training, a small batch of these experiences is sampled at random from the 
memory and used to update the network, which avoids data skew (Wang 2021).

   
   * ###### What is the effect of introducing a discount factor for calculating the future rewards?
   
   The effect of introducing a discount factor is that it determines the relative importance of 
future rewards compared to immediate rewards (SALLOUM 2021). A discount factor of 1 means 
that future rewards are just as important as immediate rewards, while a discount factor less 
than 1 means that future rewards are worth less than immediate rewards. This allows the agent 
to trade off short-term rewards for long-term rewards and helps it to make more informed 
decisions.
   
#### Analyze how neural networks are used in deep Q-learning.
   
   
   * ###### Explain the neural network architecture that is used in the cartpole problem.
   
   Neural networks are often used in DQN to approximate the Q-function, which represents 
the expected reward for taking a particular action in a particular state. In this example, a simple 
feedforward neural network is used as the function approximator. The input layer consists of 
the state values (e.g., the positions and velocities of the cart and pole), and the output layer 
consists of the estimated expected reward for each action, produced with a linear activation. 
Between them are two hidden layers of 24 units each with a non-linear activation function, 
the rectified linear unit (ReLU), to help the network capture complex relationships between 
the inputs and outputs.


   * ###### How does the neural network make the Q-learning algorithm more efficient?
   
   The neural network makes the Q-learning algorithm more efficient by allowing it to 
generalize from its experiences, rather than memorizing them. By updating its parameters 
based on the observed rewards and the expected rewards, the network can learn to produce 
accurate estimates of the expected reward for each state-action pair, even in states that it has 
never seen before.
   
   
   * ###### What difference do you see in the algorithm performance when you increase or decrease the learning rate?
   

   The learning rate determines the speed at which the network updates its parameters based 
on the observed rewards and the expected rewards. A higher learning rate means that the 
network updates its parameters more quickly, while a lower learning rate means that the 
network updates its parameters slowly, requiring more epochs (Brownlee 2020).
   
   Increasing the learning rate can cause the network to converge to a solution more quickly, 
but it may also lead to instability or oscillations in the estimated Q-values. Decreasing the 
learning rate can make the network converge more slowly, but it can also lead to a more stable 
and accurate solution. 
   
   
#### References:
   
   Surma, G. (2019, November 10). Cartpole - introduction to reinforcement learning (DQN - 
deep Q-learning). Medium. Retrieved February 4, 2023, from 
https://gsurma.medium.com/cartpole-introduction-to-reinforcement-learning-ed0eb5b58288 

   Wang, M. (2021, October 3). Deep Q-learning tutorial: Mindqn. Medium. Retrieved 
February 4, 2023, from https://towardsdatascience.com/deep-q-learning-tutorial-mindqn-2a4c855abffc 

   SALLOUM, Z. (2021, December 12). Basics of reinforcement learning, the easy way. 
Medium. Retrieved February 4, 2023, from https://zsalloum.medium.com/basics-of-reinforcement-learning-the-easy-way-fb3a0a44f30e 

   Brownlee, J. (2020, September 11). Understand the impact of learning rate on neural 
network performance. MachineLearningMastery.com. Retrieved February 4, 2023, from 
https://machinelearningmastery.com/understand-the-dynamics-of-learning-rate-on-deep-learning-neural-networks/ 
