# To build an agent using reinforcement learning, I chose a problem from OpenAI Gym, CartPole-v1, for the agent to solve.

## The problem involves balancing a pole on top of a moving cart by controlling the cart's movement. The goal is to keep the pole upright for as long as possible.

### To build an agent that can solve this problem using reinforcement learning, the following steps were followed:

#### 1. Step to initialize the environment.

In [None]:
# NOTE(review): this only binds a module-level name. It is not passed to
# gym.make() where the environment is created, so as written it does not
# change how the environment behaves.
new_step_api=True

#### 2. Importing the necessary libraries:

In [None]:

import gym
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam


#### 3. Creating an environment for the problem:

In [None]:

# Create the CartPole-v1 environment the agent interacts with; its observation
# and action spaces are read later to size the neural network.
env = gym.make('CartPole-v1')


#### 4. Defining the neural network model:

In [None]:

def build_model(state_size, action_size):
    """Build and compile the network that maps a state to one value per action.

    The network has two hidden ReLU layers of 24 units each and a linear
    output layer with ``action_size`` units. It is compiled with mean squared
    error loss and the Adam optimizer (learning rate 0.001).

    Args:
        state_size: Number of input features (passed as ``input_dim``).
        action_size: Number of output units, one per action.

    Returns:
        The compiled ``Sequential`` model.
    """
    layers = [
        Dense(24, input_dim=state_size, activation='relu'),
        Dense(24, activation='relu'),
        # Linear output: the values are unbounded regression targets.
        Dense(action_size, activation='linear'),
    ]
    network = Sequential(layers)
    network.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return network

#### 5. Defining the Agent class:

In [None]:

class Agent:
    """Greedy Q-learning agent backed by a neural-network Q-function.

    The model returned by ``build_model`` is used both to pick actions and as
    the function that is fitted towards one-step Bellman targets.
    """

    def __init__(self, state_size, action_size, gamma=0.99):
        """Create the agent and its Q-network.

        Args:
            state_size: Number of features in a state.
            action_size: Number of available actions.
            gamma: Discount factor applied to the best next-state Q value.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma
        self.model = build_model(state_size, action_size)

    def act(self, state):
        """Return the index of the action with the highest predicted Q value.

        Args:
            state: Batch containing a single state (the first row is used).
        """
        # verbose=0 keeps Keras from printing a progress bar on every step.
        q_values = self.model.predict(state, verbose=0)
        return np.argmax(q_values[0])

    def train(self, state, action, reward, next_state, done):
        """Fit the model on one transition using a one-step Bellman target.

        Args:
            state: Batch containing the single state the action was taken in.
            action: Index of the action that was taken.
            reward: Reward received for the action.
            next_state: Batch containing the single resulting state.
            done: Truthy when the episode ended, in which case no future
                value is bootstrapped from ``next_state``.
        """
        # Start from the current predictions so only the taken action's
        # output contributes to the loss.
        q_values = self.model.predict(state, verbose=0)
        if done:
            # Terminal transition: the return is just the immediate reward,
            # so the next-state forward pass is not needed.
            target = reward
        else:
            next_q_values = self.model.predict(next_state, verbose=0)
            target = reward + self.gamma * np.max(next_q_values[0])
        q_values[0][action] = target
        self.model.fit(state, q_values, verbose=0)


#### 6. Training and testing the agent using the Q-learning algorithm:

In [None]:

# Size the agent's network from the environment's observation and action spaces.
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
agent = Agent(state_size, action_size)

for episode in range(15):
    # Newer gym releases return (observation, info) from reset(); older ones
    # return the observation alone. Accept both.
    reset_result = env.reset()
    state = reset_result[0] if isinstance(reset_result, tuple) else reset_result
    # The model expects a batch dimension, so use shape (1, state_size).
    state = np.reshape(state, [1, state_size])
    done = False
    time_step = 0
    while not done:
        # Choose an action
        action = agent.act(state)
        # Take the action and observe the next state and reward.
        # Newer gym releases return 5 values (terminated and truncated are
        # separate); older ones return 4 with a single done flag.
        step_result = env.step(action)
        if len(step_result) == 5:
            next_state, reward, terminated, truncated, _ = step_result
            done = terminated or truncated
        else:
            next_state, reward, done, _ = step_result
            terminated = done
        next_state = np.reshape(next_state, [1, state_size])
        # Train the agent on the experience. Only a true termination should
        # drop the bootstrapped future value; a time-limit truncation ends
        # the episode but the state itself is not terminal.
        agent.train(state, action, reward, next_state, terminated)
        # Update the state
        state = next_state
        time_step += 1
    print("Episode:", episode, "Time step:", time_step)
