In [3]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg


In [4]:


class GridWorldEnv:
    """A 4x4 grid world containing a player, a dinosaur, a cave and a goal.

    Positions are ``(row, col)`` tuples. Row 0 is the top row and column 0 is
    the left column in both the text rendering and the image rendering.

    Actions accepted by :meth:`step`: 0 = up, 1 = right, 2 = down, 3 = left.
    """

    SIZE = 4  # the grid is SIZE x SIZE

    # Image file loaded for each kind of cell (paths are relative to the
    # current working directory).
    _TILE_FILES = {
        "player": 'player.png',
        "dinosaur": 'dinosaur.png',
        "cave": 'cave.png',
        "goal": 'goal.png',
        "empty": 'empty.png',
    }

    # Character printed for each kind of cell in the text rendering.
    _TILE_SYMBOLS = {
        "player": "P",
        "dinosaur": "D",
        "cave": "C",
        "goal": "G",
        "empty": ".",
    }

    def __init__(self):
        self.board = np.zeros((4, 4))
        self.player_position = (0, 0)
        self.dinosaur_position = (1, 1)
        self.cave_position = (2, 2)
        self.goal_position = (3, 3)

    def reset(self):
        """Clear the board, move the player back to ``(0, 0)`` and return that position."""
        self.board = np.zeros((4, 4))
        self.player_position = (0, 0)
        return self.player_position

    def _cell_kind(self, cell):
        """Return which tile occupies ``cell``; the player takes precedence over the rest."""
        if cell == self.player_position:
            return "player"
        if cell == self.dinosaur_position:
            return "dinosaur"
        if cell == self.cave_position:
            return "cave"
        if cell == self.goal_position:
            return "goal"
        return "empty"

    @classmethod
    def _tile_extent(cls, row, col):
        """Return the ``(left, right, bottom, top)`` extent of the tile at ``(row, col)``.

        Columns map to x and rows are counted downward from the top edge, so
        the picture has the same orientation as the printed grid.
        """
        return (col, col + 1, cls.SIZE - 1 - row, cls.SIZE - row)

    def render(self):
        """Draw the grid as a matplotlib figure, then print it as text.

        Raises whatever ``mpimg.imread`` raises when a tile image is missing
        or is not a readable image file.
        """
        tiles = {kind: mpimg.imread(path) for kind, path in self._TILE_FILES.items()}

        _, ax = plt.subplots()
        for row in range(self.SIZE):
            for col in range(self.SIZE):
                ax.imshow(tiles[self._cell_kind((row, col))],
                          extent=self._tile_extent(row, col))

        # Each imshow() call re-fits the view to the image it just added, so
        # without explicit limits only the last tile would be visible.
        ax.set_xlim(0, self.SIZE)
        ax.set_ylim(0, self.SIZE)
        plt.show()

        for row in range(self.SIZE):
            for col in range(self.SIZE):
                print(self._TILE_SYMBOLS[self._cell_kind((row, col))], end=" ")
            print()

    def step(self, action):
        """Move the player one cell and return ``(position, reward, done, info)``.

        Moves are clamped at the grid border. Any ``action`` other than 0-3
        leaves the player where it is. Rewards: -1 on the dinosaur cell, +1 on
        the cave cell (neither ends the episode, and both can be collected
        again on a later visit), 100 on the goal cell (ends the episode),
        0 elsewhere. ``info`` is always an empty dict.
        """
        last = self.SIZE - 1
        row, col = self.player_position
        if action == 0:  # up
            row = max(0, row - 1)
        elif action == 1:  # right
            col = min(last, col + 1)
        elif action == 2:  # down
            row = min(last, row + 1)
        elif action == 3:  # left
            col = max(0, col - 1)
        self.player_position = (row, col)

        if self.player_position == self.dinosaur_position:
            return self.player_position, -1, False, {}  # penalty for meeting the dinosaur
        if self.player_position == self.cave_position:
            return self.player_position, 1, False, {}  # small bonus for entering the cave
        if self.player_position == self.goal_position:
            return self.player_position, 100, True, {}  # reaching the goal ends the episode
        return self.player_position, 0, False, {}  # plain move

In [5]:
class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy policy over 4 actions.

    The Q-table has shape ``(4, 4, 4)``. ``q_table[state]`` is indexed with
    the state tuple returned by the environment and yields one value per
    action (0 = up, 1 = right, 2 = down, 3 = left).
    """

    def __init__(self, env, discount_factor=0.95, learning_rate=0.1, exploration_rate=0.5):
        """Store the environment and hyper-parameters and zero-initialise the Q-table.

        Args:
            env: object providing ``reset()`` and ``step(action)``.
            discount_factor: weight given to the best next-state value.
            learning_rate: step size of each Q-value update.
            exploration_rate: probability of picking a random action.
        """
        self.env = env
        self.q_table = np.zeros((4, 4, 4))  # Initialize Q-table
        self.discount_factor = discount_factor
        self.learning_rate = learning_rate
        self.exploration_rate = exploration_rate

    def get_action(self, state, greedy=False):
        """Pick an action for ``state``.

        With probability ``exploration_rate`` a uniformly random action is
        returned; otherwise the action with the highest Q-value. Pass
        ``greedy=True`` to skip exploration entirely (e.g. when evaluating
        the learned policy).
        """
        if not greedy and np.random.uniform(0, 1) < self.exploration_rate:  # Exploration
            return np.random.choice(4)  # 4 actions: up, right, down, left
        return np.argmax(self.q_table[state])  # Exploitation

    def train(self, episodes, log_every=10):
        """Run ``episodes`` training episodes, updating the Q-table in place.

        Args:
            episodes: number of episodes to play.
            log_every: print the episode's total reward every this many
                episodes; a falsy value (0 or None) disables printing.
        """
        for episode in range(episodes):
            state = self.env.reset()
            total_reward = 0
            done = False
            while not done:
                action = self.get_action(state)
                next_state, reward, done, _ = self.env.step(action)

                old_value = self.q_table[state][action]
                # No reward follows a terminal state, so do not bootstrap
                # from its Q-values once the episode is over.
                next_max = 0.0 if done else np.max(self.q_table[next_state])
                td_target = reward + self.discount_factor * next_max

                self.q_table[state][action] = (
                    (1 - self.learning_rate) * old_value
                    + self.learning_rate * td_target
                )
                state = next_state
                total_reward += reward

            if log_every and episode % log_every == 0:  # Periodic progress report
                print(f"Episode: {episode}, Total reward: {total_reward}")

# Seed NumPy's global RNG, which the agent uses for exploration, so that a
# fresh "Restart & Run All" reproduces the same training run.
np.random.seed(42)

# Create an instance of the environment
env = GridWorldEnv()

# Create a Q-learning agent
agent = QLearningAgent(env)

# Train the agent
agent.train(100)

Episode: 0, Total reward: 143
Episode: 10, Total reward: 101
Episode: 20, Total reward: 103
Episode: 30, Total reward: 103
Episode: 40, Total reward: 101
Episode: 50, Total reward: 101
Episode: 60, Total reward: 103
Episode: 70, Total reward: 101
Episode: 80, Total reward: 101
Episode: 90, Total reward: 102


In [6]:
# Upper bound on the rollout length: every step draws a figure, so a policy
# that never reaches the goal must not be allowed to loop forever.
MAX_EVAL_STEPS = 200

# Reset the environment and get the initial state
state = env.reset()

# Initialize the total reward and the step counter
total_reward = 0
steps_taken = 0

# Run the simulation until the game ends or the step cap is hit
done = False
while not done and steps_taken < MAX_EVAL_STEPS:
    # Choose an action based on the current state (this still explores with
    # the agent's exploration_rate, exactly as during training)
    action = agent.get_action(state)

    # Take the action and get the new state and reward
    state, reward, done, info = env.step(action)

    # Add the reward to the total reward
    total_reward += reward
    steps_taken += 1

    # Print the current state of the environment
    env.render()
    print()

if not done:
    print(f"Stopped after {MAX_EVAL_STEPS} steps without reaching the goal.")

# Print the final total reward
print(f"Final total reward: {total_reward}")

SyntaxError: not a PNG file (<string>)