In [1]:
import numpy as np
import gym
from gym import spaces

In [2]:
import gym
from gym import spaces
import numpy as np

class CurriculumEnv(gym.Env):
    def __init__(self, difficulty=1):
        super().__init__()

        self.difficulty = difficulty
        self.max_steps = 100
        self.step_count = 0

        self.action_space = spaces.Discrete(4)  # Left, Right, Up, Down
        self.observation_space = spaces.Box(low=0, high=9, shape=(2,), dtype=np.int32)

        self.target = np.array([9, 9])
        self.agent_position = np.array([0, 0])

    def reset(self):
        self.agent_position = np.array([0, 0])
        self.step_count = 0
        self.update_target_position()
        return self.agent_position

    def step(self, action):
        if self.step_count >= self.max_steps:
            return self.agent_position, 0, True, {}

        # Update agent's position
        move = [(0, -1), (0, 1), (-1, 0), (1, 0)]  # Left, Right, Up, Down
        self.agent_position += move[action]
        self.agent_position = np.clip(self.agent_position, 0, 9)  # Keep within bounds
        self.step_count += 1

        # Calculate reward
        distance = np.linalg.norm(self.target - self.agent_position)
        reward = max(0, 1 - distance / 10)

        done = np.array_equal(self.agent_position, self.target)
        if done:
            reward += 1  # Bonus for reaching the target
            self.difficulty += 1
            self.update_target_position()

        return self.agent_position, reward, done, {}

    def update_target_position(self):
        positions = [(9, 9), (7, 7), (5, 5)]  # Easy, Medium, Hard positions
        self.target = np.array(positions[min(self.difficulty - 1, 2)])

    def render(self):
        grid = np.full((10, 10), '.', dtype=str)
        grid[self.agent_position[1], self.agent_position[0]] = 'A'
        grid[self.target[1], self.target[0]] = 'T'

        print("\n".join([" ".join(row) for row in grid]))
        print(f"Difficulty: {self.difficulty}")

In [3]:
env = CurriculumEnv(difficulty=1)

## Training Loop

In [4]:
# Number of episodes to run
num_episodes = 500

# Gradual increase in difficulty after every X episodes
difficulty_increase_interval = 50  # Increase difficulty every 50 episodes

def train():
    for episode in range(num_episodes):
        state = env.reset()  # Reset the environment at the start of each episode
        total_reward = 0
        done = False

        while not done:
            # Choose a random action
            action = env.action_space.sample()

            # Take the action, observe the next state and reward
            next_state, reward, done, _ = env.step(action)

            # Accumulate the reward for this episode
            total_reward += reward

            # Update the state
            state = next_state

        # Gradually increase the difficulty after every few episodes
        if episode % difficulty_increase_interval == 0 and env.difficulty < 3:
            env.difficulty += 1
            env.update_target_position()

        # Every 10 episodes, print the total reward and current difficulty
        if episode % 10 == 0:
            print(f"Episode {episode}: Total Reward: {total_reward}, Difficulty: {env.difficulty}")

In [5]:
# Run the training loop
train()

Episode 0: Total Reward: 52.17904328283545, Difficulty: 3
Episode 10: Total Reward: 31.5290751809918, Difficulty: 6
Episode 20: Total Reward: 50.95322721335424, Difficulty: 9
Episode 30: Total Reward: 55.69061859466779, Difficulty: 12
Episode 40: Total Reward: 56.73208885050124, Difficulty: 13
Episode 50: Total Reward: 34.49027160341148, Difficulty: 20
Episode 60: Total Reward: 58.64947603099142, Difficulty: 23
Episode 70: Total Reward: 54.34688466578177, Difficulty: 25
Episode 80: Total Reward: 53.26550650414445, Difficulty: 27
Episode 90: Total Reward: 46.03125751213005, Difficulty: 31
Episode 100: Total Reward: 59.48644178107734, Difficulty: 34
Episode 110: Total Reward: 45.35316424907134, Difficulty: 39
Episode 120: Total Reward: 61.41921879427215, Difficulty: 42
Episode 130: Total Reward: 51.50696262335984, Difficulty: 45
Episode 140: Total Reward: 27.60220187415643, Difficulty: 48
Episode 150: Total Reward: 52.93046766562184, Difficulty: 49
Episode 160: Total Reward: 55.085664430

---