In [16]:
import os

# Specify the directory containing your custom modules
custom_module_path = ""

# Get the current value of PYTHONPATH (if any)
existing_pythonpath = os.environ.get("PYTHONPATH", "")

# Append your custom module path to PYTHONPATH
os.environ["PYTHONPATH"] = f"{custom_module_path}:{existing_pythonpath}"

# Now Python will search for modules in the specified directory
print(existing_pythonpath)

:


In [17]:
import gym as gym
from gym import spaces
import numpy as np

class SimpleMultiAgentEnv(gym.Env):
    def __init__(self):
        super(SimpleMultiAgentEnv, self).__init__()
        self.observation_space = spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32)
        self.action_space = spaces.Discrete(2)
        self.state = np.array([0.5, 0.5])
        
    def reset(self):
        self.state = np.array([0.5, 0.5])
        return self.state
    
    def step(self, actions):
        rewards = [0, 0]
        done = False
        
        for i, action in enumerate(actions):
            if action == 0:
                self.state[i] -= 0.1
            else:
                self.state[i] += 0.1
                
            rewards[i] = -abs(self.state[i] - 0.5)  # reward is higher when closer to 0.5
        
        return self.state, rewards, done, {}
    
    def render(self):
        pass

# Instantiate the environment
env = SimpleMultiAgentEnv()


In [18]:
class RandomAgent:
    def __init__(self, action_space):
        self.action_space = action_space
    
    def act(self):
        return self.action_space.sample()

agent1 = RandomAgent(env.action_space)
agent2 = RandomAgent(env.action_space)

In [19]:
# Number of steps to simulate
num_steps = 10

# Reset the environment
state = env.reset()
print(f"Initial state: {state}")

for step in range(num_steps):
    # Agents take actions
    action1 = agent1.act()
    action2 = agent2.act()
    
    # Step the environment
    state, rewards, done, info = env.step([action1, action2])
    
    print(f"Step {step + 1} - State: {state}, Rewards: {rewards}")

print("Simulation completed.")


Initial state: [0.5 0.5]
Step 1 - State: [0.6 0.4], Rewards: [-0.09999999999999998, -0.09999999999999998]
Step 2 - State: [0.7 0.5], Rewards: [-0.19999999999999996, -0.0]
Step 3 - State: [0.8 0.6], Rewards: [-0.29999999999999993, -0.09999999999999998]
Step 4 - State: [0.9 0.5], Rewards: [-0.3999999999999999, -0.0]
Step 5 - State: [1.  0.4], Rewards: [-0.4999999999999999, -0.09999999999999998]
Step 6 - State: [1.1 0.3], Rewards: [-0.5999999999999999, -0.19999999999999996]
Step 7 - State: [1.  0.4], Rewards: [-0.4999999999999999, -0.09999999999999998]
Step 8 - State: [1.1 0.3], Rewards: [-0.5999999999999999, -0.19999999999999996]
Step 9 - State: [1.  0.2], Rewards: [-0.4999999999999999, -0.29999999999999993]
Step 10 - State: [0.9 0.1], Rewards: [-0.3999999999999999, -0.39999999999999997]
Simulation completed.


In [20]:


class SimpleMultiAgentEnv(gym.Env):
    def __init__(self, max_steps=20):
        super(SimpleMultiAgentEnv, self).__init__()
        self.observation_space = spaces.Box(low=0, high=1, shape=(2,), dtype=np.float32)
        self.action_space = spaces.Discrete(2)
        self.state = np.array([0.5, 0.5])
        self.max_steps = max_steps
        self.current_step = 0
        
    def reset(self):
        self.state = np.array([0.5, 0.5])
        self.current_step = 0
        return self.state
    
    def step(self, actions):
        rewards = [0, 0]
        self.current_step += 1
        
        for i, action in enumerate(actions):
            if action == 0:
                self.state[i] -= 0.1
            else:
                self.state[i] += 0.1
                
            rewards[i] = -abs(self.state[i] - 0.5)  # reward is higher when closer to 0.5
        
        done = self.current_step >= self.max_steps
        
        return self.state, rewards, done, {}
    
    def render(self):
        pass

class RandomAgent:
    def __init__(self, action_space):
        self.action_space = action_space
    
    def act(self, state):
        return [self.action_space.sample(), self.action_space.sample()]

class RuleBasedAgent:
    def __init__(self, action_space):
        self.action_space = action_space
    
    def act(self, state):
        # Simple rule: move towards the center value (0.5)
        actions = []
        for s in state:
            if s > 0.5:
                actions.append(0)  # move down
            else:
                actions.append(1)  # move up
        return actions

def evaluate_agent(agent, env, num_episodes=5):
    total_reward = 0
    for _ in range(num_episodes):
        state = env.reset()
        done = False
        episode_reward = 0
        a=0
        while not done:
            print(a)
            a+=1
            actions = agent.act(state)
            state, rewards, done, _ = env.step(actions)
            episode_reward += sum(rewards)
        total_reward += episode_reward
    return total_reward / num_episodes

# Instantiate the environment and agents
env = SimpleMultiAgentEnv(max_steps=20)
agent1 = RandomAgent(env.action_space)
agent2 = RuleBasedAgent(env.action_space)

# Initial agent selection
best_agent = agent1

# Parameters
num_steps = 100
evaluation_interval = 10  # Evaluate every 10 steps

# Reset the environment
state = env.reset()
print(f"Initial state: {state}")

for step in range(num_steps):
    # Periodically evaluate and switch agents if necessary
    if step % evaluation_interval == 0:
        performance1 = evaluate_agent(agent1, env)
        performance2 = evaluate_agent(agent2, env)
        
        print(f"Evaluation at step {step}:")
        print(f"Performance of RandomAgent: {performance1}")
        print(f"Performance of RuleBasedAgent: {performance2}")
        
        if performance1 > performance2:
            best_agent = agent1
            print("Selected RandomAgent as the best agent.")
        else:
            best_agent = agent2
            print("Selected RuleBasedAgent as the best agent.")
    
    # Best agent takes actions
    actions = best_agent.act(state)
    
    # Step the environment
    state, rewards, done, _ = env.step(actions)
    
    print(f"Step {step + 1} - State: {state}, Rewards: {rewards}")

print("Simulation completed.")



Initial state: [0.5 0.5]
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
Evaluation at step 0:
Performance of RandomAgent: -9.039999999999997
Performance of RuleBasedAgent: -1.9999999999999996
Selected RuleBasedAgent as the best agent.
Step 1 - State: [0.6 0.6], Rewards: [-0.09999999999999998, -0.09999999999999998]
Step 2 - State: [0.5 0.5], Rewards: [-0.0, -0.0]
Step 3 - State: [0.6 0.6], Rewards: [-0.09999999999999998, -0.09999999999999998]
Step 4 - State: [0.5 0.5], Rewards: [-0.0, -0.0]
Step 5 - State: [0.6 0.6], Rewards: [-0.099999999999

In [3]:
for i in range(20):
    lstm_action = i
    minrtt_action = 0
    if i % 5 == 0:
        current_action = lstm_action
    print(current_action)


0
0
0
0
0
5
5
5
5
5
10
10
10
10
10
15
15
15
15
15


: 