# MARL Collaborative Label Aggregation Demo
Train multiple DQN agents with multi-agent reinforcement learning (MARL) to reach consensus on noisy labels in a shared label-aggregation environment.

In [None]:
from src.env.label_env import LabelAggregationEnv
from src.agents.dqn import DQNAgent
import matplotlib.pyplot as plt

In [None]:
# Initialize environment and agents
# Single source of truth for the team size, so the environment and the agent
# list cannot drift apart when the count is changed.
NUM_AGENTS = 3

env = LabelAggregationEnv(num_agents=NUM_AGENTS)
# NOTE(review): input_dim=1 / output_dim=2 presumably correspond to a scalar
# observation and a binary label choice — confirm against LabelAggregationEnv.
agents = [DQNAgent(input_dim=1, output_dim=2) for _ in range(NUM_AGENTS)]

# One reward history per agent, in the same order as `agents`.
rewards_log = [[] for _ in agents]

In [None]:
# Run simulation
# Roll out one episode of at most 100 steps; it ends early when the
# environment reports `done`.
state = env.reset()
for step in range(100):
    # Every agent chooses its action from the same current state.
    actions = [agent.act(state) for agent in agents]
    next_state, rewards, done, _ = env.step(actions)

    # Store the transition, call learn(), and log the reward for each agent.
    for i, (agent, action) in enumerate(zip(agents, actions)):
        reward = rewards[i]
        agent.remember(state, action, reward, next_state)
        agent.learn()
        rewards_log[i].append(reward)

    state = next_state
    if done:
        break
print("Training complete.")

In [None]:
# Plot rewards
# Draw one reward curve per agent on the current axes.
ax = plt.gca()
for i, reward_history in enumerate(rewards_log):
    ax.plot(reward_history, label=f"Agent {i}")

ax.set_title("Agent Rewards Over Time")
ax.set_xlabel("Steps")
ax.set_ylabel("Reward")
ax.legend()
ax.grid(True)
plt.show()