diff --git a/Macodiac.ML/multiagent_main.py b/Macodiac.ML/multiagent_main.py index a3329d4..bf4bb18 100644 --- a/Macodiac.ML/multiagent_main.py +++ b/Macodiac.ML/multiagent_main.py @@ -25,7 +25,7 @@ def __init__(self): self.save_path = os.path.join(filePath,'saved_models', 'model') self.save_path_intermittent = os.path.join(filePath,'saved_models', 'intermittent_saved_models') self.numTrainingIterations = 1_000 - self.numEpisodes = 15 + self.numEpisodes = 150 self.envTimesteps = 15 self.numAgents = 10 @@ -87,7 +87,7 @@ def run_multiagent_project_with_rand_test(self, env:MultiAgentMacodiacEnvironmen iterator+=1 print(f'iterator:{iterator}') action_arr = env.action_space.sample() - + print(f'action for agents:\t{action_arr}') obs_arr, reward, isDone, info_arr = env.step(action_arr) diff --git a/Macodiac.ML/multiagentenvironment.py b/Macodiac.ML/multiagentenvironment.py index c8e80ad..bcd543b 100644 --- a/Macodiac.ML/multiagentenvironment.py +++ b/Macodiac.ML/multiagentenvironment.py @@ -54,7 +54,7 @@ def __init__(self, envTimesteps:int, numAgents: int): self.action_space = spaces.MultiDiscrete(arr) - # the observation space is a nAgents by nActions array of float32 numbers between 0-100 + # the observation space is a numAgents by 4 array of float32 numbers between -100 and 100 # also contains the static value for marginal cost and wholesale price self.observation_space = spaces.Box(low=-100,high=100, shape=(numAgents, 4), dtype=np.float32) @@ -80,7 +80,7 @@ def set_agent_action(self, action, agent, actionSpace): agentBaseVendingPriceAdjust = self.env_wholesale_price * (agent.state / 100) baseAgentVendingPrice = self.env_wholesale_price + agentBaseVendingPriceAdjust #agentMarginalCostAddedVendingPrice = agentBaseVendingPriceAdjust + self.env_agent_marginal_cost - agent.vendingPrice = baseAgentVendingPrice + agent.vendingPrice = max(baseAgentVendingPrice, 1) # print(f'agent vending price was {agent.vendingPrice}') def step_agent(self, agent): @@ -268,4 +268,4 @@ def
get_consumer_quantity_demanded_at_price(self, price): def get_agent_default_observation_array(self): - return [0.0, 0.0, 60.0, 5.0] \ No newline at end of file + return [0.0, 0.0, self.env_wholesale_price, self.env_agent_marginal_cost] \ No newline at end of file