In [193]:
import mesa
import seaborn as sns
import numpy as np
import random
import pandas as pd

In [194]:
class PlantAgent(mesa.Agent):

    def __init__(self, unique_id, model):
        super().__init__(unique_id, model)

        self.alive = True

        self.age = 0
        self.harvested = False

    def step(self):
        if self.alive:
            if self.age != 10:
                self.age += 1

        if self.age == 10:
            self.harvested = True

        # print(f"Agent ID: {str(self.unique_id)} Agent Status: {self.alive} Agent Life: {str(self.age)} Harvest Status: {self.harvested}")

        if not self.alive or self.harvested:
            return


        death_outcome = random.choices([False, True], weights=[0.1, 0.9], k=1)[0]

        self.alive = death_outcome


In [None]:
# Development of an agent-based simulation model in combination with reinforcement learning in Python using Mesa library



# - At the beginning of an episode, 10 plants (as agents) are planted

# - Plants must grow for 10 days (steps) before they can be harvested.

# - Each plant has a 10% chance of dying every day.

# - A new (fresh) plant can be bought every day (cost $10) to be planted

# - The aim is to harvest 10 plants that each grew for 10 days. When the goal is reached, there is a reward of $20 per plant harvested and the episode ends

# - Each day of the episode costs $5

# - Reinforcement learning is now used to find a strategy when to plant new trees that minimizes total costs.

In [195]:
class PlantModel(mesa.Model):

    def __init__(self, N):
        super().__init__()

        self.num_agents = N

        self.number_of_days = 0
        self.number_of_plants_harvested = 0

        self.schedule = mesa.time.RandomActivation(self)

        for i in range(self.num_agents):
            a = PlantAgent(i, self)
            self.schedule.add(a)

    def get_state(self):
        agents = [a for a in self.schedule.agents]
        state = [(0, 0) for i in range(len(agents))]

        for a in agents:
            print(
                f"Agent ID: {str(a.unique_id)}     Agent Status: {a.alive}     Agent Life: {str(a.age)}     Harvest Status: {a.harvested}"
            )

            state[a.unique_id] = (a.alive, a.age)

        return state

    def check_terminated(self, observation):
        return list(map(lambda x: x[1] == 10, observation)).count(True)

    def step(self, action):

        self.number_of_days += 1
        terminated = 0
        truncated = False
        reward = -5

        if action == 1:
            self.num_agents += 1
            a = PlantAgent(self.num_agents - 1, self)
            self.schedule.add(a)

            reward -= 10

        self.schedule.step()

        observation = self.get_state()


        number_of_plants_harvested = list(map(lambda x: x[0] and x[1] == 10, observation)).count(True)
        difference = number_of_plants_harvested - self.number_of_plants_harvested
        reward += difference * 20

        self.number_of_plants_harvested = number_of_plants_harvested

        if self.number_of_days == 500:
            truncated = True

        if self.number_of_plants_harvested == 10:
            terminated = 1

        return observation, reward, terminated, truncated

Thera are a few things to consider

Terminal State: This would be achieved when we have 10 plants  
Truncation: It should eventually converge in 500 iterations. No more than 500 steps.  
Reward: Positive for harvesting and negative for buying a plant plus on each step you get a negative reward  
Observation: This would be the current state. The state would be a list of tuples (status_of_death, age)  

In [None]:
# Essentially what you want the state space to be is that you want to know the status of each plant (alive, life, harvested) and the total cost so far

# And, so we need to construct our state space.

# There must be a function that constructs this state space for the model. The state space can also be infinite as we need to see what kind of things we have to make it in there.b


Question is how many plants are alive at the moment and what is their lifespan at each category. We need some good strategies to reduce the state space of each plant too.

Once they are dead they are also removed from the equation at each step.

But, the state space would exponentially increase if we allow it to be.

We want the status, age and this would contain all the information that we need. This contains everything we need.
And, hence our data structure should accomodate those changes. The changes should be that it is a list of tuples. The tuple would be of (status, age)

In [196]:
model = PlantModel(10)

In [229]:
for i in range(500):
    observation, reward, terminated, truncated = model.step(0)
    print()

Agent ID: 18     Agent Status: True     Agent Life: 10     Harvest Status: True
Agent ID: 4     Agent Status: False     Agent Life: 7     Harvest Status: False
Agent ID: 13     Agent Status: True     Agent Life: 10     Harvest Status: True
Agent ID: 15     Agent Status: True     Agent Life: 10     Harvest Status: True
Agent ID: 1     Agent Status: False     Agent Life: 6     Harvest Status: False
Agent ID: 12     Agent Status: False     Agent Life: 2     Harvest Status: False
Agent ID: 16     Agent Status: False     Agent Life: 1     Harvest Status: False
Agent ID: 3     Agent Status: False     Agent Life: 1     Harvest Status: False
Agent ID: 8     Agent Status: False     Agent Life: 1     Harvest Status: False
Agent ID: 17     Agent Status: True     Agent Life: 10     Harvest Status: True
Agent ID: 9     Agent Status: True     Agent Life: 10     Harvest Status: True
Agent ID: 10     Agent Status: False     Agent Life: 5     Harvest Status: False
Agent ID: 20     Agent Status: True   

In [230]:
observation

[(True, 10),
 (False, 6),
 (True, 10),
 (False, 1),
 (False, 7),
 (True, 10),
 (True, 10),
 (False, 4),
 (False, 1),
 (True, 10),
 (False, 5),
 (False, 3),
 (False, 2),
 (True, 10),
 (True, 10),
 (True, 10),
 (False, 1),
 (True, 10),
 (True, 10),
 (False, 9),
 (True, 8)]

In [231]:
reward, terminated, truncated

(15, 1, False)

In [98]:
agents = [a for a in model.schedule.agents]

In [99]:
observation = [(0, 0) for i in range(len(agents))]

for a in agents:
    print(f"Agent ID: {str(a.unique_id)}     Agent Status: {a.alive}     Agent Life: {str(a.age)}     Harvest Status: {a.harvested}")

    observation[a.unique_id] = (a.alive, a.age)

Agent ID: 1     Agent Status: True     Agent Life: 10     Harvest Status: True
Agent ID: 8     Agent Status: False     Agent Life: 3     Harvest Status: False
Agent ID: 9     Agent Status: False     Agent Life: 5     Harvest Status: False
Agent ID: 6     Agent Status: False     Agent Life: 4     Harvest Status: False
Agent ID: 4     Agent Status: False     Agent Life: 9     Harvest Status: False
Agent ID: 0     Agent Status: True     Agent Life: 10     Harvest Status: True
Agent ID: 7     Agent Status: True     Agent Life: 10     Harvest Status: True
Agent ID: 2     Agent Status: False     Agent Life: 2     Harvest Status: False
Agent ID: 5     Agent Status: False     Agent Life: 4     Harvest Status: False
Agent ID: 3     Agent Status: False     Agent Life: 8     Harvest Status: False


In [101]:
observation

[(True, 10),
 (True, 10),
 (False, 2),
 (False, 8),
 (False, 9),
 (False, 4),
 (False, 4),
 (True, 10),
 (False, 3),
 (False, 5)]

In [104]:
list(map(lambda x: x[1] == 10, observation)).count(True)

3