In [41]:
import sys
# True when the google.colab module has already been loaded into this interpreter
# (used further down to pick the import path of the plotting helpers).
IN_COLAB = 'google.colab' in sys.modules

# Clone the ai-economist fork, change into the checkout and install it in editable mode.
# NOTE(review): the clone is relative to the current directory and %cd is persistent, so
# re-running this cell nests a fresh checkout inside the previous one (the recorded
# output shows /content/ai-economist/ai-economist/ai-economist).
! git clone https://github.com/swarnabha13/ai-economist.git
%cd ai-economist
! pip install -e .

Cloning into 'ai-economist'...
remote: Enumerating objects: 335, done.[K
remote: Counting objects: 100% (74/74), done.[K
remote: Compressing objects: 100% (66/66), done.[K
remote: Total 335 (delta 30), reused 25 (delta 8), pack-reused 261[K
Receiving objects: 100% (335/335), 1.16 MiB | 12.89 MiB/s, done.
Resolving deltas: 100% (165/165), done.
/content/ai-economist/ai-economist/ai-economist
Obtaining file:///content/ai-economist/ai-economist/ai-economist
Installing collected packages: ai-economist
  Found existing installation: ai-economist 1.1.1
    Can't uninstall 'ai-economist'. No files were found to uninstall.
  Running setup.py develop for ai-economist
Successfully installed ai-economist


In [42]:
from ai_economist import foundation
from ai_economist.foundation.base.base_env import BaseEnvironment, scenario_registry
# Look up a scenario class by its registered name.
# NOTE(review): test_env_cls is not referenced by any of the visible cells, and the
# environment config below selects a different scenario name — confirm it is still needed.
test_env_cls = scenario_registry.get("layout_from_file/simple_wood_and_stone")

In [43]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from IPython import display
if IN_COLAB:
    from tutorials.utils import plotting  # plotting utilities for visualizing env state
else:
    from utils import plotting

In [44]:
from ai_economist.foundation.entities.resources import Resource, resource_registry

@resource_registry.add
class Widget(Resource):
    """Custom resource, added to the resource registry under the name "Widget".

    Components can list "Widget" in their required entities once this class
    has been registered.
    """
    name = "Widget"
    # Three-element colour value; presumably RGB used when rendering — TODO confirm.
    color = [1, 1, 1]
    collectible = False # <--- Goes in agent inventory, but not in the world

In [45]:
import matplotlib.pyplot as plt
from ai_economist.foundation.base.base_component import BaseComponent, component_registry
import random

@component_registry.add
class BuyWidgetFromVirtualStore(BaseComponent):
    """Component that lets mobile agents buy Widgets from a shared virtual store.

    The store holds a stock of widget units that is replenished at random
    (one unit per step with probability ``widget_refresh_rate``). An agent
    that picks this component's single action pays ``widget_price`` Coin and
    receives one Widget, provided the store has stock and the agent can pay.
    """

    name = "BuyWidgetFromVirtualStore"
    required_entities = ["Coin", "Widget"]  # <--- We can now look up "Widget" in the resource registry
    agent_subclasses = ["BasicMobileAgent"]

    def __init__(
        self,
        *base_component_args,
        widget_refresh_rate=0.1,
        **base_component_kwargs
    ):
        """Store the refresh rate and initialise an empty store.

        Args:
            *base_component_args: Positional arguments forwarded to BaseComponent.
            widget_refresh_rate: Per-step probability that one widget unit is
                added to the store's stock.
            **base_component_kwargs: Keyword arguments forwarded to BaseComponent.
        """
        super().__init__(*base_component_args, **base_component_kwargs)
        self.widget_refresh_rate = widget_refresh_rate
        self.available_widget_units = 0
        self.widget_price = 5

    def get_additional_state_fields(self, agent_cls_name):
        """Return the extra agent state fields this component needs (none)."""
        return {}

    def additional_reset_steps(self):
        """Empty the store's stock at reset."""
        # The original assigned to a misspelled attribute (available_wood_units),
        # which left the widget stock carried over from the previous episode.
        self.available_widget_units = 0

    def get_n_actions(self, agent_cls_name):
        """Return 1 (the "buy" action) for BasicMobileAgent, otherwise None."""
        if agent_cls_name == "BasicMobileAgent":
            return 1
        return None

    def generate_masks(self, completions=0):
        """Return, per agent index, a one-element mask for the "buy" action.

        The entry is truthy only when the agent holds at least ``widget_price``
        Coin and the store has at least one unit in stock. ``completions`` is
        accepted but not used here.
        """
        store_has_stock = self.available_widget_units > 0
        masks = {}
        for agent in self.world.agents:
            can_afford = agent.state["inventory"]["Coin"] >= self.widget_price
            masks[agent.idx] = np.array([can_afford and store_has_stock])

        return masks

    def component_step(self):
        """Possibly restock the store, then process each agent's buy request.

        Raises:
            ValueError: If an agent submits an action other than 0 (NO-OP) or 1 (buy).
        """
        if random.random() < self.widget_refresh_rate:
            self.available_widget_units += 1

        # Agents are visited in the order returned by get_random_order_agents(), so
        # when stock is short the earlier agents in that order are served first.
        for agent in self.world.get_random_order_agents():

            action = agent.get_component_action(self.name)

            if action == 0:  # NO-OP. Agent is not interacting with this component.
                continue

            elif action == 1:  # Agent wants to buy. Execute a purchase if possible.
                inventory = agent.state["inventory"]
                if self.available_widget_units > 0 and inventory["Coin"] >= self.widget_price:
                    inventory["Coin"] -= self.widget_price
                    inventory["Widget"] += 1
                    self.available_widget_units -= 1

            else:  # We only declared 1 action for this agent type, so action > 1 is an error.
                raise ValueError(
                    "Unexpected action {!r} for component {}; expected 0 or 1.".format(
                        action, self.name
                    )
                )

    def generate_observations(self):
        """Return, per agent index, the store's refresh rate, stock and price."""
        obs_dict = dict()
        for agent in self.world.agents:
            obs_dict[agent.idx] = {
                "widget_refresh_rate": self.widget_refresh_rate,
                "available_widget_units": self.available_widget_units,
                "widget_price": self.widget_price
            }

        return obs_dict

In [46]:
# Define the configuration of the environment that will be built

env_config = {
    # ===== STANDARD ARGUMENTS ======
    'n_agents': 4,          # Number of non-planner agents
    'world_size': [25, 25], # [Height, Width] of the env world
    'episode_length': 1000, # Number of timesteps per episode

    # In multi-action-mode, the policy selects an action for each action subspace (defined in component code)
    # Otherwise, the policy selects only 1 action
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': True,

    # When flattening observations, concatenate scalar & vector observations before output
    # Otherwise, return observations with minimal processing
    'flatten_observations': False,
    # When flattening masks, concatenate each action subspace mask into a single array
    # Note: flatten_masks = True is recommended for masking action logits
    'flatten_masks': True,


    # ===== COMPONENTS =====
    # Which components to use (specified here as a list of ("component_name", {component_kwargs}) tuples)
    #   "component_name" refers to the component class's name in the Component Registry
    #   {component_kwargs} is a dictionary of kwargs passed to the component class
    # The order in which components reset, step, and generate obs follows their listed order below
    'components': [
        # (1) Building houses
        ('Build', dict(skill_dist="beta", payment_max_skill_multiplier=3)),
        # (2) Trading collectible resources
        ('ContinuousDoubleAuction', dict(max_num_orders=5)),
        # (3) Movement and resource collection
        ('Gather', dict()),
    ],

    # ===== SCENARIO =====
    # Which scenario class to use (specified by the class's name in the Scenario Registry)
    'scenario_name': 'uniform/simple_wood_and_stone',
    # Layout file entry left disabled; the scenario selected above is the 'uniform' one.
    #'env_layout_file': 'quadrant_25x25_20each_30clump.txt',

    # (optional) kwargs of the chosen scenario class
    'starting_agent_coin': 10,
    'starting_stone_coverage': 0.10,
    'starting_wood_coverage':  0.10,
}

In [47]:
from copy import deepcopy
# Start from an independent copy so that edits here never leak back into env_config.
new_env_config = deepcopy(env_config)

# Compared to env_config, new_env_config adds our new Component and gives Gather a
# "pareto" skill distribution. Every entry uses the same ("component_name", kwargs)
# tuple form as the rest of the list.
new_env_config['components'] = [
    # (1) Building houses
    ('Build', dict(skill_dist="beta", payment_max_skill_multiplier=3)),
    # (2) Trading collectible resources
    ('ContinuousDoubleAuction', dict(max_num_orders=5)),
    # (3) Movement and resource collection
    ('Gather', dict(skill_dist="pareto")),
    # (4) Let each mobile agent buy widgets from a virtual store.
    ('BuyWidgetFromVirtualStore', dict(widget_refresh_rate=0.1)),  # <--- This.
]

In [None]:
# Build the environment from new_env_config (not env_config) so that the
# BuyWidgetFromVirtualStore component configured above is actually part of it.
new_env = foundation.make_env_instance(**new_env_config)
obs = new_env.reset()

# Policy function

In [None]:
from collections import defaultdict

In [None]:
def make_epsilon_greedy_policy(Q, epsilon, env):
    """Build an epsilon-greedy policy on top of per-agent tabular action values.

    Args:
        Q: Mapping ``agent index -> {state key -> 1-D array of action values}``.
            State keys are looked up with plain indexing, so the inner mapping
            must provide a default row for unseen states (e.g. a ``defaultdict``).
            A state key is the tuple of ``(field, raw bytes)`` pairs of the agent's
            observation, taken in sorted field order; whoever writes into ``Q``
            has to derive keys the same way.
        epsilon: Exploration rate; this much probability mass is spread evenly
            over the actions the mask allows, the rest goes to the greedy action.
        env: Not used by the policy; kept so existing callers keep working.

    Returns:
        ``policy_fn(obs)`` mapping each agent index in ``obs`` to an array of
        action probabilities with the same length as that agent's
        ``'action_mask'``. Masked-out actions get probability 0. Agents whose
        mask allows no action at all are left out of the result.

    Raises:
        ValueError: (from ``policy_fn``) if an agent's Q row and action mask
            differ in length.
    """
    def _state_key(a_obs):
        # Observation dicts (and the arrays inside them) are unhashable, so they
        # cannot index a dict directly; turn them into a hashable tuple instead.
        return tuple(
            (field, np.asarray(a_obs[field]).tobytes()) for field in sorted(a_obs)
        )

    def policy_fn(obs):
        agent_wise_action_prob = {}
        for a_idx, a_obs in obs.items():
            # assumes 'action_mask' is one flat 0/1 vector per agent — TODO confirm
            # for agents running in multi-action mode.
            allowed = np.asarray(a_obs['action_mask']).ravel() > 0
            n_allowed = int(allowed.sum())
            if n_allowed == 0:
                continue

            q_values = np.asarray(Q[a_idx][_state_key(a_obs)], dtype=float).ravel()
            if q_values.size != allowed.size:
                raise ValueError(
                    "Q row for agent {!r} has {} entries but its action mask has {}.".format(
                        a_idx, q_values.size, allowed.size
                    )
                )

            # Exploration mass only on allowed actions ...
            action_prob = np.where(allowed, epsilon / n_allowed, 0.0)
            # ... and the remaining mass on the best allowed action (first one on ties).
            best_action_index = int(np.argmax(np.where(allowed, q_values, -np.inf)))
            action_prob[best_action_index] += 1.0 - epsilon
            agent_wise_action_prob[a_idx] = action_prob
        return agent_wise_action_prob

    return policy_fn

In [None]:
def select_agent_wise_next_best_action(env, agent_wise_action_probs):
    """Sample one action index per agent from its action-probability vector.

    Args:
        env: Not used; kept so existing callers keep working.
        agent_wise_action_probs: Mapping ``agent index -> 1-D array of action
            probabilities`` (each array must sum to 1).

    Returns:
        Dict mapping every agent index in the input to the sampled action
        index (a plain ``int`` in ``[0, len(probabilities))``).
    """
    actions = {}
    for a_idx, action_probs in agent_wise_action_probs.items():
        action_probs = np.asarray(action_probs, dtype=float).ravel()
        # Add to the dict (the original rebound it each iteration, keeping only the
        # last agent) and sample from this agent's own probabilities.
        actions[a_idx] = int(np.random.choice(action_probs.size, p=action_probs))
    return actions

In [None]:
def do_plot(env, ax, fig):
    """Plots world state during episode sampling.

    Draws the current state of ``env`` onto ``ax`` with
    ``plotting.plot_env_state``, forces an equal aspect ratio, and displays
    ``fig`` in the notebook output.
    """
    plotting.plot_env_state(env, ax)
    ax.set_aspect('equal')
    display.display(fig)
    # With wait=True the clearing is deferred until new output is ready to replace it,
    # so consecutive calls overwrite the previous frame instead of stacking figures.
    display.clear_output(wait=True)     

In [None]:
def play_sarsa(env, plot_every=100, do_dense_logging=False, epsilon=0.1,
               alpha=0.5, discount_factor=1.0):
    """Run one episode with tabular SARSA and an epsilon-greedy policy, plotting as it goes.

    Args:
        env: Environment exposing ``reset(force_dense_logging=...)``,
            ``step(actions)`` and ``episode_length``.
        plot_every: Plot the world state every this many steps.
        do_dense_logging: Forwarded to ``env.reset`` as ``force_dense_logging``.
        epsilon: Exploration rate of the epsilon-greedy policy.
        alpha: Learning rate of the SARSA update.
        discount_factor: Discount applied to the next state-action value.

    The Q-table is local to this call and is not returned.
    """
    def _state_key(a_obs):
        # Observation dicts are unhashable; key the Q-table on a tuple of
        # (field, raw bytes) pairs in sorted field order. This has to match the key
        # derivation used by the policy that reads Q.
        return tuple(
            (field, np.asarray(a_obs[field]).tobytes()) for field in sorted(a_obs)
        )

    def _episode_finished(done):
        # A non-empty dict is always truthy, so a dict-valued `done` cannot be tested
        # directly. presumably the env reports termination under "__all__" — verify
        # against env.step.
        if isinstance(done, dict):
            return bool(done.get("__all__", False))
        return bool(done)

    fig, ax = plt.subplots(1, 1, figsize=(10, 10))

    obs = env.reset(force_dense_logging=do_dense_logging)

    # One table per agent; rows are sized from that agent's flattened action mask.
    # `n=n_actions` binds the size now — a bare closure over the loop variable would
    # make every table use the last agent's size.
    Q = {}
    for a_idx, a_obs in obs.items():
        n_actions = int(np.size(a_obs['action_mask']))
        Q[a_idx] = defaultdict(lambda n=n_actions: np.zeros(n))

    sarsa_policy = make_epsilon_greedy_policy(Q, epsilon, env)
    actions = select_agent_wise_next_best_action(env, sarsa_policy(obs))

    steps_taken = 0
    last_plotted_step = None
    for t in range(env.episode_length):
        next_obs, reward, done, _ = env.step(actions)
        steps_taken = t + 1
        next_actions = select_agent_wise_next_best_action(env, sarsa_policy(next_obs))

        # SARSA update, independently for every agent that acted this step.
        for a_idx, action in actions.items():
            # assumes a dict-valued reward is keyed like obs — TODO confirm
            agent_reward = reward[a_idx] if isinstance(reward, dict) else reward
            td_target = agent_reward
            if a_idx in next_actions:
                next_row = Q[a_idx][_state_key(next_obs[a_idx])]
                td_target = agent_reward + discount_factor * next_row[next_actions[a_idx]]
            q_row = Q[a_idx][_state_key(obs[a_idx])]
            q_row[action] += alpha * (td_target - q_row[action])

        obs, actions = next_obs, next_actions

        if steps_taken % plot_every == 0:
            do_plot(env, ax, fig)
            last_plotted_step = steps_taken

        if _episode_finished(done):
            break

    # Always show the final state unless it was just plotted.
    if last_plotted_step != steps_taken:
        do_plot(env, ax, fig)

In [None]:
# Run one SARSA episode on new_env, plotting the world state every 100 steps.
play_sarsa(new_env, plot_every=100)