In [1]:
!pip install mesa
!pip install gymnasium

Collecting mesa
  Downloading mesa-3.4.0-py3-none-any.whl.metadata (11 kB)
Downloading mesa-3.4.0-py3-none-any.whl (241 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m241.2/241.2 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: mesa
Successfully installed mesa-3.4.0


In [2]:
!pip install spektral

Collecting spektral
  Downloading spektral-1.3.1-py3-none-any.whl.metadata (5.9 kB)
Downloading spektral-1.3.1-py3-none-any.whl (140 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m140.1/140.1 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: spektral
Successfully installed spektral-1.3.1


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import mesa
import gymnasium as gym
import pandas as pd
import numpy as np
import random

In [5]:
def min_max_scaler(x,a,b):
    return (x-a)/(b-a)
def log_scaler(x):
    return np.log(x+1)

def sigmoid(max_effect, saturation_rate, t):
        return max_effect / (1 + np.exp(-saturation_rate * (t - 10)))

def sigmoid_transfer(x, steepness=10):
    return 1 / (1 + np.exp(-steepness * np.array(x, dtype=float)))

def get_effect_growth(value, ranges):
    ranges = sorted(ranges, key=lambda x: x[0])

    for i in range(len(ranges)):
        low, high, effect, growth = ranges[i]

        if low <= value < high:
            proportion = (value - low) / (high - low)
            if i + 1 < len(ranges):
                next_effect = ranges[i+1][2]
                next_growth = ranges[i+1][3]
                smooth_effect = effect + (proportion * (next_effect - effect))
                smooth_growth = growth + (proportion * (next_growth - growth))
                return smooth_effect, smooth_growth

            return effect, growth

    return 0, 0

In [6]:
import gymnasium as gym
import numpy as np
class GeographicRegion(gym.Env):
    def __init__(self, seed=None):
        self.seed = seed
        np.random.seed(seed)

        # Observation space
        self.state_space = gym.spaces.Dict({
            "Temperature": gym.spaces.Box(low=-30, high=60, dtype=np.float32),
            "Precipitation": gym.spaces.Box(low=0.25, high=50, dtype=np.float32),
            "Humidity": gym.spaces.Box(low=0, high=100,  dtype=np.float32),
            "Air Pollution Index": gym.spaces.Box(low=0, high=500,dtype=np.int16),
            "Water Quality Index": gym.spaces.Box(low=0, high=100, dtype=np.int16),
            "Carbon Dioxide Emissions Per Capita": gym.spaces.Box(low=1, high=15, dtype=np.float32),
            "Electricity Consumption Per Capita": gym.spaces.Box(low=250, high=12000, dtype=np.float32),
            "Renewable Share": gym.spaces.Box(low=0, high=1, dtype=np.float32),
            "Water Consumption Per Capita": gym.spaces.Box(low=20, high=350, dtype=np.float32),
            "Population": gym.spaces.Box(low=1, high=37e6, dtype=np.float32),
            "GDP": gym.spaces.Box(low=1e9, high=1e12,dtype=np.float32),
            "Employment Rate": gym.spaces.Box(low=0, high=1, dtype=np.float32),
            "Waste Management Efficiency": gym.spaces.Box(low=0, high=1, dtype=np.float32),
            "Energy Efficiency": gym.spaces.Box(low=0, high=1,dtype=np.float32),
        })

        # Action space
        self.action_space = gym.spaces.Dict({
            "Carbon Tax": gym.spaces.Box(low=0, high=200, dtype=np.float32),
            "Renewable energy subsidies": gym.spaces.Box(low=0, high=1, dtype=np.float32),
            "Water Consumption Tax": gym.spaces.Box(low=0.05, high=6, dtype=np.float32),
            "Energy Efficiency Incentives": gym.spaces.Box(low=0, high=1,dtype=np.float32),
            "Fossil Fuel Phase-Out Regulations": gym.spaces.Discrete(3),
            "Electric Vehicle (EV) Subsidies": gym.spaces.Box(low=0, high=1,dtype=np.float32),
            "Fuel Economy Standards": gym.spaces.Discrete(4),
            "Urban Green Space Expansion": gym.spaces.Box(low=1, high=200,dtype=np.float32),
            "Climate-Resilient Infrastructure Investment": gym.spaces.Box(low=1e6, high=500e6, dtype=np.float32),
            "Sustainable Land-Use Zoning": gym.spaces.Box(low=0.1e6, high=100e6,dtype=np.float32),
            "Waste Management Reforms": gym.spaces.Box(low=1e6, high=100e6,dtype=np.float32),
            "Public transport expansion": gym.spaces.Box(low=1e6, high=1e9,dtype=np.float32),
            "Vehicle emission standards": gym.spaces.Discrete(4),
            "Flood defense infrastructure": gym.spaces.Box(low=0.25e6, high=300e6,dtype=np.float32),
            "Heatwave resilience": gym.spaces.Box(low=0.2e6, high=200e6, dtype=np.float32),
            "Water conservation measures": gym.spaces.Box(low=0.1e6, high=100e6, dtype=np.float32),
            "Recycling Rate": gym.spaces.Box(low=0, high=1,  dtype=np.float32),
            "Single-use plastics bans": gym.spaces.Discrete(3),
            "Green Business Investments": gym.spaces.Box(low=0.1e6, high=100e6,dtype=np.float32)
        })

        self.reset()

    def reset(self):
        self.state = {
            "Temperature": np.random.uniform(15, 30),
            "Precipitation": np.random.uniform(0.5, 5),
            "Humidity": np.random.uniform(40, 70),
            "Air Pollution Index": np.random.uniform(100,200),
            "Water Quality Index": np.random.uniform(30,60),
            "Carbon Dioxide Emissions Per Capita": np.random.uniform(5, 8),
            "Electricity Consumption Per Capita": np.random.uniform(500, 2000),
            "Renewable Share": np.random.uniform(0.2, 0.4),
            "Water Consumption Per Capita": np.random.uniform(50, 150),
            "Population": np.random.randint(1e6, 10e6),
            "GDP": np.random.uniform(1e10, 5e11),
            "Employment Rate": np.random.uniform(0.8, 0.95),
            "Waste Management Efficiency": np.random.uniform(0.6, 0.8),
            "Energy Efficiency": np.random.uniform(0.5, 0.7),
            "Urban Green Space Expansion": np.random.uniform(20, 50),
            "Flood defense infrastructure": np.random.uniform(1e6, 50e6),
            "Heatwave resilience": np.random.uniform(1e5, 20e6),
            "Sustainable Land-Use Zoning": np.random.uniform(1e5, 20e6),
            "Single-use plastics bans": 0,
            "Green Business Investments": np.random.uniform(1e5, 20e6)
        }
        self.t = 0
        return self.state,{}

    def scale_state(self):
        state=self.state.copy()
        for key, value in self.state.items():
            if key in ["Temperature", "Precipitation", "Humidity", "Air Pollution Index",
                      "Water Quality Index", "Carbon Dioxide Emissions Per Capita",
                      "Electricity Consumption Per Capita","Water Consumption Per Capita"]:
                min_value = self.state_space[key].low
                max_value = self.state_space[key].high
                state[key] = min_max_scaler(value, min_value, max_value)
            elif key in ["Population", "GDP"]:
                state[key] = log_scaler(value)
            else:
                state[key] = value
        return state

    def scale_action(self, action):
      for key, value in action.items():
          if key in ["Carbon Tax", "Water Consumption Tax", "Urban Green Space Expansion",
                      "Climate-Resilient Infrastructure Investment",  "Sustainable Land-Use Zoning",
                      "Waste Management Reforms", "Public transport expansion", "Flood defense infrastructure",
                      "Heatwave resilience", "Water conservation measures", "Green Business Investments"]:
              min_value = self.action_space[key].low
              max_value = self.action_space[key].high
              if isinstance(value, np.ndarray):
                  value = value.item()
              action[key] = min_max_scaler(value, min_value, max_value)
              action[key] = np.clip(value, min_value, max_value)
      return action

    def environmental_impact(self, action, t):

        carbon = -((action["Carbon Tax"] - 150) / 100)**2
        vehicle= -((action["Vehicle emission standards"]-3)/100)**2
        fossil= -((action["Fossil Fuel Phase-Out Regulations"]-2)/100)**2
        plastic= -((action["Single-use plastics bans"]-2)/100)**2

        total_environmental_impact = (
            0.3 * carbon +
            0.2 * vehicle +
            0.2 * fossil +
            0.1 * plastic
        )
        return total_environmental_impact

    def energy_efficiency(self, action, t):
        gdp = self.state["GDP"]
        consumption = self.state["Electricity Consumption Per Capita"]
        intensity_score = (np.log1p(gdp / (consumption + 1e-6)) * 0.5)

        env = self.environmental_impact(action, t)
        initial_base = 4000
        current_base = initial_base
        efficiency_rate = 0.05

        efficiency_rate = 0.3 * (sigmoid_transfer(env) - 0.5) + 0.05
        current_base = initial_base * (1.2 - sigmoid_transfer(env) * 0.4)
        electricity_consumption = current_base * (1 - efficiency_rate * t)
        consumption_efficiency_score = (initial_base - electricity_consumption) / initial_base

        subsidy_input = action["Renewable energy subsidies"]
        renewable_impact = np.log1p(subsidy_input) / 5.0
        renewable = renewable_impact * sigmoid(0.7, 0.05, t)

        total_energy_impact = (
            0.4 * intensity_score +
            0.3 * float(consumption_efficiency_score) +
            0.3 * renewable
        )

        return total_energy_impact

    def resource_management(self, action, t):
        water_val = action["Water conservation measures"]
        recycling_val = action["Recycling Rate"]
        waste_val = action["Waste Management Reforms"]

        water = (2 / (1 + np.exp(-5 * (water_val - 0.4))) - 1) * (1 + 0.1 * t)
        recycling = (2 / (1 + np.exp(-5 * (recycling_val - 0.4))) - 1) * (1 + 0.1 * t)
        waste = (2 / (1 + np.exp(-5 * (waste_val / 1e8 - 0.5))) - 1) * (1 + 0.1 * t)

        return (water**0.4 * recycling**0.35 * waste**0.25) if (water > 0 and recycling > 0 and waste > 0) else (0.4 * water + 0.35 * recycling + 0.25 * waste)

    def economic_growth(self, action, t):
        infra = action["Climate-Resilient Infrastructure Investment"]
        transport = action["Public transport expansion"]
        green = action["Green Business Investments"]

        infra_score = (np.log1p(infra / 1e8) / 2.0 - 0.5) * (1 + 0.05 * t)
        transport_score = (np.log1p(transport / 2e8) / 2.0 - 0.4) * (1 + 0.05 * t)
        green_score = (np.log1p(green / 5e7) / 2.0 - 0.3) * (1 + 0.05 * t)

        normalized_gdp = np.log1p(self.state["GDP"]) / 25.0
        employment = self.state["Employment Rate"]

        economic_utility = (normalized_gdp**0.6 * employment**0.4)
        policy_impact = (0.4 * infra_score + 0.3 * transport_score + 0.3 * green_score)

        return economic_utility + policy_impact

    def climate_resilience(self, action, t):
        flood_val = np.clip(action["Flood defense infrastructure"] / 3e8, 1e-6, 1.0)
        heat_val = np.clip(action["Heatwave resilience"] / 2e8, 1e-6, 1.0)
        urban_val = np.clip(action["Urban Green Space Expansion"] / 200, 1e-6, 1.0)

        resilience_base = (flood_val * heat_val * urban_val) ** (1/3)
        boosted_base = (resilience_base ** 2) * 5.0
        time_growth = 1 + (0.15 * t / 50)
        return (boosted_base * time_growth) - 0.5

    def _get_info(self, action=None):
        return {
            "environmental_impact": self.environmental_impact(action, self.t),
            "energy_efficiency": self.energy_efficiency(action, self.t),
            "resource_management": self.resource_management(action, self.t),
            "economic_growth": self.economic_growth(action, self.t),
            "climate_resilience": self.climate_resilience(action, self.t),
            # raw state
            "air_pollution_index": self.state["Air Pollution Index"],
            "carbon_emissions_per_capita": self.state["Carbon Dioxide Emissions Per Capita"],
            "gdp": self.state["GDP"],
            "employment_rate": self.state["Employment Rate"],
            "electricity_consumption": self.state["Electricity Consumption Per Capita"],
            "water_quality": self.state["Water Quality Index"],
            "waste_efficiency": self.state["Waste Management Efficiency"],
        }

    def reward(self, action):
        s = self.state
        env_score = self.environmental_impact(action, self.t)
        econ_score = self.economic_growth(action, self.t)
        energy_score = self.energy_efficiency(action, self.t)
        res_score = self.resource_management(action, self.t)
        clim_score = self.climate_resilience(action, self.t)

        total_utility = (
        (env_score * 5.0) +
        (energy_score * 2.0) +
        (res_score * 2.0) +
        (clim_score * 2.0)
    )
        current_potential = (0.4 * s["Employment Rate"] + 0.6 * (1 - s["Carbon Dioxide Emissions Per Capita"]/12))
        policy_momentum = (current_potential - getattr(self, "prev_potential", current_potential)) * 20 # Multiplier
        self.prev_potential = current_potential

        boundary_penalty = 0
        if s["Temperature"] > 35:
            boundary_penalty -= np.exp((s["Temperature"] - 35) / 5)
        if s["Carbon Dioxide Emissions Per Capita"] > 9:
            boundary_penalty -= np.exp(s["Carbon Dioxide Emissions Per Capita"] - 9)

        return total_utility + policy_momentum + boundary_penalty

    def step(self, action):
      self.t += 1
      self.last_action = action

      n_act = {}
      for k, v in action.items():
          val = v.item() if isinstance(v, (np.ndarray, np.int64)) else v
          if k in self.action_space.spaces and isinstance(self.action_space[k], gym.spaces.Box):
              low, high = self.action_space[k].low[0], self.action_space[k].high[0]
              n_act[k] = np.clip((val - low) / (high - low + 1e-8), 0, 1)
          else:
              n_act[k] = val

      state = self.state

      old_state_val = (state["Employment Rate"] + (1 - state["Carbon Dioxide Emissions Per Capita"]/12)) / 2
      state["Carbon Dioxide Emissions Per Capita"] *= (1.0 - (0.02 * n_act["Carbon Tax"] + 0.05 * n_act["Renewable energy subsidies"]))
      state["Electricity Consumption Per Capita"] *= (1.0 - (0.01 * n_act["Energy Efficiency Incentives"] + 0.008 * n_act["Renewable energy subsidies"]))

      state["Air Pollution Index"] *= (1.0 - (0.04 * n_act["Vehicle emission standards"] + 0.03 * n_act["Fossil Fuel Phase-Out Regulations"]))
      state["Water Quality Index"] *= (1.0 + 0.003 * n_act["Water conservation measures"] + 0.002 * n_act["Water Consumption Tax"])

      pollution_drag = np.log1p(state["Air Pollution Index"] / 100) * 0.005
      growth_multiplier = (1.0 + 0.000000005 * n_act["Climate-Resilient Infrastructure Investment"]
                              + 0.00000001 * n_act["Public transport expansion"]
                              + 0.000000015 * n_act["Green Business Investments"]) - pollution_drag
      state["GDP"] *= growth_multiplier

      norm_gdp = state["GDP"] / 1e11
      state["Employment Rate"] = np.clip((norm_gdp**0.1) * (state["Waste Management Efficiency"]**0.05), 0.0, 1.0)

      state["Waste Management Efficiency"] *= (1.0 + 0.01 * n_act["Recycling Rate"] + 0.005 * n_act["Waste Management Reforms"])
      state["Urban Green Space Expansion"] *= (1.0 + 0.01 * n_act["Urban Green Space Expansion"])
      state["Flood defense infrastructure"] *= (1.0 + 0.01 * n_act["Flood defense infrastructure"])
      state["Heatwave resilience"] *= (1.0 + 0.01 * n_act["Heatwave resilience"])
      state["Sustainable Land-Use Zoning"] *= (1.0 + 0.01 * n_act["Sustainable Land-Use Zoning"])
      state["Single-use plastics bans"] *= (1.0 + 0.05 * n_act["Single-use plastics bans"])
      state["Green Business Investments"] *= (1.0 + 0.01 * n_act["Green Business Investments"])

      cooling = (0.1 * n_act["Carbon Tax"] + 0.2 * n_act["Renewable energy subsidies"] +
                0.05 * n_act["Fossil Fuel Phase-Out Regulations"] + 0.1 * n_act["Heatwave resilience"] +
                0.2 * n_act["Urban Green Space Expansion"] + 0.05 * n_act["Sustainable Land-Use Zoning"])
      heating = 0.1 * n_act["Green Business Investments"] + (state["Air Pollution Index"] / 1000)

      state["Temperature"] += (heating - cooling) + 0.5 * np.sin(np.pi * (self.t / 365))

      carbon_barrier = -np.log(max(1e-6, 12 - state["Carbon Dioxide Emissions Per Capita"])) * 0.2
      temp_barrier = -np.log(max(1e-6, 50 - state["Temperature"])) * 0.2

      for key, value in state.items():
          if key in self.state_space:
              state[key] = np.clip(value, self.state_space[key].low[0], self.state_space[key].high[0])

      scaled_state = self.scale_state().copy()

      new_state_val = (state["Employment Rate"] + (1 - state["Carbon Dioxide Emissions Per Capita"]/12)) / 2
      policy_change_reward = (new_state_val - old_state_val) * 10

      reward = self.reward(action) + policy_change_reward + carbon_barrier + temp_barrier

      done = (state["Temperature"] > 50 or state["GDP"] < 1e9 or state["Air Pollution Index"] > 300 or self.t >= 50)
      truncated = self.t >= 50

      return state, scaled_state, reward, truncated, done, self._get_info(action)


#**Synthetic Dataset**

In [7]:
import pandas as pd
import numpy as np
import random

np.random.seed(42)
N = 400
unique_id = [i for i in range(N)]
business_name = [f"Company_{i+1}" for i in range(N)]

sectors = [
    "Technology",
    "Telecommunications",
    "Industrial Manufacturing",
    "Automotive & Mobility",
    "Aerospace",
    "Defense & Security",
    "Construction",
    "Real Estate & Infrastructure",
    "Energy & Utilities",
    "Mining & Natural Resources",
    "Chemicals & Advanced Materials",
    "Healthcare & Life Sciences",
    "Agriculture & Agri-business",
    "Food & Beverage",
    "Finance & Banking",
    "Transportation & Logistics"
]

business_type = np.tile(sectors, N // len(sectors))
np.random.shuffle(business_type)
revenue = []
valuation = []
growth_rate = []
sustainability_index = []
energy_consumption = []

for sector in business_type:
    if sector == "Technology":
        revenue.append(np.random.lognormal(mean=20, sigma=1.0))
        growth_rate.append(np.random.randint(10, 50))
        sustainability_index.append(np.random.randint(40, 80))
        energy_consumption.append(np.random.randint(50_000, 500_000))
        valuation.append(revenue[-1] * np.random.uniform(5, 20))
    elif sector == "Telecommunications":
        revenue.append(np.random.lognormal(mean=18, sigma=0.8))
        growth_rate.append(np.random.randint(5, 30))
        sustainability_index.append(np.random.randint(30, 70))
        energy_consumption.append(np.random.randint(100_000, 800_000))
        valuation.append(revenue[-1] * np.random.uniform(3, 12))
    elif sector == "Industrial Manufacturing":
        revenue.append(np.random.lognormal(mean=19, sigma=0.9))
        growth_rate.append(np.random.randint(2, 20))
        sustainability_index.append(np.random.randint(10, 40))
        energy_consumption.append(np.random.randint(200_000, 1_000_000))
        valuation.append(revenue[-1] * np.random.uniform(2, 8))
    elif sector == "Automotive & Mobility":
        revenue.append(np.random.lognormal(mean=21, sigma=1.2))
        growth_rate.append(np.random.randint(5, 25))
        sustainability_index.append(np.random.randint(20, 60))
        energy_consumption.append(np.random.randint(100_000, 900_000))
        valuation.append(revenue[-1] * np.random.uniform(5, 15))
    elif sector == "Aerospace":
        revenue.append(np.random.lognormal(mean=22, sigma=1.5))
        growth_rate.append(np.random.randint(5, 30))
        sustainability_index.append(np.random.randint(10, 40))
        energy_consumption.append(np.random.randint(200_000, 1_000_000))
        valuation.append(revenue[-1] * np.random.uniform(10, 25))
    elif sector == "Defense & Security":
        revenue.append(np.random.lognormal(mean=23, sigma=1.3))
        growth_rate.append(np.random.randint(5, 15))
        sustainability_index.append(np.random.randint(5, 25))
        energy_consumption.append(np.random.randint(150_000, 800_000))
        valuation.append(revenue[-1] * np.random.uniform(8, 20))
    elif sector == "Construction":
        revenue.append(np.random.lognormal(mean=19, sigma=1.0))
        growth_rate.append(np.random.randint(5, 20))
        sustainability_index.append(np.random.randint(30, 60))
        energy_consumption.append(np.random.randint(200_000, 900_000))
        valuation.append(revenue[-1] * np.random.uniform(4, 12))
    elif sector == "Real Estate & Infrastructure":
        revenue.append(np.random.lognormal(mean=18.5, sigma=0.9))
        growth_rate.append(np.random.randint(5, 30))
        sustainability_index.append(np.random.randint(40, 70))
        energy_consumption.append(np.random.randint(100_000, 500_000))
        valuation.append(revenue[-1] * np.random.uniform(5, 15))
    elif sector == "Energy & Utilities":
        revenue.append(np.random.lognormal(mean=22, sigma=1.1))
        growth_rate.append(np.random.randint(2, 15))
        sustainability_index.append(np.random.randint(10, 50))
        energy_consumption.append(np.random.randint(300_000, 1_500_000))
        valuation.append(revenue[-1] * np.random.uniform(3, 10))
    elif sector == "Mining & Natural Resources":
        revenue.append(np.random.lognormal(mean=20, sigma=1.0))
        growth_rate.append(np.random.randint(0, 20))
        sustainability_index.append(np.random.randint(5, 30))
        energy_consumption.append(np.random.randint(400_000, 2_000_000))
        valuation.append(revenue[-1] * np.random.uniform(5, 18))
    elif sector == "Chemicals & Advanced Materials":
        revenue.append(np.random.lognormal(mean=19, sigma=1.1))
        growth_rate.append(np.random.randint(5, 25))
        sustainability_index.append(np.random.randint(20, 60))
        energy_consumption.append(np.random.randint(200_000, 1_200_000))
        valuation.append(revenue[-1] * np.random.uniform(4, 12))
    elif sector == "Healthcare & Life Sciences":
        revenue.append(np.random.lognormal(mean=18, sigma=0.8))
        growth_rate.append(np.random.randint(10, 40))
        sustainability_index.append(np.random.randint(50, 90))
        energy_consumption.append(np.random.randint(50_000, 500_000))
        valuation.append(revenue[-1] * np.random.uniform(3, 10))
    elif sector == "Agriculture & Agri-business":
        revenue.append(np.random.lognormal(mean=17, sigma=1.0))
        growth_rate.append(np.random.randint(0, 20))
        sustainability_index.append(np.random.randint(30, 70))
        energy_consumption.append(np.random.randint(100_000, 600_000))
        valuation.append(revenue[-1] * np.random.uniform(4, 12))
    elif sector == "Food & Beverage":
        revenue.append(np.random.lognormal(mean=19, sigma=0.9))
        growth_rate.append(np.random.randint(5, 30))
        sustainability_index.append(np.random.randint(40, 80))
        energy_consumption.append(np.random.randint(100_000, 800_000))
        valuation.append(revenue[-1] * np.random.uniform(4, 15))
    elif sector == "Finance & Banking":
        revenue.append(np.random.lognormal(mean=22, sigma=1.2))
        growth_rate.append(np.random.randint(2, 15))
        sustainability_index.append(np.random.randint(30, 60))
        energy_consumption.append(np.random.randint(50_000, 300_000))
        valuation.append(revenue[-1] * np.random.uniform(10, 30))
    elif sector == "Transportation & Logistics":
        revenue.append(np.random.lognormal(mean=20, sigma=1.0))
        growth_rate.append(np.random.randint(5, 30))
        sustainability_index.append(np.random.randint(10, 50))
        energy_consumption.append(np.random.randint(300_000, 1_200_000))
        valuation.append(revenue[-1] * np.random.uniform(5, 15))

business = pd.DataFrame({
    "unique_id": unique_id,
    "business_name": business_name,
    "business_type": business_type,
    "revenue": np.round(revenue, 2),
    "valuation": np.round(valuation, 2),
    "growth_rate": growth_rate,
    "sustainability_index": sustainability_index,
    "energy_consumption": energy_consumption
})

print(business.head())
print(business["business_type"].value_counts())
print(business.shape)

energy_types = ['Solar', 'Nuclear', 'Hydro', 'Fossil Fuels', 'Experimental', 'Geothermal', 'Wind', 'Biomass']
energy_type = np.tile(energy_types, N // len(energy_types))
np.random.shuffle(energy_type)

capacity = []
efficiency = []
production = []
carbon_intensity = []
emissions = []

for etype in energy_type:
    if etype == 'Solar':
        capacity.append(np.random.uniform(10, 100))
        efficiency.append(np.random.uniform(0.15, 0.25))
        carbon_intensity.append(np.random.normal(0.05, 0.02))
        emissions.append(np.random.normal(0.01, 0.005))
        production.append(np.random.lognormal(mean=5, sigma=0.8))
    elif etype == 'Nuclear':
        capacity.append(np.random.uniform(1000, 5000))
        efficiency.append(np.random.uniform(0.85, 0.95))
        carbon_intensity.append(np.random.normal(0.1, 0.05))
        emissions.append(np.random.normal(0.05, 0.02))
        production.append(np.random.lognormal(mean=6, sigma=0.7))
    elif etype == 'Hydro':
        capacity.append(np.random.uniform(500, 3000))
        efficiency.append(np.random.uniform(0.6, 0.85))
        carbon_intensity.append(np.random.normal(0.1, 0.05))
        emissions.append(np.random.normal(0.05, 0.02))
        production.append(np.random.lognormal(mean=5, sigma=0.8))
    elif etype == 'Fossil Fuels':
        capacity.append(np.random.uniform(500, 5000))
        efficiency.append(np.random.uniform(0.3, 0.45))
        carbon_intensity.append(np.random.normal(0.9, 0.1))
        emissions.append(np.random.normal(2.5, 0.5))
        production.append(np.random.lognormal(mean=6, sigma=1.0))
    elif etype == 'Experimental':
        if np.random.rand() < 0.5:
            capacity.append(np.random.uniform(500, 3000))
            efficiency.append(np.random.uniform(0.7, 0.9))
            carbon_intensity.append(np.random.normal(0.2, 0.05))
            emissions.append(np.random.normal(1.0, 0.3))
            production.append(np.random.lognormal(mean=5, sigma=0.8))
        else:
            capacity.append(np.random.uniform(100, 1000))
            efficiency.append(np.random.uniform(0.2, 0.5))
            carbon_intensity.append(np.random.normal(1.0, 0.2))
            emissions.append(np.random.normal(3.0, 0.7))
            production.append(np.random.lognormal(mean=5, sigma=0.8))
    elif etype == 'Geothermal':
        capacity.append(np.random.uniform(300, 2000))
        efficiency.append(np.random.uniform(0.7, 0.85))
        carbon_intensity.append(np.random.normal(0.1, 0.05))
        emissions.append(np.random.normal(0.1, 0.03))
        production.append(np.random.lognormal(mean=5, sigma=0.8))
    elif etype == 'Wind':
        capacity.append(np.random.uniform(10, 1000))
        efficiency.append(np.random.uniform(0.3, 0.5))
        carbon_intensity.append(np.random.normal(0.05, 0.02))  #
        emissions.append(np.random.normal(0.02, 0.01))
        production.append(np.random.lognormal(mean=5, sigma=0.8))
    elif etype == 'Biomass':
        capacity.append(np.random.uniform(100, 1500))
        efficiency.append(np.random.uniform(0.4, 0.7))
        carbon_intensity.append(np.random.normal(0.3, 0.1))
        emissions.append(np.random.normal(1.5, 0.5))
        production.append(np.random.lognormal(mean=5, sigma=0.8))

power_plant = pd.DataFrame({
    "unique_id": unique_id,
    "energy_type": energy_type,
    "capacity": capacity,
    "efficiency": efficiency,
    "production": production,
    "carbon_intensity": carbon_intensity,
    "emissions": emissions
})

print(power_plant.head())
print(power_plant["energy_type"].value_counts())
print(power_plant.shape)


authority_names = [
    "Department of Health", "Department of Energy", "Department of Education",
    "Department of Transport", "Department of Urban Planning", "Department of Environment",
    "Department of Agriculture", "Department of Housing", "Public Safety Department",
    "Department of Finance"
]

sectors = [
    "Health", "Energy", "Education", "Transport", "Urban Planning", "Environment",
    "Agriculture", "Housing", "Public Safety", "Finance"
]

# -----------------------------
# Parameter generator
# -----------------------------
def generate_sector_based_params(sector):
    if sector == "Health":
        budget = np.random.uniform(1e6, 10e6)
        economic_growth_priority = np.random.uniform(0.5, 1.0)
        emissions_priority = np.random.uniform(0, 0.2)
        energy_priority = np.random.uniform(0, 0.1)
        social_welfare_priority = np.random.uniform(0.7, 1.0)
        sustainability_priority = np.random.uniform(0.4, 0.7)
    elif sector == "Energy":
        budget = np.random.uniform(10e6, 50e6)
        economic_growth_priority = np.random.uniform(0.3, 0.7)
        emissions_priority = np.random.uniform(0.5, 1.0)
        energy_priority = np.random.uniform(0.7, 1.0)
        social_welfare_priority = np.random.uniform(0.2, 0.5)
        sustainability_priority = np.random.uniform(0.3, 0.6)
    elif sector == "Education":
        budget = np.random.uniform(5e6, 20e6)
        economic_growth_priority = np.random.uniform(0.6, 1.0)
        emissions_priority = np.random.uniform(0, 0.1)
        energy_priority = np.random.uniform(0, 0.1)
        social_welfare_priority = np.random.uniform(0.6, 1.0)
        sustainability_priority = np.random.uniform(0.5, 0.8)
    elif sector == "Environment":
        budget = np.random.uniform(5e6, 25e6)
        economic_growth_priority = np.random.uniform(0.3, 0.7)
        emissions_priority = np.random.uniform(0.7, 1.0)
        energy_priority = np.random.uniform(0.2, 0.5)
        social_welfare_priority = np.random.uniform(0.5, 0.8)
        sustainability_priority = np.random.uniform(0.7, 1.0)
    else:
        budget = np.random.uniform(1e6, 50e6)
        economic_growth_priority = np.random.uniform(0.3, 0.9)
        emissions_priority = np.random.uniform(0, 1)
        energy_priority = np.random.uniform(0, 1)
        social_welfare_priority = np.random.uniform(0, 1)
        sustainability_priority = np.random.uniform(0, 1)

    return {
        "budget": budget,
        "tax_capacity": np.random.uniform(0.05, 0.25),
        "subsidy_capacity": np.random.uniform(1e5, 1e7),
        "investment_ceiling": np.random.uniform(1e6, 1e7),
        "investment_floor": np.random.uniform(1e5, 5e6),
        "economic_growth_priority": economic_growth_priority,
        "emissions_priority": emissions_priority,
        "sustainability_priority": sustainability_priority,
        "energy_priority": energy_priority,
        "social_welfare_priority": social_welfare_priority,
        "regulation_strictness": np.random.uniform(0, 1),
        "penalty_severity": np.random.uniform(0, 1),
        "incentive_intensity": np.random.uniform(0, 1)
    }

# -----------------------------
# Build dataframe safely
# -----------------------------
rows = []
for uid, (name, sector) in enumerate(zip(authority_names, sectors)):
    params = generate_sector_based_params(sector)
    rows.append({
        "unique_id": uid,
        "authority": name,
        "sector": sector,
        **params
    })

authority_data = pd.DataFrame(rows)

print(authority_data)




   unique_id business_name             business_type       revenue  \
0          0     Company_1        Telecommunications  4.258325e+07   
1          1     Company_2        Energy & Utilities  8.425303e+09   
2          2     Company_3        Telecommunications  6.026394e+07   
3          3     Company_4  Industrial Manufacturing  1.152258e+08   
4          4     Company_5           Food & Beverage  8.868283e+08   

      valuation  growth_rate  sustainability_index  energy_consumption  
0  4.504545e+08           16                    34              711523  
1  6.653113e+10            5                    39              762894  
2  2.233446e+08           23                    57              727769  
3  2.863636e+08            6                    35              208308  
4  8.905081e+09            5                    40              277247  
business_type
Telecommunications                25
Energy & Utilities                25
Industrial Manufacturing          25
Food & Beverage 

In [8]:
import numpy as np
import pandas as pd

business_type = np.tile(sectors, N // len(sectors))
np.random.shuffle(business_type)

production_level = []
net_emissions = []
net_carbon_intensity = []
energy_consumption = []
water_consumption = []
resources_consumption = []
waste_creation = []
waste_recycling = []

for sector in business_type:
    if sector == "Energy & Utilities":
        production_level.append(np.random.uniform(500, 5000))
        net_emissions.append(np.random.normal(1000, 100))
        energy_consumption.append(np.random.normal(1000000, 200000))
        water_consumption.append(np.random.normal(3000, 500))
        resources_consumption.append(np.random.normal(8000, 1000))
        waste_creation.append(np.random.normal(4000, 500))
        waste_recycling.append(np.random.normal(3000, 400))
    elif sector == "Mining & Natural Resources":
        production_level.append(np.random.uniform(1000, 5000))
        net_emissions.append(np.random.normal(1200, 150))
        energy_consumption.append(np.random.normal(1200000, 250000))
        water_consumption.append(np.random.normal(5000, 800))
        resources_consumption.append(np.random.normal(10000, 1500))
        waste_creation.append(np.random.normal(5000, 800))
        waste_recycling.append(np.random.normal(2000, 300))
    elif sector == "Industrial Manufacturing":
        production_level.append(np.random.uniform(200, 3000))
        net_emissions.append(np.random.normal(800, 100))
        energy_consumption.append(np.random.normal(700000, 150000))
        water_consumption.append(np.random.normal(2000, 300))
        resources_consumption.append(np.random.normal(6000, 1000))
        waste_creation.append(np.random.normal(3000, 400))
        waste_recycling.append(np.random.normal(2000, 200))
    elif sector == "Automotive & Mobility":
        production_level.append(np.random.uniform(500, 3000))
        net_emissions.append(np.random.normal(900, 120))
        energy_consumption.append(np.random.normal(800000, 200000))
        water_consumption.append(np.random.normal(2000, 500))
        resources_consumption.append(np.random.normal(7000, 1200))
        waste_creation.append(np.random.normal(3500, 600))
        waste_recycling.append(np.random.normal(2500, 300))
    elif sector == "Aerospace":
        production_level.append(np.random.uniform(200, 1500))
        net_emissions.append(np.random.normal(700, 80))
        energy_consumption.append(np.random.normal(600000, 100000))
        water_consumption.append(np.random.normal(1500, 400))
        resources_consumption.append(np.random.normal(5000, 800))
        waste_creation.append(np.random.normal(2500, 300))
        waste_recycling.append(np.random.normal(2000, 250))
    elif sector == "Defense & Security":
        production_level.append(np.random.uniform(100, 2000))
        net_emissions.append(np.random.normal(1000, 200))
        energy_consumption.append(np.random.normal(1000000, 250000))
        water_consumption.append(np.random.normal(3000, 500))
        resources_consumption.append(np.random.normal(9000, 1200))
        waste_creation.append(np.random.normal(4000, 700))
        waste_recycling.append(np.random.normal(3000, 400))
    elif sector == "Construction":
        production_level.append(np.random.uniform(100, 2000))
        net_emissions.append(np.random.normal(400, 80))
        energy_consumption.append(np.random.normal(500000, 100000))
        water_consumption.append(np.random.normal(3000, 600))
        resources_consumption.append(np.random.normal(7000, 1000))
        waste_creation.append(np.random.normal(4000, 500))
        waste_recycling.append(np.random.normal(2000, 300))
    elif sector == "Healthcare & Life Sciences":
        production_level.append(np.random.uniform(50, 1000))
        net_emissions.append(np.random.normal(200, 50))
        energy_consumption.append(np.random.normal(200000, 50000))
        water_consumption.append(np.random.normal(1000, 300))
        resources_consumption.append(np.random.normal(3000, 500))
        waste_creation.append(np.random.normal(1000, 200))
        waste_recycling.append(np.random.normal(900, 150))
    elif sector == "Food & Beverage":
        production_level.append(np.random.uniform(200, 3000))
        net_emissions.append(np.random.normal(500, 70))
        energy_consumption.append(np.random.normal(600000, 100000))
        water_consumption.append(np.random.normal(3000, 600))
        resources_consumption.append(np.random.normal(5000, 800))
        waste_creation.append(np.random.normal(2500, 300))
        waste_recycling.append(np.random.normal(2000, 250))
    elif sector == "Agriculture & Agri-business":
        production_level.append(np.random.uniform(1000, 5000))
        net_emissions.append(np.random.normal(1200, 300))
        energy_consumption.append(np.random.normal(900000, 150000))
        water_consumption.append(np.random.normal(6000, 1000))
        resources_consumption.append(np.random.normal(12000, 2000))
        waste_creation.append(np.random.normal(7000, 1000))
        waste_recycling.append(np.random.normal(3000, 400))
    else:
        production_level.append(np.random.uniform(100, 2000))
        net_emissions.append(np.random.normal(500, 60))
        energy_consumption.append(np.random.normal(400000, 80000))
        water_consumption.append(np.random.normal(1000, 300))
        resources_consumption.append(np.random.normal(4000, 700))
        waste_creation.append(np.random.normal(1500, 300))
        waste_recycling.append(np.random.normal(1200, 200))

    net_carbon_intensity.append(np.round(net_emissions[-1] / (production_level[-1] if production_level[-1] > 0 else 1), 2))

sustainability_data = pd.DataFrame({
    "unique_id": unique_id,
    "sector": business_type,
    "production_level": np.round(production_level, 2),
    "net_emissions": np.round(net_emissions, 2),
    "net_carbon_intensity": net_carbon_intensity,
    "energy_consumption": np.round(energy_consumption, 2),
    "water_consumption": np.round(water_consumption, 2),
    "resources_consumption": np.round(resources_consumption, 2),
    "waste_creation": np.round(waste_creation, 2),
    "waste_recycling": np.round(waste_recycling, 2),
})

print(sustainability_data.head())



   unique_id     sector  production_level  net_emissions  \
0          0  Transport           1629.83         615.41   
1          1    Housing           1872.45         500.28   
2          2    Housing            739.01         598.60   
3          3  Transport           1266.95         494.00   
4          4     Energy           1059.33         440.90   

   net_carbon_intensity  energy_consumption  water_consumption  \
0                  0.38           338030.80             493.25   
1                  0.27           465591.05            1354.97   
2                  0.81           428851.83             740.95   
3                  0.39           272349.06             771.87   
4                  0.42           415894.40             976.28   

   resources_consumption  waste_creation  waste_recycling  
0                3670.12          907.35          1350.22  
1                3398.83         1491.08          1090.78  
2                3978.16         1505.41          1294.53  
3 

In [9]:
import pandas as pd
import numpy as np

np.random.seed(42)
N = 400

consumer_classes = [
    "Lower Class",
    "Lower Middle Class",
    "Middle Class",
    "Upper Middle Class",
    "Upper Class",
    "Elite"
]

class_proportions = {
    "Lower Class": 0.35,
    "Lower Middle Class": 0.25,
    "Middle Class": 0.2,
    "Upper Middle Class": 0.15,
    "Upper Class": 0.04,
    "Elite": 0.01
}

class_population = {cls: int(N * proportion) for cls, proportion in class_proportions.items()}

class_wealth = []

for consumer_class in consumer_classes:
    if consumer_class == "Lower Class":
        wealth_range = (1000, 5000)
    elif consumer_class == "Lower Middle Class":
        wealth_range = (5000, 15000)
    elif consumer_class == "Middle Class":
        wealth_range = (15000, 35000)
    elif consumer_class == "Upper Middle Class":
        wealth_range = (35000, 70000)
    elif consumer_class == "Upper Class":
        wealth_range = (70000, 150000)
    elif consumer_class == "Elite":
        wealth_range = (150000, 1000000)

    for _ in range(class_population[consumer_class]):
        class_wealth.append(np.random.randint(wealth_range[0], wealth_range[1]))

consumer_data = pd.DataFrame({
    "unique_id": range(6),
    "consumer_class": list(class_population.keys()),
    "class_population": list(class_population.values()),
    "class_wealth": [
        np.mean(class_wealth[i:i + class_population[class_name]]) for i, class_name in enumerate(class_population.keys())
    ]
})

print(consumer_data)


   unique_id      consumer_class  class_population  class_wealth
0          0         Lower Class               140   3111.728571
1          1  Lower Middle Class               100   3125.070000
2          2        Middle Class                80   3070.050000
3          3  Upper Middle Class                60   3065.566667
4          4         Upper Class                16   2996.625000
5          5               Elite                 4   3399.250000


In [10]:
dataset={
    "business": business,
    "energy": power_plant,
    "decision": authority_data,
    "sustainability": sustainability_data,
    "consumer": consumer_data
}

#**Linear Machine Learning**

In [11]:
import numpy as np
import random
import mesa
from mesa import Agent


class BusinessAgents(Agent):
    def __init__(self, model, unique_id):
        super().__init__(model)
        self.model = model
        self.unique_id = unique_id
        self.business_name = ""
        self.business_type = ""
        self.valuation = 0.0
        self.revenue = 0.0
        self.growth_rate = 0.0
        self.sustainability_index = 0.0
        self.energy_consumption = 0.0

        dummy_state = self.get_state()
        self.state_dim = len(dummy_state)

        self.policy_weights = {}
        for name, space in self.model.env.action_space.spaces.items():
            if isinstance(space, gym.spaces.Discrete):
                self.policy_weights[name] = np.random.uniform(-0.2, 0.5, size=(self.state_dim, space.n))
            else:
                self.policy_weights[name] = np.random.uniform(-0.2, 0.5, size=(self.state_dim,))

        self.epsilon = 0.3
        self.gamma = 0.99
        self.learning_rate = 0.001
        self.trajectory = []

    def softmax(self, x):
        x_clean = np.nan_to_num(x, nan=0.0, posinf=10.0, neginf=-10.0)
        e_x = np.exp(x_clean - np.max(x_clean))
        return e_x / (e_x.sum(axis=-1) + 1e-8)

    def get_action_probs(self, state_dict):
        state_values = []
        for v in state_dict.values():
            try:
                val = float(np.nan_to_num(v, nan=0.0))
                state_values.append(val)
            except:
                state_values.append(0.0)

        if len(state_values) > self.state_dim:
            state_values = state_values[:self.state_dim]
        elif len(state_values) < self.state_dim:
            state_values += [0.0] * (self.state_dim - len(state_values))

        state_vector = np.array(state_values, dtype=np.float64)
        state_vector = np.sign(state_vector) * np.log1p(np.abs(state_vector))
        state_vector = np.nan_to_num(state_vector)

        std = np.std(state_vector)
        if std > 1e-6:
            state_vector = (state_vector - np.mean(state_vector)) / (std + 1e-8)
        else:
            state_vector = state_vector - np.mean(state_vector)

        probs = {}
        for action_name, action_space in self.model.env.action_space.spaces.items():
            weights = self.policy_weights.get(action_name)
            if isinstance(action_space, gym.spaces.Discrete):
                logits = np.dot(state_vector, weights)
                probs[action_name] = self.softmax(logits)
            else:
                dot_val = np.dot(state_vector, weights)
                dot_val = np.clip(np.nan_to_num(dot_val), -15, 15)
                probs[action_name] = 1.0 / (1.0 + np.exp(-dot_val))
        return probs

    def business_definition(self, row):
        self.business_name = row["business_name"]
        self.business_type = row["business_type"]
        self.valuation = float(row["valuation"])
        self.revenue = float(row["revenue"])
        self.growth_rate = float(row["growth_rate"])
        self.sustainability_index = float(row["sustainability_index"])
        self.energy_consumption = float(row["energy_consumption"])

    def business_state(self):
        self.growth_rate *= (1 + random.uniform(-0.2, 0.5))
        self.sustainability_index *= (1 + random.uniform(-0.2, 0.5))
        self.valuation *= (1 + self.growth_rate)
        self.revenue *= (1 + self.growth_rate)

    def get_state(self):
        agent_state = {
            "valuation": np.sign(self.valuation) * np.log1p(abs(self.valuation)),
            "revenue": np.sign(self.revenue) * np.log1p(abs(self.revenue)),
            "growth_rate": self.growth_rate,
            "sustainability_index": self.sustainability_index,
            "energy_consumption": np.log1p(abs(self.energy_consumption)),
        }
        scaled_env_state = {k: (np.sign(v) * np.log1p(abs(v)) if isinstance(v, (int, float)) else v)
                           for k, v in self.model.env.state.items()}
        return {**agent_state, **scaled_env_state}

    def choose_action(self):
        state_dict = self.get_state()
        action = {}
        all_probs = self.get_action_probs(state_dict)
        for action_name, action_type in self.model.env.action_space.spaces.items():
            if isinstance(action_type, gym.spaces.Discrete):
                action_probs = all_probs[action_name]
                action_probs = np.nan_to_num(action_probs, nan=1.0/len(action_probs))
                action_probs /= (action_probs.sum() + 1e-8)
                action[action_name] = np.random.choice(len(action_probs), p=action_probs)
            else:
                p_val = all_probs[action_name]
                low, high = action_type.low[0], action_type.high[0]
                val = low + (high - low) * p_val
                action[action_name] = np.array([val], dtype=np.float32)
        return action

    def store_trajectory(self, state, action, reward):
        self.trajectory.append((state, action, reward))

    def compute_gradients(self):
        gradient_pairs = []
        total_return = 0
        norm_reward = 1e-6
        for i in reversed(range(len(self.trajectory))):
            state, action_dict, reward = self.trajectory[i]
            reward_scaled = np.nan_to_num(reward) * norm_reward
            state_values = [float(v) for v in state.values()]

            if len(state_values) > self.state_dim:
                state_values = state_values[:self.state_dim]
            elif len(state_values) < self.state_dim:
                state_values += [0.0] * (self.state_dim - len(state_values))

            state_vector = np.array(state_values, dtype=np.float64)
            state_vector = np.sign(state_vector) * np.log1p(np.abs(state_vector))
            state_vector = np.nan_to_num(state_vector)

            total_return = reward_scaled + self.gamma * total_return
            all_probs = self.get_action_probs(state)
            for name, val in action_dict.items():
                if name in self.policy_weights:
                    if isinstance(self.model.env.action_space[name], gym.spaces.Discrete):
                        probs = all_probs[name]
                        grad_logits = -probs.copy()
                        grad_logits[int(val)] += 1.0
                        full_grad = np.outer(state_vector, grad_logits) * total_return
                        gradient_pairs.append((name, np.clip(np.nan_to_num(full_grad), -1, 1)))
                    else:
                        full_grad = np.outer(state_vector, np.atleast_1d(total_return))
                        gradient_pairs.append((name, np.clip(np.nan_to_num(full_grad.flatten()), -1, 1)))
        return gradient_pairs

    def update(self):
        grads = self.compute_gradients()
        if not grads: return
        for action_name, g in grads:
            if g.shape == self.policy_weights[action_name].shape:
                self.policy_weights[action_name] += self.learning_rate * g
        self.trajectory = []

    def step(self):
        self.business_state()

class EnergyAgents(Agent):
    def __init__(self, model, unique_id):
        super().__init__(model)
        self.model = model
        self.unique_id = unique_id
        self.energy_type = ""
        self.capacity = 0.0
        self.efficiency = 0.0
        self.production = 0.0
        self.carbon_intensity = 0.0
        self.emissions = 0.0

        dummy_state = self.get_state()
        self.state_dim = len(dummy_state)

        self.policy_weights = {}
        for name, space in self.model.env.action_space.spaces.items():
            if isinstance(space, gym.spaces.Discrete):
                self.policy_weights[name] = np.random.uniform(-0.2, 0.5, size=(self.state_dim, space.n))
            else:
                self.policy_weights[name] = np.random.uniform(-0.2, 0.5, size=(self.state_dim,))

        self.epsilon = 0.3
        self.gamma = 0.99
        self.learning_rate = 0.001
        self.trajectory = []

    def softmax(self, x):
        x_clean = np.nan_to_num(x, nan=0.0, posinf=10.0, neginf=-10.0)
        e_x = np.exp(x_clean - np.max(x_clean))
        return e_x / (e_x.sum(axis=-1) + 1e-8)

    def get_action_probs(self, state_dict):
        state_values = []
        for v in state_dict.values():
            try:
                val = float(np.nan_to_num(v, nan=0.0))
                state_values.append(val)
            except:
                state_values.append(0.0)

        if len(state_values) > self.state_dim:
            state_values = state_values[:self.state_dim]
        elif len(state_values) < self.state_dim:
            state_values += [0.0] * (self.state_dim - len(state_values))

        state_vector = np.array(state_values, dtype=np.float64)
        state_vector = np.sign(state_vector) * np.log1p(np.abs(state_vector))
        state_vector = np.nan_to_num(state_vector)
        std = np.std(state_vector)
        if std > 1e-6:
            state_vector = (state_vector - np.mean(state_vector)) / (std + 1e-8)
        else:
            state_vector = state_vector - np.mean(state_vector)
        probs = {}
        for action_name, action_space in self.model.env.action_space.spaces.items():
            weights = self.policy_weights.get(action_name)
            if isinstance(action_space, gym.spaces.Discrete):
                logits = np.dot(state_vector, weights)
                probs[action_name] = self.softmax(logits)
            else:
                dot_val = np.dot(state_vector, weights)
                dot_val = np.clip(np.nan_to_num(dot_val), -15, 15)
                probs[action_name] = 1.0 / (1.0 + np.exp(-dot_val))
        return probs

    def choose_action(self):
        state_dict = self.get_state()
        action = {}
        all_probs = self.get_action_probs(state_dict)
        for action_name, action_type in self.model.env.action_space.spaces.items():
            if isinstance(action_type, gym.spaces.Discrete):
                action_probs = all_probs[action_name]
                action_probs = np.nan_to_num(action_probs, nan=1.0/len(action_probs))
                action_probs /= (action_probs.sum() + 1e-8)
                action[action_name] = np.random.choice(len(action_probs), p=action_probs)
            else:
                p_val = all_probs[action_name]
                low, high = action_type.low[0], action_type.high[0]
                val = low + (high - low) * p_val
                action[action_name] = np.array([val], dtype=np.float32)
        return action

    def store_trajectory(self, state, action, reward):
        self.trajectory.append((state, action, reward))

    def compute_gradients(self):
        gradient_pairs = []
        total_return = 0
        norm_reward = 1e-6
        for i in reversed(range(len(self.trajectory))):
            state, action_dict, reward = self.trajectory[i]
            reward_scaled = np.nan_to_num(reward) * norm_reward
            state_values = [float(v) for v in state.values()]

            if len(state_values) > self.state_dim:
                state_values = state_values[:self.state_dim]
            elif len(state_values) < self.state_dim:
                state_values += [0.0] * (self.state_dim - len(state_values))

            state_vector = np.array(state_values, dtype=np.float64)
            state_vector = np.sign(state_vector) * np.log1p(np.abs(state_vector))
            state_vector = np.nan_to_num(state_vector)
            total_return = reward_scaled + self.gamma * total_return
            all_probs = self.get_action_probs(state)
            for name, val in action_dict.items():
                if name in self.policy_weights:
                    if isinstance(self.model.env.action_space[name], gym.spaces.Discrete):
                        probs = all_probs[name]
                        grad_logits = -probs.copy()
                        grad_logits[int(val)] += 1.0
                        full_grad = np.outer(state_vector, grad_logits) * total_return
                        gradient_pairs.append((name, np.clip(np.nan_to_num(full_grad), -1, 1)))
                    else:
                        full_grad = np.outer(state_vector, np.atleast_1d(total_return))
                        gradient_pairs.append((name, np.clip(np.nan_to_num(full_grad.flatten()), -1, 1)))
        return gradient_pairs

    def update(self):
        grads = self.compute_gradients()
        if not grads: return
        for action_name, g in grads:
            if g.shape == self.policy_weights[action_name].shape:
                self.policy_weights[action_name] += self.learning_rate * g
        self.trajectory = []

    def power_plant_definition(self, row):
        self.energy_type = row["energy_type"]
        self.capacity = float(row["capacity"])
        self.efficiency = float(row["efficiency"])
        self.production = float(row["production"])
        self.carbon_intensity = float(row["carbon_intensity"])
        self.emissions = float(row["emissions"])

    def power_plant_state(self):
        self.production *= (1 + random.uniform(-0.2, 0.5))
        self.carbon_intensity *= (1 + random.uniform(-0.2, 0.5))
        self.emissions *= (1 + random.uniform(-0.2, 0.5))

    def get_state(self):
        agent_state = {
            "capacity": np.log1p(abs(self.capacity)),
            "production": np.log1p(abs(self.production)),
            "efficiency": self.efficiency,
            "carbon_intensity": self.carbon_intensity,
            "emissions": np.log1p(abs(self.emissions)),
        }
        scaled_env_state = {k: (np.sign(v) * np.log1p(abs(v)) if isinstance(v, (int, float)) else v)
                           for k, v in self.model.env.state.items()}
        return {**agent_state, **scaled_env_state}

    def step(self):
        self.power_plant_state()

class DecisionAgents(Agent):
    def __init__(self, model, unique_id):
        super().__init__(model)
        self.model = model
        self.unique_id = unique_id
        self.budget = 0.0
        self.tax_capacity = 0.0
        self.subsidy_capacity = 0.0
        self.investment_ceiling = 0.0
        self.investment_floor = 0.0
        self.economic_growth_priority = 0.0
        self.emissions_priority = 0.0
        self.sustainability_priority = 0.0
        self.energy_priority = 0.0
        self.social_welfare_priority = 0.0
        self.regulation_strictness = 0.0
        self.penalty_severity = 0.0
        self.incentive_intensity = 0.0

        dummy_state = self.get_state()
        self.state_dim = len(dummy_state)

        self.policy_weights = {}
        for name, space in self.model.env.action_space.spaces.items():
            if isinstance(space, gym.spaces.Discrete):
                self.policy_weights[name] = np.random.uniform(-0.2, 0.5, size=(self.state_dim, space.n))
            else:
                self.policy_weights[name] = np.random.uniform(-0.2, 0.5, size=(self.state_dim,))

        self.epsilon = 0.3
        self.gamma = 0.99
        self.learning_rate = 0.001
        self.trajectory = []

    def softmax(self, x):
        x_clean = np.nan_to_num(x, nan=0.0, posinf=10.0, neginf=-10.0)
        e_x = np.exp(x_clean - np.max(x_clean))
        return e_x / (e_x.sum(axis=-1) + 1e-8)

    def get_action_probs(self, state_dict):
        state_values = []
        for v in state_dict.values():
            try:
                val = float(np.nan_to_num(v, nan=0.0))
                state_values.append(val)
            except:
                state_values.append(0.0)

        if len(state_values) > self.state_dim:
            state_values = state_values[:self.state_dim]
        elif len(state_values) < self.state_dim:
            state_values += [0.0] * (self.state_dim - len(state_values))

        state_vector = np.array(state_values, dtype=np.float64)
        state_vector = np.sign(state_vector) * np.log1p(np.abs(state_vector))
        state_vector = np.nan_to_num(state_vector)
        std = np.std(state_vector)
        if std > 1e-6:
            state_vector = (state_vector - np.mean(state_vector)) / (std + 1e-8)
        else:
            state_vector = state_vector - np.mean(state_vector)
        probs = {}
        for action_name, action_space in self.model.env.action_space.spaces.items():
            weights = self.policy_weights.get(action_name)
            if isinstance(action_space, gym.spaces.Discrete):
                logits = np.dot(state_vector, weights)
                probs[action_name] = self.softmax(logits)
            else:
                dot_val = np.dot(state_vector, weights)
                dot_val = np.clip(np.nan_to_num(dot_val), -15, 15)
                probs[action_name] = 1.0 / (1.0 + np.exp(-dot_val))
        return probs

    def choose_action(self):
        state_dict = self.get_state()
        action = {}
        all_probs = self.get_action_probs(state_dict)
        for action_name, action_type in self.model.env.action_space.spaces.items():
            if isinstance(action_type, gym.spaces.Discrete):
                action_probs = all_probs[action_name]
                action_probs = np.nan_to_num(action_probs, nan=1.0/len(action_probs))
                action_probs /= (action_probs.sum() + 1e-8)
                action[action_name] = np.random.choice(len(action_probs), p=action_probs)
            else:
                p_val = all_probs[action_name]
                low, high = action_type.low[0], action_type.high[0]
                val = low + (high - low) * p_val
                action[action_name] = np.array([val], dtype=np.float32)
        return action

    def store_trajectory(self, state, action, reward):
        self.trajectory.append((state, action, reward))

    def compute_gradients(self):
        gradient_pairs = []
        total_return = 0
        norm_reward = 1e-6
        for i in reversed(range(len(self.trajectory))):
            state, action_dict, reward = self.trajectory[i]
            reward_scaled = np.nan_to_num(reward) * norm_reward
            state_values = [float(v) for v in state.values()]

            if len(state_values) > self.state_dim:
                state_values = state_values[:self.state_dim]
            elif len(state_values) < self.state_dim:
                state_values += [0.0] * (self.state_dim - len(state_values))

            state_vector = np.array(state_values, dtype=np.float64)
            state_vector = np.sign(state_vector) * np.log1p(np.abs(state_vector))
            state_vector = np.nan_to_num(state_vector)
            total_return = reward_scaled + self.gamma * total_return
            all_probs = self.get_action_probs(state)
            for name, val in action_dict.items():
                if name in self.policy_weights:
                    if isinstance(self.model.env.action_space[name], gym.spaces.Discrete):
                        probs = all_probs[name]
                        grad_logits = -probs.copy()
                        grad_logits[int(val)] += 1.0
                        full_grad = np.outer(state_vector, grad_logits) * total_return
                        gradient_pairs.append((name, np.clip(np.nan_to_num(full_grad), -1, 1)))
                    else:
                        full_grad = np.outer(state_vector, np.atleast_1d(total_return))
                        gradient_pairs.append((name, np.clip(np.nan_to_num(full_grad.flatten()), -1, 1)))
        return gradient_pairs

    def update(self):
        grads = self.compute_gradients()
        if not grads: return
        for action_name, g in grads:
            if g.shape == self.policy_weights[action_name].shape:
                self.policy_weights[action_name] += self.learning_rate * g
        self.trajectory = []

    def decision_definition(self, row):
        self.name = row["authority"]
        self.budget = float(row["budget"])
        self.sector = row["sector"]
        self.tax_capacity = float(row["tax_capacity"])
        self.subsidy_capacity = float(row["subsidy_capacity"])
        self.investment_ceiling = float(row["investment_ceiling"])
        self.investment_floor = float(row["investment_floor"])
        self.economic_growth_priority = float(row["economic_growth_priority"])
        self.emissions_priority = float(row["emissions_priority"])
        self.sustainability_priority = float(row["sustainability_priority"])
        self.energy_priority = float(row["energy_priority"])
        self.social_welfare_priority = float(row["social_welfare_priority"])
        self.regulation_strictness = float(row["regulation_strictness"])
        self.penalty_severity = float(row["penalty_severity"])
        self.incentive_intensity = float(row["incentive_intensity"])

    def decision_state(self):
        self.budget *= (1 + random.uniform(-0.2, 0.5))
        self.economic_growth_priority *= (1 + random.uniform(-0.2, 0.5))
        self.emissions_priority *= (1 + random.uniform(-0.2, 0.5))
        self.sustainability_priority *= (1 + random.uniform(-0.2, 0.5))
        self.energy_priority *= (1 + random.uniform(-0.2, 0.5))
        self.social_welfare_priority *= (1 + random.uniform(-0.2, 0.5))

    def get_state(self):
        agent_state = {
            "budget": np.sign(self.budget) * np.log1p(abs(self.budget)),
            "tax_capacity": self.tax_capacity,
            "subsidy_capacity": self.subsidy_capacity,
            "investment_ceiling": np.log1p(abs(self.investment_ceiling)),
            "investment_floor": np.log1p(abs(self.investment_floor)),
            "economic_growth_priority": self.economic_growth_priority,
            "emissions_priority": self.emissions_priority,
            "sustainability_priority": self.sustainability_priority,
            "energy_priority": self.energy_priority,
            "social_welfare_priority": self.social_welfare_priority,
            "regulation_strictness": self.regulation_strictness,
            "penalty_severity": self.penalty_severity,
            "incentive_intensity": self.incentive_intensity,
        }
        scaled_env_state = {k: (np.sign(v) * np.log1p(abs(v)) if isinstance(v, (int, float)) else v)
                           for k, v in self.model.env.state.items()}
        return {**agent_state, **scaled_env_state}


    def step(self):
        self.decision_state()

class Sustainability(Agent):
    def __init__(self, model, unique_id):
        super().__init__(model)
        self.model = model
        self.unique_id = unique_id
        self.production_level = 0.0
        self.net_emissions = 0.0
        self.net_carbon_intensity = 0.0
        self.energy_consumption = 0.0
        self.water_consumption = 0.0
        self.resources_consumption = 0.0
        self.waste_creation = 0.0
        self.waste_recycling = 0.0

        dummy_state = self.get_state()
        self.state_dim = len(dummy_state)

        self.policy_weights = {}
        for name, space in self.model.env.action_space.spaces.items():
            if isinstance(space, gym.spaces.Discrete):
                self.policy_weights[name] = np.random.uniform(-0.2, 0.5, size=(self.state_dim, space.n))
            else:
                self.policy_weights[name] = np.random.uniform(-0.2, 0.5, size=(self.state_dim,))

        self.epsilon = 0.3
        self.gamma = 0.99
        self.learning_rate = 0.001
        self.trajectory = []

    def softmax(self, x):
        x_clean = np.nan_to_num(x, nan=0.0, posinf=10.0, neginf=-10.0)
        e_x = np.exp(x_clean - np.max(x_clean))
        return e_x / (e_x.sum(axis=-1) + 1e-8)

    def store_trajectory(self, state, action, reward):
        self.trajectory.append((state, action, reward))

    def get_action_probs(self, state_dict):
        state_values = []
        for v in state_dict.values():
            try:
                val = float(np.nan_to_num(v, nan=0.0))
                state_values.append(val)
            except:
                state_values.append(0.0)

        if len(state_values) > self.state_dim:
            state_values = state_values[:self.state_dim]
        elif len(state_values) < self.state_dim:
            state_values += [0.0] * (self.state_dim - len(state_values))

        state_vector = np.array(state_values, dtype=np.float64)
        state_vector = np.sign(state_vector) * np.log1p(np.abs(state_vector))
        state_vector = np.nan_to_num(state_vector)
        std = np.std(state_vector)
        if std > 1e-6:
            state_vector = (state_vector - np.mean(state_vector)) / (std + 1e-8)
        else:
            state_vector = state_vector - np.mean(state_vector)
        probs = {}
        for action_name, action_space in self.model.env.action_space.spaces.items():
            weights = self.policy_weights.get(action_name)
            if isinstance(action_space, gym.spaces.Discrete):
                logits = np.dot(state_vector, weights)
                probs[action_name] = self.softmax(logits)
            else:
                dot_val = np.dot(state_vector, weights)
                dot_val = np.clip(np.nan_to_num(dot_val), -15, 15)
                probs[action_name] = 1.0 / (1.0 + np.exp(-dot_val))
        return probs

    def choose_action(self):
        state_dict = self.get_state()
        action = {}
        all_probs = self.get_action_probs(state_dict)
        for action_name, action_type in self.model.env.action_space.spaces.items():
            if isinstance(action_type, gym.spaces.Discrete):
                action_probs = all_probs[action_name]
                action_probs = np.nan_to_num(action_probs, nan=1.0/len(action_probs))
                action_probs /= (action_probs.sum() + 1e-8)
                action[action_name] = np.random.choice(len(action_probs), p=action_probs)
            else:
                p_val = all_probs[action_name]
                low, high = action_type.low[0], action_type.high[0]
                val = low + (high - low) * p_val
                action[action_name] = np.array([val], dtype=np.float32)
        return action

    def sustainable_definition(self, row):
        self.sector = row["sector"]
        self.production_level = float(row["production_level"])
        self.net_emissions = float(row["net_emissions"])
        self.net_carbon_intensity = float(row["net_carbon_intensity"])
        self.energy_consumption = float(row["energy_consumption"])
        self.water_consumption = float(row["water_consumption"])
        self.resources_consumption = float(row["resources_consumption"])
        self.waste_creation = float(row["waste_creation"])
        self.waste_recycling = float(row["waste_recycling"])

    def sustainable_state(self):
        self.production_level *= 1 + random.uniform(-0.2, 0.5)
        self.energy_consumption *= 1 + random.uniform(-0.2, 0.5)
        self.net_emissions *= 1 + random.uniform(-0.2, 0.5)
        self.net_carbon_intensity *= 1 + random.uniform(-0.2, 0.5)
        self.water_consumption *= 1 + random.uniform(-0.2, 0.5)
        self.resources_consumption *= 1 + random.uniform(-0.2, 0.5)
        self.waste_creation *= 1 + random.uniform(-0.2, 0.5)
        self.waste_recycling *= 1 + random.uniform(-0.2, 0.5)

    def get_state(self):
        agent_state = {
            "production_level": np.log1p(abs(self.production_level)),
            "net_emissions": np.log1p(abs(self.net_emissions)),
            "net_carbon_intensity": self.net_carbon_intensity,
            "energy_consumption": np.log1p(abs(self.energy_consumption)),
            "water_consumption": np.log1p(abs(self.water_consumption)),
            "resources_consumption": np.log1p(abs(self.resources_consumption)),
            "waste_creation": np.log1p(abs(self.waste_creation)),
            "waste_recycling": np.log1p(abs(self.waste_recycling)),
        }
        scaled_env_state = {k: (np.sign(v) * np.log1p(abs(v)) if isinstance(v, (int, float)) else v)
                           for k, v in self.model.env.state.items()}
        return {**agent_state, **scaled_env_state}

    def step(self):
        self.sustainable_state()
        businesses = [a for a in self.model.agent_list if isinstance(a, BusinessAgents)]
        power_plants = [a for a in self.model.agent_list if isinstance(a, EnergyAgents)]
        if businesses:
            self.production_level = sum(b.revenue for b in businesses)
            self.energy_consumption = sum(b.energy_consumption for b in businesses)
        if power_plants:
            self.net_emissions = sum(p.emissions for p in power_plants)
        self.net_carbon_intensity = (self.net_emissions / (self.production_level + 1e-6))

    def update(self):
        grads = []
        total_return = 0
        for s, a, r in reversed(self.trajectory):
            total_return = r + self.gamma * total_return
            all_probs = self.get_action_probs(s)
            for name, val in a.items():
                weights = self.policy_weights[name]
                if isinstance(self.model.env.action_space[name], gym.spaces.Discrete):
                    probs = all_probs[name]
                    grad_logits = -probs.copy()
                    grad_logits[int(val)] += 1.0
                    grads.append((name, np.outer(list(s.values())[:self.state_dim], grad_logits) * total_return))
                else:
                    grads.append((name, np.outer(list(s.values())[:self.state_dim], [total_return]).flatten()))
        # Apply gradients
        for name, g in grads:
            if g.shape == self.policy_weights[name].shape:
                self.policy_weights[name] += self.learning_rate * g
        self.trajectory = []

    def step(self):
        self.sustainable_state()  # update internal state

class ConsumerAgents(mesa.Agent):
    def __init__(self, model, unique_id):
        super().__init__(model)
        self.unique_id = unique_id
        self.class_population = 0
        self.class_wealth = 0.0
        self.public_satisfaction = 0.0

        self.policy_weights = {}
        dummy_state = self.get_state()
        self.state_dim = len(dummy_state)
        for name, space in self.model.env.action_space.spaces.items():
            if isinstance(space, gym.spaces.Discrete):
                self.policy_weights[name] = np.random.uniform(-0.2, 0.5, size=(self.state_dim, space.n))
            else:
                self.policy_weights[name] = np.random.uniform(-0.2, 0.5, size=(self.state_dim,))
        self.trajectory = []
        self.epsilon = 0.3
        self.gamma = 0.99
        self.learning_rate = 0.01

    def consumer_definition(self, row):
        self.consumer_class = row["consumer_class"]
        self.class_population = int(row["class_population"])
        self.class_wealth = float(row["class_wealth"])
        self.public_satisfaction = float(self.class_population * self.class_wealth)

    def consumer_state(self):
        gdp = self.model.env.state["GDP"]
        employment = self.model.env.state["Employment Rate"]
        self.class_wealth += 0.5 * gdp * random.uniform(-0.2, 0.5)
        self.class_wealth += 0.2 * employment * random.uniform(-0.2, 0.5)
        self.class_population += random.randint(0, 10 if gdp > 1e11 else 5)

        policy_impact = {
            "Carbon Tax": (-0.5, -0.1),
            "Renewable energy subsidies": (0.1, 0.05),
            "Water conservation measures": (0.02, 0.2)
        }

        current_actions = getattr(self.model, 'current_action', {})
        for policy_name, (g_f, e_f) in policy_impact.items():
            if policy_name in current_actions:
                val = current_actions[policy_name]
                p_val = float(val[0]) if isinstance(val, (np.ndarray, list)) else float(val)
                gdp += p_val * random.uniform(g_f, g_f * 1.1)
                employment += p_val * random.uniform(e_f, e_f * 1.1)

        self.model.env.state["GDP"] = gdp
        self.model.env.state["Employment Rate"] = np.clip(employment, 0.0, 1.0)
        self.public_satisfaction = np.log1p(abs(self.class_population * self.class_wealth))

    def store_trajectory(self, state, action, reward):
        self.trajectory.append((state, action, reward))

    def get_action_probs(self, state_dict):
      state_values = [float(np.nan_to_num(v, nan=0.0)) for v in state_dict.values()]
      if len(state_values) < self.state_dim:
          state_values += [0.0] * (self.state_dim - len(state_values))
      state_vector = np.array(state_values[:self.state_dim], dtype=np.float64)
      state_vector = np.sign(state_vector) * np.log1p(np.abs(state_vector))
      state_vector = np.nan_to_num(state_vector)
      std = np.std(state_vector)
      if std > 1e-6:
          state_vector = (state_vector - np.mean(state_vector)) / (std + 1e-8)
      else:
          state_vector = state_vector - np.mean(state_vector)
      probs = {}
      for name, space in self.model.env.action_space.spaces.items():
          weights = self.policy_weights[name]
          if isinstance(space, gym.spaces.Discrete):
              logits = np.dot(state_vector, weights)
              e_x = np.exp(logits - np.max(logits))
              probs[name] = e_x / (e_x.sum() + 1e-8)
          else:
              dot_val = np.dot(state_vector, weights)
              dot_val = np.clip(dot_val, -15, 15)
              probs[name] = 1.0 / (1.0 + np.exp(-dot_val))
      return probs

    def choose_action(self):
        state_dict = self.get_state()
        action = {}
        all_probs = self.get_action_probs(state_dict)
        for name, space in self.model.env.action_space.spaces.items():
            if isinstance(space, gym.spaces.Discrete):
                probs = all_probs[name]
                probs = np.nan_to_num(probs, nan=1.0/len(probs))
                probs /= probs.sum() + 1e-8
                action[name] = np.random.choice(len(probs), p=probs)
            else:
                p_val = all_probs[name]
                low, high = space.low[0], space.high[0]
                action[name] = np.array([low + (high - low) * p_val], dtype=np.float32)
        return action

    def store_trajectory(self, state, action, reward):
        if not hasattr(self, "trajectory"):
            self.trajectory = []
        self.trajectory.append((state, action, reward))

    def update(self):
        grads = []
        total_return = 0
        for s, a, r in reversed(self.trajectory):
            total_return = r + self.gamma * total_return
            all_probs = self.get_action_probs(s)
            for name, val in a.items():
                weights = self.policy_weights[name]
                if isinstance(self.model.env.action_space[name], gym.spaces.Discrete):
                    probs = all_probs[name]
                    grad_logits = -probs.copy()
                    grad_logits[int(val)] += 1.0
                    grads.append((name, np.outer(list(s.values())[:self.state_dim], grad_logits) * total_return))
                else:
                    grads.append((name, np.outer(list(s.values())[:self.state_dim], [total_return]).flatten()))
        for name, g in grads:
            if g.shape == self.policy_weights[name].shape:
                self.policy_weights[name] += self.learning_rate * g
        self.trajectory = []

    def step(self):
        self.consumer_state()


    def get_state(self):
        agent_state = {
            "class_population": np.log1p(abs(self.class_population)),
            "class_wealth": np.log1p(abs(self.class_wealth)),
            "public_satisfaction": self.public_satisfaction,
        }
        scaled_env_state = {k: (np.sign(v) * np.log1p(abs(v)) if isinstance(v, (int, float)) else v)
                           for k, v in self.model.env.state.items()}
        return {**agent_state, **scaled_env_state}

    def step(self):
        self.consumer_state()

In [12]:
import numpy as np
import random
import mesa
from concurrent.futures import ThreadPoolExecutor
import concurrent.futures
class ClimateModel(mesa.Model):
    def __init__(self, env, dataset):
        super().__init__()
        self.env = env
        self.dataset = dataset
        self.agent_list = []
        self.current_action = {}

    def environment(self, env):
        self.state = self.env.state
        self.t = self.env.t
        return self.state, self.t

    def build_agents(self, dataset):
        self.agent_list = []
        for name in self.dataset:
            data = self.dataset[name]
            for i, row in data.iterrows():
                if name == "business":
                    agent = BusinessAgents(self, row['unique_id'])
                    agent.business_definition(row)
                elif name == "energy":
                    agent = EnergyAgents(self, row['unique_id'])
                    agent.power_plant_definition(row)
                elif name == "sustainability":
                    agent = Sustainability(self, row['unique_id'])
                    agent.sustainable_definition(row)
                elif name == "decision":
                    agent = DecisionAgents(self, row['unique_id'])
                    agent.decision_definition(row)
                elif name == "consumer":
                    agent = ConsumerAgents(self, row['unique_id'])
                    agent.consumer_definition(row)
                self.agent_list.append(agent)

    def scaling(self, action_dict):
        scaling_map = {
            "Climate-Resilient Infrastructure Investment": 1e9,
            "Public transport expansion": 2e9,
            "Waste Management Reforms": 2e8,
            "Flood defense infrastructure": 1.5e9,
            "Heatwave resilience": 1.2e9,
            "Sustainable Land-Use Zoning": 1e8,
            "Green Business Investments": 8e7,
            "Carbon Tax": 1,
            "Water Consumption Tax": 0.5,
            "Fuel Economy Standards": 2,
            "Vehicle emission standards": 3
        }
        discrete_keys = [
            "Fossil Fuel Phase-Out Regulations",
            "Single-use plastics bans",
            "Urban Green Space Expansion"
        ]
        processed_action = action_dict.copy()
        for k, v in processed_action.items():
            val_clean = float(np.nan_to_num(v, nan=0.0))
            if k in scaling_map and val_clean <= 1.05:
                processed_action[k] = val_clean * scaling_map[k]
            else:
                processed_action[k] = val_clean
            if k in discrete_keys:
                processed_action[k] = int(round(processed_action[k]))
            if k in self.env.action_space.spaces:
                space = self.env.action_space[k]
                if hasattr(space, "low"):
                    processed_action[k] = np.clip(processed_action[k], space.low[0], space.high[0])
                elif hasattr(space, "n"):
                    processed_action[k] = np.clip(int(round(processed_action[k])), 0, space.n - 1)
        return processed_action

    def log_agent_step(self, dataset, agent, agent_action, next_state, batch_data, params):
        c_name = agent.__class__.__name__
        state = agent.get_state()

        row = {f"state_{k}": v for k, v in state.items()}
        row.update({f"action_{k}": float(v[0] if isinstance(v, (np.ndarray, list)) else v)
                    for k, v in agent_action.items()})
        row.update(params) # This adds LR, Epsilon, Gamma, Episode, Step

        if isinstance(next_state, dict):
            row.update({f"next_{k}": v for k, v in next_state.items()})
        else:
            row['next_state'] = next_state

        if c_name not in batch_data:
            batch_data[c_name] = []
        batch_data[c_name].append(row)

    def log_episode_results(self, dataset, batch_data, episode, lr=None, eps=None):
        suffix = f"_LR{lr}_EPS{eps}" if lr is not None else ""

        for agent_class, data in batch_data.items():
            df = pd.DataFrame(data)
            file_name = f"log_{agent_class}{suffix}.csv"

            if episode == 0:
                df.to_csv(file_name, index=False)
            else:
                df.to_csv(file_name, mode='a', header=False, index=False)

        batch_data.clear()


    def learn(self, episodes=100, lr=None, eps=None, gamma=None):
        trackable_classes = ["DecisionAgents", "Sustainability", "BusinessAgents", "EnergyAgents", "ConsumerAgents"]
        if not hasattr(self, 'model_history'):
            self.model_history = []

        agent_cache = []
        for agent in self.agent_list:
            c_name = agent.__class__.__name__

            if lr is not None and hasattr(agent, 'learning_rate'):
                if isinstance(agent,ConsumerAgents):
                  if lr!=0.1:
                    agent.learning_rate=lr*10
                  else:
                    agent.learning_rate=lr*5
                else:
                  agent.learning_rate = lr
            if eps is not None and hasattr(agent, 'epsilon'):
                agent.epsilon = eps
            if gamma is not None and hasattr(agent, 'gamma'):
                agent.gamma = gamma

            agent_cache.append({
                'obj': agent,
                'name': c_name,
                'id': agent.unique_id,
                'trackable': c_name in trackable_classes
            })

        for episode in range(episodes):
            self.env.reset()
            done, total_reward, steps = False, 0.0, 0
            attribute_tracker = {cls: {} for cls in trackable_classes}
            batch_data = {}
            decay_rate = 0.95
            current_eps = max(0.01, eps * (decay_rate ** episode))

            for item in agent_cache:
                if hasattr(item['obj'], 'epsilon'):
                    item['obj'].epsilon = current_eps

            for item in agent_cache:
                item['cur_lr'] = getattr(item['obj'], 'learning_rate', 0)
                item['cur_eps'] = getattr(item['obj'], 'epsilon', 0)
                item['cur_gamma'] = getattr(item['obj'], 'gamma', 0)

            while not done:
                combined_action = {k: 0.0 for k in self.env.action_space.spaces}
                weight_sum = {k: 0.0 for k in combined_action}
                agent_actions_step = {}


                for item in agent_cache:
                    agent = item['obj']

                    if item['trackable']:
                        u_id, c_name = item['id'], item['name']
                        for key, val in agent.__dict__.items():
                            if isinstance(val, (int, float)) and key not in ('unique_id', 'state_dim'):
                                attr_id = f"{u_id}_{key}"
                                if attr_id not in attribute_tracker[c_name]:
                                    attribute_tracker[c_name][attr_id] = []
                                attribute_tracker[c_name][attr_id].append(val)

                    if hasattr(agent, "choose_action"):
                        act = agent.choose_action()
                        agent_actions_step[item['id']] = act
                        for k, v in act.items():
                            val = v[0] if isinstance(v, (np.ndarray, list)) else v
                            combined_action[k] += float(val)
                            weight_sum[k] += 1.0

                # 2. Env Step
                scaled_action = {k: (combined_action[k]/weight_sum[k] if weight_sum[k] > 0 else 0.0)
                                 for k in combined_action}
                obs, _, reward, term, trunc, _ = self.env.step(self.scaling(scaled_action))
                total_reward += reward

                # 3. LOGGING
                for item in agent_cache:
                    params = {
                        'lr': item['cur_lr'],
                        'eps': item['cur_eps'],
                        'gamma': item['cur_gamma'],
                        'episode': episode,
                        'step': steps,
                        'reward': reward
                    }
                    self.log_agent_step(self.dataset, item['obj'],
                                        agent_actions_step.get(item['id'], {}),
                                        obs, batch_data, params)

                    if hasattr(item['obj'], "step"):
                        item['obj'].step()

                steps += 1
                done = term or trunc or steps >= 50

            inf_scores = self.calculate_influence_scores(attribute_tracker)
            self.print_influence_scores(inf_scores)
            self.model_history.append({'episode': episode, 'reward': total_reward, **inf_scores})
            self.log_episode_results(self.dataset, batch_data, episode,lr,eps)

            print(f"EP {episode + 1} | Reward: {total_reward:.2f} | LR: {lr} EPS: {eps}")

    def calculate_influence_scores(self, attribute_tracker):
        influence_scores = {}
        for cls, agents_attrs in attribute_tracker.items():
            class_variations = []
            for attr_id, history in agents_attrs.items():
                if len(history) > 1:
                    arr = np.array(history, dtype=np.float64)
                    safe_arr = np.sign(arr) * np.log1p(np.abs(arr))
                    mu = np.nanmean(safe_arr)
                    sigma = np.nanstd(safe_arr)
                    mu = 0.0 if not np.isfinite(mu) else mu
                    sigma = 0.0 if not np.isfinite(sigma) else sigma

                    if abs(mu) > 1e-9:
                        class_variations.append(sigma / (abs(mu) + 1e-10))

            influence_scores[cls] = np.mean(class_variations) if class_variations else 0.0

        return influence_scores

    def print_influence_scores(self, influence_scores):
        total_inf = sum(influence_scores.values()) + 1e-10
        print(f"{'Agent Class':<20} | {'Influence (%)':>15}")
        print("-" * 40)
        for cls, val in sorted(influence_scores.items(), key=lambda x: x[1], reverse=True):
            share = (val / total_inf) * 100
            print(f"{cls:<20} | {share:>14.2f}%")


    def step_env(self):
        attribute_tracker = {cls: {} for cls in ["DecisionAgents", "Sustainability", "BusinessAgents", "EnergyAgents", "ConsumerAgents"]}
        agent_states = {agent.unique_id: agent.get_state() if hasattr(agent, "get_state") else None
                        for agent in self.agent_list}

        combined_action = {k: 0.0 if hasattr(self.env.action_space[k], "low") else 0
                            for k in self.env.action_space.spaces}
        weight_sum = {k: 0.0 for k in combined_action}

        default_weights = {
            "BusinessAgents": 0.2,
            "EnergyAgents": 0.2,
            "DecisionAgents": 0.3,
            "Sustainability": 0.2,
            "ConsumerAgents": 0.1
        }

        agent_actions = {}
        batch_data = {}

        for agent in self.agent_list:
            c_name = agent.__class__.__name__
            for key, value in agent.__dict__.items():
                if isinstance(value, (int, float)) and key not in ['unique_id', 'state_dim']:
                    attr_id = f"{agent.unique_id}_{key}"
                    if attr_id not in attribute_tracker[c_name]:
                        attribute_tracker[c_name][attr_id] = []
                    attribute_tracker[c_name][attr_id].append(float(value))

            if hasattr(agent, "choose_action"):
                agent_action = agent.choose_action()
                for k, v in agent_action.items():
                    if k in combined_action:
                        val = float(v[0]) if isinstance(v, (np.ndarray, list)) else float(v)
                        combined_action[k] += val
                        weight_sum[k] += 1.0

        scaled_action = combined_action.copy()
        for k in combined_action:
            if weight_sum[k] > 0:
                scaled_action[k] /= weight_sum[k]

        state, scaled_obs, reward, terminated, truncated, info = self.env.step(self.scaling(scaled_action))

        for agent in self.agent_list:
            if hasattr(agent, "step"):
                agent.step()

            self.log_agent_step(dataset, agent, agent_actions.get(agent.unique_id, {}), _, batch_data)

        if terminated or truncated:
            self.log_episode_results(dataset, batch_data)
        print(f"State: {state}")
        print(f"Reward: {reward}")
        print(f"Info:{info}")
        if terminated or truncated:
            print("Episode Terminated!")
        return state, scaled_obs, reward, terminated, truncated, info





In [13]:
env=GeographicRegion(seed=42)
state,info=env.reset()
action=env.action_space.sample()
print("Sampled action:", action)

state, scaled_state, reward, truncated, done, info=env.step(action)
print(state)
print(scaled_state)
print(reward)
print(truncated)
print(done)
print(info)

Sampled action: {'Carbon Tax': array([44.903137], dtype=float32), 'Climate-Resilient Infrastructure Investment': array([1.644464e+08], dtype=float32), 'Electric Vehicle (EV) Subsidies': array([0.04410264], dtype=float32), 'Energy Efficiency Incentives': array([0.95554954], dtype=float32), 'Flood defense infrastructure': array([1.072968e+08], dtype=float32), 'Fossil Fuel Phase-Out Regulations': np.int64(0), 'Fuel Economy Standards': np.int64(0), 'Green Business Investments': array([35722292.], dtype=float32), 'Heatwave resilience': array([1.1469116e+08], dtype=float32), 'Public transport expansion': array([1.3152037e+08], dtype=float32), 'Recycling Rate': array([0.8117917], dtype=float32), 'Renewable energy subsidies': array([0.97453797], dtype=float32), 'Single-use plastics bans': np.int64(2), 'Sustainable Land-Use Zoning': array([72477640.], dtype=float32), 'Urban Green Space Expansion': array([137.13008], dtype=float32), 'Vehicle emission standards': np.int64(3), 'Waste Management Re

  0.3 * float(consumption_efficiency_score) +


In [None]:
env=GeographicRegion(seed=42)
model=ClimateModel(env,dataset)
model.build_agents(dataset)
model.learn(episodes=50,lr=0.001, eps=0.3, gamma=0.99)


In [14]:
import pandas as pd
agents= ["Sustainability", "DecisionAgents", "BusinessAgents", "EnergyAgents", "ConsumerAgents"]
data={}
suffix = f"_LR{0.001}_EPS{0.3}"
for agent_class in agents:
  file_name = f"log_{agent_class}{suffix}.csv"
  file_path = "/content/drive/MyDrive/" + file_name
  dataframe = pd.read_csv(file_path)
  data[agent_class]=dataframe
  print(dataframe.columns)


Index(['state_production_level', 'state_net_emissions',
       'state_net_carbon_intensity', 'state_energy_consumption',
       'state_water_consumption', 'state_resources_consumption',
       'state_waste_creation', 'state_waste_recycling', 'state_Temperature',
       'state_Precipitation', 'state_Humidity', 'state_Air Pollution Index',
       'state_Water Quality Index',
       'state_Carbon Dioxide Emissions Per Capita',
       'state_Electricity Consumption Per Capita', 'state_Renewable Share',
       'state_Water Consumption Per Capita', 'state_Population', 'state_GDP',
       'state_Employment Rate', 'state_Waste Management Efficiency',
       'state_Energy Efficiency', 'state_Urban Green Space Expansion',
       'state_Flood defense infrastructure', 'state_Heatwave resilience',
       'state_Sustainable Land-Use Zoning', 'state_Single-use plastics bans',
       'state_Green Business Investments', 'action_Carbon Tax',
       'action_Climate-Resilient Infrastructure Investment',
 

In [15]:
import tensorflow as tf
from tensorflow import keras
import spektral
from spektral.layers import GeneralConv
from spektral.models import GeneralGNN


class GraphConventionalModel(GeneralGNN):
    def __init__(self,input_dim, hidden_dim, output_dim):
        super().__init__()
        self.layer_1 = GeneralConv(input_dim,hidden_dim)
        self.layer_2 = GeneralConv(hidden_dim, output_dim)

    def call(self, inputs, training=False):
        x, edge_index, edge_attr = inputs
        x = self.layer_1([x, edge_index, edge_attr])
        x = tf.nn.relu(x)
        x= tf.nn.dropout(0.2)(x) if training else x
        x = self.layer_2([x, edge_index, edge_attr])
        return x


class GraphActorCriticModel(GeneralGNN):
    def __init__(self,input_dim, hidden_dim, output_dim,num_continuous_actions,num_discrete_actions,neurons=256):
        super().__init__()
        self.graph_layer=GraphConventionalModel(input_dim, hidden_dim, output_dim)

        self.layer_1=keras.layers.Dense(neurons,activation="relu")
        self.layer_2=keras.layers.Dense(neurons//2,activation="relu")
        self.layer_3=keras.layers.Dense(neurons//4,activation="relu")

        self.actor_continuous = keras.layers.Dense(num_continuous_actions, activation='tanh')
        self.actor_discrete = keras.layers.Dense(num_discrete_actions, activation='softmax')
        self.critic_layer=keras.layers.Dense(1)

    def call(self, inputs, training=False):
        x, edge_index, edge_attr = inputs
        x = tf.reduce_mean(x, axis=0, keepdims=True)
        x = self.graph_layer([x, edge_index, edge_attr],training=training)
        x = self.layer_1(x)
        x = self.layer_2(x)
        x = self.layer_3(x)

        actor_continuous = self.actor_continuous(x)
        actor_discrete = self.actor_discrete(x)
        critic = self.critic_layer(x)

        return actor_continuous, actor_discrete, critic



In [16]:
def action_spaces(env:GeographicRegion):
  continuous_actions = []
  discrete_actions = []

  for name, space in env.action_space.spaces.items():
      if isinstance(space, gym.spaces.Box):
          continuous_actions.append((name, space.low, space.high))
      else:
          discrete_actions.append((name, space.n))
  return continuous_actions, discrete_actions

def flatten_state(state_dict):
    return np.array(
        [state_dict[key] for key in env.state_space.spaces.keys()],
        dtype=np.float32
    )


In [17]:
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
sess = tf.compat.v1.Session()

def dense(x, units, activation=None, kernel_initializer=None, name=None,use_bias=None):
  layer = tf.keras.layers.Dense(
      units=units,
      activation=activation,
      kernel_initializer=kernel_initializer,
      use_bias=use_bias,
      name=name)
  return layer(x)

In [18]:
import multiprocessing
import threading
import numpy as np
import os
import shutil
import matplotlib.pyplot as plt
import tensorflow as tf


no_of_workers = multiprocessing.cpu_count()
no_of_ep_steps = 50
no_of_episodes = 100
global_net_scope = 'Global_Net'
update_global = 64
gamma = 0.99
entropy_beta = 0.12
lr_a = 0.00015
lr_c = 0.00015
render = False
log_dir = 'logs'

class A3C(object):
    def __init__(self, scope, session, MAX_STATE_NO, action_continuous, action_discrete, global_net=None):
        self.sess = session
        self.state_no = MAX_STATE_NO
        self.action_continuous = action_continuous
        self.action_discrete = action_discrete
        self.discrete_names = [d[0] for d in action_discrete]
        self.n_cont = len(action_continuous)
        self.cont_low = np.array([x[1] for x in action_continuous], dtype=np.float32)
        self.cont_high = np.array([x[2] for x in action_continuous], dtype=np.float32)
        self.unit_scales = [150.0, 2000000.0, 1.0, 1.0, 5000000.0, 2000000.0, 2000000.0, 3000000.0, 1.0, 1.0, 5.0, 2000000.0, 200.0, 1000000.0, 10.0, 2000000.0]
        self.percent_scale = tf.constant(self.unit_scales, dtype=tf.float32)

        self.actor_optimizer = tf.compat.v1.train.AdamOptimizer(lr_a)
        self.critic_optimizer = tf.compat.v1.train.AdamOptimizer(lr_c)

        with tf.compat.v1.variable_scope(scope):
            self.s = tf.compat.v1.placeholder(tf.float32, [None, self.state_no], 'S')
            self.mean_raw, self.delta, self.log_var, self.var_con, self.v, self.logits_dict, self.a_cont_params, self.a_dis_params, self.c_params = self._build_net(scope)
            self.a_params = self.a_cont_params + self.a_dis_params

            self.a_cont = tf.compat.v1.placeholder(tf.float32, [None, self.n_cont], 'A_continuous')
            self.a_dis = {name: tf.compat.v1.placeholder(tf.int32, [None]) for name in self.discrete_names}
            self.v_target = tf.compat.v1.placeholder(tf.float32, [None, 1], 'Vtarget')
            self.old_log_prob = tf.compat.v1.placeholder(tf.float32, [None], 'old_log_p_continuous')
            self.old_log_prob_d = tf.compat.v1.placeholder(tf.float32, [None], 'old_log_p_discrete')
            self.current_action = tf.compat.v1.placeholder(tf.float32, [None, self.n_cont], 'current_action')
            self.dataset_mean = tf.compat.v1.placeholder(tf.float32, [None,self.n_cont], 'dataset_mean')
            self.dataset_std = tf.compat.v1.placeholder(tf.float32, [None,self.n_cont], 'dataset_std')

            self.mean = tf.clip_by_value(self.mean_raw, self.cont_low, (self.cont_high*1.5))

            td = tf.subtract(self.v_target, self.v)
            self.critic_loss = tf.compat.v1.losses.huber_loss(self.v_target, self.v)

            std = tf.exp(self.log_var) + 1e-3
            normal_dist = tf.compat.v1.distributions.Normal(self.mean, std)

            self.log_prob_tensor = tf.reduce_sum(normal_dist.log_prob(self.a_cont), axis=1)
            entropy = tf.reduce_sum(normal_dist.entropy(), axis=1)
            entropy = tf.where(tf.math.is_nan(entropy), tf.zeros_like(entropy), entropy)
            entropy = tf.maximum(entropy, 1e-4)

            advantage = tf.stop_gradient(td[:, 0])

            ratio = tf.exp(self.log_prob_tensor - self.old_log_prob)
            surr1 = ratio * advantage
            surr2 = tf.clip_by_value(ratio, 0.8, 1.2) * advantage
            ppo_loss = tf.minimum(surr1, surr2)

            self.actor_loss_c = -tf.reduce_mean(ppo_loss + (entropy_beta * entropy))
            action_l2 = tf.reduce_mean(tf.square(self.delta))
            self.actor_loss_c += (0.00001 * action_l2)

            discrete_losses = []
            discrete_entropies = []
            for name in self.discrete_names:
                logits = self.logits_dict[name]
                loss_i = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=self.a_dis[name])
                discrete_losses.append(loss_i)
                probs = tf.nn.softmax(logits)
                ent_i = -tf.reduce_sum(probs * tf.math.log(probs + 1e-8), axis=1)
                discrete_entropies.append(ent_i)

            total_discrete_loss = tf.add_n(discrete_losses)
            self.discrete_log_prob_tensor = -total_discrete_loss
            total_discrete_entropy = tf.add_n(discrete_entropies)

            ratio_d = tf.exp(self.discrete_log_prob_tensor - self.old_log_prob_d)
            surr1_d = ratio_d * advantage
            surr2_d = tf.clip_by_value(ratio_d, 0.8, 1.2) * advantage

            self.actor_loss_d = -tf.reduce_mean(tf.minimum(surr1_d, surr2_d) + entropy_beta * total_discrete_entropy)

            if global_net is not None:
                total_actor_loss = self.actor_loss_c + self.actor_loss_d
                self.a_grads = tf.gradients(total_actor_loss, self.a_params)
                self.c_grads = tf.gradients(self.critic_loss, self.c_params)

                self.a_grads, _ = tf.clip_by_global_norm(self.a_grads, 5.0)
                self.c_grads, _ = tf.clip_by_global_norm(self.c_grads, 5.0)

                noise = tf.random.normal(shape=tf.shape(self.mean), mean=0.0, stddev=1e-3)
                self.mean = self.mean + tf.stop_gradient(noise * self.percent_scale)

                with tf.name_scope('push'):
                    a_grads_vars = [(g, v) for g, v in zip(self.a_grads, global_net.a_params) if g is not None]
                    c_grads_vars = [(g, v) for g, v in zip(self.c_grads, global_net.c_params) if g is not None]
                    self.update_a = self.actor_optimizer.apply_gradients(a_grads_vars) if a_grads_vars else tf.no_op()
                    self.update_c = self.critic_optimizer.apply_gradients(c_grads_vars) if c_grads_vars else tf.no_op()

                with tf.name_scope('pull'):
                    self.pull_a = [l.assign(g) for l, g in zip(self.a_params, global_net.a_params)]
                    self.pull_c = [l.assign(g) for l, g in zip(self.c_params, global_net.c_params)]
                    self.pull_op = tf.group(self.pull_a, self.pull_c)

                self.train_op = tf.group(self.update_a, self.update_c)
            else:
                self.train_op_actor = tf.compat.v1.train.AdamOptimizer(lr_a).minimize(self.actor_loss_c + self.actor_loss_d, var_list=self.a_params)
                self.train_op_critic = tf.compat.v1.train.AdamOptimizer(lr_c).minimize(self.critic_loss, var_list=self.c_params)
                self.train_op = tf.group(self.train_op_actor, self.train_op_critic)
                self.pull_op = tf.no_op()

    def _build_net(self, scope):
        w = tf.compat.v1.keras.initializers.he_normal()
        small_w = tf.compat.v1.initializers.truncated_normal(stddev=0.0001)

        with tf.compat.v1.variable_scope('actor_continuous'):
            l_a = dense(self.s, 512, activation=tf.nn.leaky_relu, kernel_initializer=w)
            l_a2 = dense(l_a, 256, activation=tf.nn.leaky_relu, kernel_initializer=w)

            log_var = dense(l_a2, self.n_cont, kernel_initializer=w, name='log_var')
            log_var = tf.clip_by_value(log_var, -1.0, 1.0)

            delta = dense(l_a2, self.n_cont, activation=tf.nn.tanh, kernel_initializer=small_w, use_bias=False)
            delta = tf.reshape(delta, [-1, self.n_cont])
            mean_con = delta * self.percent_scale
            var_con = dense(l_a2, self.n_cont, kernel_initializer=w)

        logits_dict = {}
        with tf.compat.v1.variable_scope('actor_discrete'):
            l_d = dense(self.s, 512, activation=tf.nn.leaky_relu, kernel_initializer=w)
            l_d2 = dense(l_d, 256, activation=tf.nn.leaky_relu, kernel_initializer=w)
            for name, n in self.action_discrete:
                clean_name = ''.join(c if c.isalnum() else '_' for c in name)
                logits_dict[name] = dense(l_d2, n, kernel_initializer=w, name=clean_name)

        with tf.compat.v1.variable_scope('critic'):
            l_c = dense(self.s, 512, activation=tf.nn.leaky_relu, kernel_initializer=w)
            l_c2 = dense(l_c, 256, activation=tf.nn.leaky_relu, kernel_initializer=w)
            v = dense(l_c2, 1, kernel_initializer=w)

        a_cont_params = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope=tf.compat.v1.get_variable_scope().name + '/actor_continuous')
        a_dis_params = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope=tf.compat.v1.get_variable_scope().name + '/actor_discrete')
        c_params = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope=tf.compat.v1.get_variable_scope().name + '/critic')

        return mean_con, delta, log_var, var_con, v, logits_dict, a_cont_params, a_dis_params, c_params

In [19]:
def get_batches(states, actions_cont, actions_dis, rewards, next_states, batch_size=64):
    n = len(states)
    indices = np.arange(n)
    np.random.shuffle(indices)
    for start in range(0, n, batch_size):
        end = start + batch_size
        batch_idx = indices[start:end]
        batch = {
            "states": states[batch_idx],
            "actions_cont": actions_cont[batch_idx],
            "actions_dis": {k: v[batch_idx] for k, v in actions_dis.items()},
            "rewards": rewards[batch_idx],
            "next_states": next_states[batch_idx]
        }
        yield batch


In [None]:
tf.compat.v1.reset_default_graph()

In [21]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import tensorflow as tf
import joblib

tf.compat.v1.disable_eager_execution()

ALL_STATE_COLS = [
    'state_Air Pollution Index', 'state_Carbon Dioxide Emissions Per Capita', 'state_Employment Rate', 'state_Energy Efficiency',
    'state_Flood defense infrastructure', 'state_GDP','state_Green Business Investments','state_Humidity','state_Precipitation',
    'state_Population','state_Single-use plastics bans','state_Sustainable Land-Use Zoning','state_Temperature','state_Urban Green Space Expansion',
    'state_Waste Management Efficiency','state_budget', 'state_capacity', 'state_carbon_intensity', 'state_class_population',
    'state_class_wealth', 'state_economic_growth_priority', 'state_efficiency', 'state_emissions', 'state_incentive_intensity', 'state_investment_ceiling',
    'state_investment_floor', 'state_net_carbon_intensity', 'state_net_emissions', 'state_penalty_severity', 'state_production', 'state_production_level',
    'state_revenue', 'state_valuation', 'state_sustainability_index', 'state_tax_capacity', 'state_water_consumption', 'state_waste_creation', 'state_waste_recycling'
]

POLICY_STATE_FEATURES = [
    'state_Urban Green Space Expansion',
    'state_Flood defense infrastructure',
    'state_Heatwave resilience',
    'state_Sustainable Land-Use Zoning',
    'state_Single-use plastics bans',
    'state_Green Business Investments'
]

def preprocess_batch(df, state_cols, cont_actions, dis_actions):
    MAX_STATE_NO = len(state_cols)
    states = np.zeros((len(df), MAX_STATE_NO), dtype=np.float32)
    for i, col in enumerate(state_cols):
        if col in df.columns:
            states[:, i] = np.clip(df[col].values.astype(np.float64), -1e6, 1e6).astype(np.float32)

    n_cont = len(cont_actions)
    actions_cont = np.zeros((len(df), n_cont), dtype=np.float32)
    for i, name in enumerate(cont_actions):
        col_name = f"action_{name}"
        if col_name in df.columns:
            actions_cont[:, i] = df[col_name].values.astype(np.float32)
        else:
            actions_cont[:, i] = 0.0

    actions_dis = {}
    for item in dis_actions:
        name = item[0] if isinstance(item, tuple) else item
        col_name = name if name.startswith("action_") else f"action_{name}"
        if col_name in df.columns:
            actions_dis[name] = df[col_name].values.astype(np.int32)
        else:
            actions_dis[name] = np.zeros(len(df), dtype=np.int32)

    rewards = df["reward"].values.astype(np.float32).reshape(-1, 1)
    next_states = np.zeros((len(df), MAX_STATE_NO), dtype=np.float32)
    for i, col in enumerate(state_cols):
        next_col = col.replace("state_", "next_")
        if next_col in df.columns:
            next_states[:, i] = df[next_col].values.astype(np.float32)

    return states, actions_cont, actions_dis, rewards, next_states

def compute_td_target(a3c_model, next_states, rewards, gamma=0.99):
    v_next = a3c_model.sess.run(a3c_model.v, feed_dict={a3c_model.s: next_states})
    v_next = np.clip(v_next, -1e6, 1e6)
    td_target = rewards + gamma * v_next
    return td_target

for agent_class, df in data.items():
    state_scaler = StandardScaler()
    action_scaler = StandardScaler()

    summary = []
    print(agent_class, "Report")
    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, shuffle=True)

    state_cols = [col for col in train_df.columns if col.startswith("state_") and col not in POLICY_STATE_FEATURES]
    dis_actions = [
        ('action_Fossil Fuel Phase-Out Regulations', 3),
        ('action_Fuel Economy Standards', 4),
        ('state_Single-use plastics bans', 3),
        ('action_Vehicle emission standards', 4)
    ]
    cont_actions = [col for col in train_df.columns if col.startswith("action_") and col not in [d[0] for d in dis_actions]]
    for col in train_df.columns:
      col_max = train_df[train_df[col] < np.inf][col].max()
      train_df.replace({col: np.inf}, col_max, inplace=True)
      test_df.replace({col: np.inf}, col_max, inplace=True)

    dataset_means = train_df[cont_actions].mean().values.astype(np.float32)
    dataset_stds = train_df[cont_actions].std().values.astype(np.float32)

    large_vals = df.columns[df.max() >= 1e3].tolist()
    log_cols = []
    for col in large_vals:
        if col.startswith("state_") or col.startswith("next_") or col.startswith("action_"):
            train_df[col] = np.log1p(train_df[col].clip(0, 1e6))
            test_df[col] = np.log1p(test_df[col].clip(0, 1e6))
            log_cols.append(col)

    large_scale = []
    for i, action_name in enumerate(cont_actions):
      median = np.median(train_df[action_name])
      if median > 1e4:
          large_scale.append(action_name)

    train_df[state_cols] = state_scaler.fit_transform(train_df[state_cols])
    test_df[state_cols] = state_scaler.transform(test_df[state_cols])
    train_df[cont_actions] = action_scaler.fit_transform(train_df[cont_actions])
    test_df[cont_actions] = action_scaler.transform(test_df[cont_actions])

    joblib.dump(state_scaler, f"{agent_class}_state_scaler.joblib")
    joblib.dump(action_scaler, f"{agent_class}_action_scaler.joblib")

    states, actions_cont, actions_dis, rewards, next_states = preprocess_batch(train_df, state_cols, cont_actions, dis_actions)
    tf.compat.v1.reset_default_graph()
    sess = tf.compat.v1.Session()

    action_continuous = []
    for name in cont_actions:
        col_min = train_df[name].min()
        col_max = train_df[name].max()
        if col_min == col_max:
            col_min -= 1.0
            col_max += 1.0
        action_continuous.append((name, col_min, col_max))

    global_a3c = A3C(scope="TrainNet", session=sess, MAX_STATE_NO=len(state_cols), action_continuous=action_continuous, action_discrete=dis_actions)
    worker = A3C(scope="WorkerNet", session=sess, MAX_STATE_NO=len(state_cols), action_continuous=action_continuous, action_discrete=dis_actions, global_net=global_a3c)
    global_target_net = A3C(scope="TargetNet", session=sess, MAX_STATE_NO=len(state_cols), action_continuous=action_continuous, action_discrete=dis_actions)

    sess.run(tf.compat.v1.global_variables_initializer())

    num_epochs = 50
    batch_size = 512
    total_steps = 0
    best_v_mean = -np.inf
    patience = 5
    wait = 0
    min_delta = 0.0005

    target_params = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope="TargetNet")
    global_params = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope="Global_Net")
    sync_target_op = [t.assign(g) for t, g in zip(target_params, global_params)]

    for epoch in range(num_epochs):
        idx = np.arange(len(states))
        np.random.shuffle(idx)
        states_shuffled = states[idx]
        actions_cont_shuffled = actions_cont[idx]
        actions_dis_shuffled = {name: val[idx] for name, val in actions_dis.items()}
        rewards_shuffled = rewards[idx]
        next_states_shuffled = next_states[idx]

        sess.run(sync_target_op)
        for start in range(0, len(states), batch_size):
            total_steps += 1
            end = start + batch_size
            batch = {
                "states": states_shuffled[start:end],
                "actions_cont": actions_cont_shuffled[start:end],
                "actions_dis": {name: val[start:end] for name, val in actions_dis_shuffled.items()},
                "rewards": rewards_shuffled[start:end],
                "next_states": next_states_shuffled[start:end],
            }
            sess.run(worker.pull_op)

            current_batch_size = len(batch["states"])
            dynamic_mean = np.tile(dataset_means, (current_batch_size, 1))
            dynamic_std = np.tile(dataset_stds, (current_batch_size, 1))

            old_lp_cont, old_lp_dis = sess.run(
                [worker.log_prob_tensor, worker.discrete_log_prob_tensor],
                feed_dict={
                    worker.s: batch["states"],
                    worker.a_cont: batch["actions_cont"],
                    worker.current_action: batch["actions_cont"],
                    worker.dataset_std: dynamic_std,
                    worker.dataset_mean: dynamic_mean,
                    **{worker.a_dis[name]: batch["actions_dis"][name] for name in worker.discrete_names}
                }
            )

            reward_std = np.std(batch["rewards"])
            if reward_std < 1e-6:
                batch_rewards_scaled = batch["rewards"] - np.mean(batch["rewards"])
            else:
                batch_rewards_scaled = (batch["rewards"] - np.mean(batch["rewards"])) / reward_std

            td_target = compute_td_target(global_target_net, batch["next_states"], batch_rewards_scaled, gamma=0.8)

            if total_steps % (150 + np.random.randint(0, 100)) == 0:
                global_a3c.sess.run(global_a3c.pull_op)

            feed_dict = {
                worker.s: batch["states"],
                worker.a_cont: batch["actions_cont"],
                worker.current_action: batch["actions_cont"],
                worker.v_target: td_target,
                worker.old_log_prob: old_lp_cont,
                worker.old_log_prob_d: old_lp_dis,
                worker.dataset_mean: dynamic_mean,
                worker.dataset_std: dynamic_std,
            }
            for name in worker.discrete_names:
                feed_dict[worker.a_dis[name]] = batch["actions_dis"][name]

            critic_loss, actor_loss_c, actor_loss_d, _ = sess.run(
                [worker.critic_loss, worker.actor_loss_c, worker.actor_loss_d, worker.train_op],
                feed_dict
            )

        print(f"Critic: {critic_loss:.4f} Actor_cont: {actor_loss_c:.4f} Actor_dis: {actor_loss_d:.4f}")
        print(f"Epoch {epoch+1}/{num_epochs} completed\n")

        test_states = test_df[state_cols].values
        v_preds = sess.run(global_a3c.v, {global_a3c.s: test_states})
        current_v_avg = np.mean(v_preds)
        v_change = abs(current_v_avg - best_v_mean)

        if v_change > min_delta:
            best_v_mean = current_v_avg
            wait = 0
        else:
            wait += 1
            print(f"--- Early Stopping Watch: {wait}/{patience} (Delta: {v_change:.5f}) ---")
        if wait >= patience: break

    global_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope="TrainNet")
    saver = tf.compat.v1.train.Saver(var_list=global_vars)
    model_path = saver.save(sess, f"./models/{agent_class}_policy_model.ckpt")

    cont_low = np.array([x[1] for x in action_continuous], dtype=np.float32)
    cont_high = np.array([x[2] for x in action_continuous], dtype=np.float32)
    test_actions_baseline = test_df[cont_actions].values.astype(np.float32)
    test_batch_size = len(test_df)
    test_dynamic_std = np.tile(dataset_stds, (test_batch_size, 1))
    test_dynamic_mean = np.tile(dataset_means, (test_batch_size, 1))

    predicted_means, predicted_deltas, log_vars = global_a3c.sess.run(
        [global_a3c.mean, global_a3c.delta, global_a3c.log_var],
        feed_dict={
            global_a3c.s: test_states,
            global_a3c.current_action: test_actions_baseline,
            global_a3c.dataset_std: test_dynamic_std,
            global_a3c.dataset_mean: test_dynamic_mean
        }
    )
    deltas = (predicted_means / test_actions_baseline) - 1
    a_pred_unscaled = action_scaler.inverse_transform(predicted_means)
    y_true_unscaled_all = action_scaler.inverse_transform(test_actions_baseline)

    for i in range(a_pred_unscaled.shape[1]):
      if np.std(a_pred_unscaled[:, i]) < 1e-6:
          mean_val = np.mean(a_pred_unscaled[:, i])
          noise = np.random.normal(0, 1e-4 * (mean_val + 1e-6), size=a_pred_unscaled[:, i].shape)
          a_pred_unscaled[:, i] += noise

    a_pred_real = np.zeros_like(a_pred_unscaled)
    y_true_real = np.zeros_like(y_true_unscaled_all)
    threshold = 1e4

    for i, action_name in enumerate(cont_actions):

        test_rewards = test_df["reward"].values.flatten()
        delta = predicted_means[:, i].astype(np.float64)

        y_true = test_df[action_name].values.astype(np.float64)
        is_large = np.median(np.expm1(y_true)) > threshold
        is_log = action_name in log_cols

        if action_name in log_cols:
            a_pred_real[:, i] = np.expm1(a_pred_unscaled[:, i])
            y_true_real[:, i] = np.expm1(y_true_unscaled_all[:, i])
        else:
            a_pred_real[:, i] = a_pred_unscaled[:, i]
            y_true_real[:, i] = y_true_unscaled_all[:, i]


        y_true = y_true_real[:, i]
        y_pred = a_pred_real[:, i]
        diff = y_pred - y_true
        test_rewards = test_df["reward"].values.flatten()

        p_std = np.std(y_pred)
        d_std = np.std(y_true)
        r_std = np.std(test_rewards)

        action_corr = np.corrcoef(a_pred_unscaled[:, i], y_true_unscaled_all[:, i])[0, 1]
        reward_corr = np.corrcoef(a_pred_unscaled[:, i], test_rewards)[0, 1]
        num_changed = np.sum(np.abs(diff) > (1e-4 * np.abs(np.mean(y_true) + 1e-8)))
        if p_std < 1e-7:
          vol_display = "Optimal"
          corr_display = "Converged"
        else:
            vol_display = f"{p_std:.4f}"
            corr_display = f"{reward_corr:.4f}"

        print(action_name)
        print(f"Mean Recommendation: {np.mean(y_pred):.4f}")
        print(f"Avg Change from Baseline: {np.mean(diff):+.4f}")
        print(f"MAE: {np.mean(np.abs(diff)):.4f}")
        print(f"Modified Rows: {num_changed}/{len(diff)}")
        print(f"Policy Volatility: {vol_display}")
        print(f"Correlation to Reward: {corr_display}")
        print(f"Model Confidence (LogVar): {np.mean(log_vars[:, i]):.4f}\n")

        summary.append({
            "Agent": agent_class,
            "Action": action_name,
            "Mean_Rec": np.mean(y_pred),
            "Avg_Change": np.mean(diff),
            "MAE": np.mean(np.abs(diff)),
            "Volatility": vol_display,
            "Reward_Corr": corr_display,
            "Confidence": np.mean(log_vars[:, i])
        })

        summary_df = pd.DataFrame(summary)
        summary_df.to_csv(f"{agent_class}_results.csv", index=False)

    sample_df = test_df.sample(frac=0.05)
    state_cols = [c for c in sample_df.columns if 'state_' in c and sample_df[c].var() > 1e-8]
    action_cols = [a for a in sample_df.columns if 'action_' in a and sample_df[a].var() > 1e-8]

    corr_matrix = sample_df[state_cols + action_cols].corr()

    signature_data = []

    for s_col in state_cols:
        for a_col in action_cols:
            corr_val = corr_matrix.loc[s_col, a_col]
            if abs(corr_val) > 0.005:
                direction = "INCREASES" if corr_val > 0 else "DECREASES"

                signature_data.append({
                    "Agent": agent_class,
                    "State_Variable": s_col,
                    "Action": a_col,
                    "Impact": direction,
                    "Correlation_r": round(corr_val, 4)
                })

    signature_df = pd.DataFrame(signature_data)
    signature_df.to_csv(f"{agent_class}_policy_signature.csv", index=False)


Sustainability Report


Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.distributions`.
Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.distributions`.


Critic: 0.5433 Actor_cont: -1.5646 Actor_dis: 0.2374
Epoch 1/50 completed

Critic: 0.2306 Actor_cont: -2.0355 Actor_dis: -0.0209
Epoch 2/50 completed

Critic: 0.1824 Actor_cont: -2.3408 Actor_dis: 0.1297
Epoch 3/50 completed

Critic: 0.2052 Actor_cont: -2.2819 Actor_dis: 0.2629
Epoch 4/50 completed

Critic: 0.0974 Actor_cont: -2.8126 Actor_dis: -0.0544
Epoch 5/50 completed

Critic: 0.0648 Actor_cont: -2.7851 Actor_dis: 0.0526
Epoch 6/50 completed

Critic: 0.0911 Actor_cont: -2.7501 Actor_dis: 0.0427
Epoch 7/50 completed

Critic: 0.1038 Actor_cont: -2.7277 Actor_dis: 0.0667
Epoch 8/50 completed

Critic: 0.0419 Actor_cont: -3.2803 Actor_dis: 0.0153
Epoch 9/50 completed

Critic: 0.0860 Actor_cont: -3.5812 Actor_dis: -0.0727
Epoch 10/50 completed

Critic: 0.0780 Actor_cont: -3.4559 Actor_dis: 0.0548
Epoch 11/50 completed

Critic: 0.1001 Actor_cont: -4.1430 Actor_dis: 0.0320
Epoch 12/50 completed

Critic: 0.0683 Actor_cont: -4.1774 Actor_dis: -0.0395
Epoch 13/50 completed

Critic: 0.0746 Ac

  c /= stddev[:, None]
  c /= stddev[None, :]


action_Carbon Tax
Mean Recommendation: 237.1055
Avg Change from Baseline: +139.0440
MAE: 139.0440
Modified Rows: 200000/200000
Policy Volatility: 0.0001
Correlation to Reward: nan
Model Confidence (LogVar): 0.8881

action_Climate-Resilient Infrastructure Investment
Mean Recommendation: 1040508.1250
Avg Change from Baseline: +40507.9883
MAE: 1098040.5000
Modified Rows: 200000/200000
Policy Volatility: 1521394.6250
Correlation to Reward: -0.2658
Model Confidence (LogVar): 0.9991

action_Electric Vehicle (EV) Subsidies
Mean Recommendation: 0.2978
Avg Change from Baseline: -0.1948
MAE: 0.2588
Modified Rows: 199944/200000
Policy Volatility: 0.1115
Correlation to Reward: -0.5294
Model Confidence (LogVar): 1.0000

action_Energy Efficiency Incentives
Mean Recommendation: 0.3605
Avg Change from Baseline: -0.1239
MAE: 0.2542
Modified Rows: 199980/200000
Policy Volatility: 0.1718
Correlation to Reward: -0.2540
Model Confidence (LogVar): 0.9953

action_Flood defense infrastructure
Mean Recommendat

  c /= stddev[:, None]
  c /= stddev[None, :]


action_Carbon Tax
Mean Recommendation: 205.4506
Avg Change from Baseline: +99.1630
MAE: 99.1630
Modified Rows: 5000/5000
Policy Volatility: 0.0000
Correlation to Reward: nan
Model Confidence (LogVar): -1.0000

action_Climate-Resilient Infrastructure Investment
Mean Recommendation: 4478401.5000
Avg Change from Baseline: +3478401.5000
MAE: 3479412.7500
Modified Rows: 5000/5000
Policy Volatility: 116309.6719
Correlation to Reward: -0.0093
Model Confidence (LogVar): -1.0000

action_Electric Vehicle (EV) Subsidies
Mean Recommendation: 0.7820
Avg Change from Baseline: +0.1737
MAE: 0.2005
Modified Rows: 5000/5000
Policy Volatility: 0.0046
Correlation to Reward: -0.2343
Model Confidence (LogVar): 0.9606

action_Energy Efficiency Incentives
Mean Recommendation: 0.2841
Avg Change from Baseline: -0.2551
MAE: 0.2926
Modified Rows: 5000/5000
Policy Volatility: 0.0026
Correlation to Reward: 0.1748
Model Confidence (LogVar): 1.0000

action_Flood defense infrastructure
Mean Recommendation: 4453718.500

  c /= stddev[:, None]
  c /= stddev[None, :]


Model Confidence (LogVar): 0.9197

action_Single-use plastics bans
Mean Recommendation: 1.1464
Avg Change from Baseline: +0.1388
MAE: 1.2039
Modified Rows: 166508/200000
Policy Volatility: 1.2102
Correlation to Reward: 0.2372
Model Confidence (LogVar): 0.8443

action_Sustainable Land-Use Zoning
Mean Recommendation: 4096557.7500
Avg Change from Baseline: +3096557.0000
MAE: 3214915.2500
Modified Rows: 200000/200000
Policy Volatility: 1198351.3750
Correlation to Reward: 0.2668
Model Confidence (LogVar): 0.9844

action_Urban Green Space Expansion
Mean Recommendation: 63.6343
Avg Change from Baseline: -38.0578
MAE: 105.6936
Modified Rows: 200000/200000
Policy Volatility: 99.6972
Correlation to Reward: -0.5609
Model Confidence (LogVar): -0.8836

action_Waste Management Reforms
Mean Recommendation: 3890825.2500
Avg Change from Baseline: +2890825.5000
MAE: 3072409.0000
Modified Rows: 200000/200000
Policy Volatility: 1442773.0000
Correlation to Reward: 0.4744
Model Confidence (LogVar): 0.9762



  c /= stddev[:, None]
  c /= stddev[None, :]


action_Single-use plastics bans
Mean Recommendation: 0.8460
Avg Change from Baseline: -0.1616
MAE: 0.9906
Modified Rows: 174018/200000
Policy Volatility: 0.9041
Correlation to Reward: -0.2301
Model Confidence (LogVar): -1.0000

action_Sustainable Land-Use Zoning
Mean Recommendation: 1338574.5000
Avg Change from Baseline: +338574.4375
MAE: 1304506.1250
Modified Rows: 200000/200000
Policy Volatility: 1746714.5000
Correlation to Reward: -0.3506
Model Confidence (LogVar): 0.8890

action_Urban Green Space Expansion
Mean Recommendation: 49.4053
Avg Change from Baseline: -52.2868
MAE: 103.4739
Modified Rows: 200000/200000
Policy Volatility: 90.2681
Correlation to Reward: -0.5770
Model Confidence (LogVar): 0.8694

action_Waste Management Reforms
Mean Recommendation: 2699629.7500
Avg Change from Baseline: +1699629.7500
MAE: 2247287.0000
Modified Rows: 200000/200000
Policy Volatility: 2038462.1250
Correlation to Reward: 0.6219
Model Confidence (LogVar): 0.8620

action_Water Consumption Tax
Mean 

  c /= stddev[:, None]
  c /= stddev[None, :]


In [118]:
results={}
for agent_class in agents:
  print(f"Agent: {agent_class}")
  file_name = f"{agent_class}_results.csv"
  summary = pd.read_csv(file_name)
  results[agent_class]=summary

Agent: Sustainability
Agent: DecisionAgents
Agent: BusinessAgents
Agent: EnergyAgents
Agent: ConsumerAgents


In [None]:
policy_signature={}
for agent_class in agents:
  print(f"Agent: {agent_class}")
  file_name = f"{agent_class}_policy_signature.csv"
  summary = pd.read_csv(file_name)
  policy_signature[agent_class]=summary
  print(summary.columns)

Agent: Sustainability
Index(['Agent', 'State_Variable', 'Action', 'Impact', 'Correlation_r'], dtype='object')
Agent: DecisionAgents
Index(['Agent', 'State_Variable', 'Action', 'Impact', 'Correlation_r'], dtype='object')
Agent: BusinessAgents
Index(['Agent', 'State_Variable', 'Action', 'Impact', 'Correlation_r'], dtype='object')
Agent: EnergyAgents
Index(['Agent', 'State_Variable', 'Action', 'Impact', 'Correlation_r'], dtype='object')
Agent: ConsumerAgents
Index(['Agent', 'State_Variable', 'Action', 'Impact', 'Correlation_r'], dtype='object')


In [23]:
!pip install -U spacy
!pip install spacy_transformers


Collecting spacy_transformers
  Downloading spacy_transformers-1.3.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)
Collecting transformers<4.50.0,>=3.4.0 (from spacy_transformers)
  Downloading transformers-4.49.0-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
Collecting spacy-alignments<1.0.0,>=0.7.2 (from spacy_transformers)
  Downloading spacy_alignments-0.9.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.6 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers<4.50.0,>=3.4.0->spacy_transformers)
  Downloading tokenizers-0.21.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading spacy_transformers-1.3.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (795 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m795.8/795.8 kB[0m [31m30.7 MB/s[0m eta [36m0:00:00[

In [24]:
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m74.1 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [26]:
patterns = [

    {"label": "STATE_VARIABLE", "pattern": [{"LIKE_NUM": True}, {"LEMMA": {"IN": ["celsius", "kelvin", "fahrenheit", "c", "f", "k", "mm", "inch", "cm", "ton", "kg", "t", "liter", "m3"]}}]},
    {"label": "STATE_VARIABLE", "pattern": [{"LIKE_NUM": True}, {"TEXT": {"REGEX": r"^([°º][CcFf]?|[CcFfKk])$"}}]},
    {"label": "STATE_VARIABLE", "pattern": [{"LIKE_NUM": True}, {"TEXT": {"IN": ["%", "percent"]}}]},
    {"label": "STATE_VARIABLE", "pattern": [{"IS_CURRENCY": True, "OP": "?"}, {"LIKE_NUM": True}, {"LEMMA": {"IN": ["million", "billion", "trillion"]}}]},
    {"label": "STATE_VARIABLE", "pattern": [{"LIKE_NUM": True}, {"LEMMA": {"IN": ["hectare", "ha", "acre", "sqkm", "km2", "m2"]}}]},
    {"label": "STATE_VARIABLE", "pattern": [{"LIKE_NUM": True}, {"TEXT": {"REGEX": "(?i)kg/kwh|g/kwh|t/mwh|co2e"}}]},
    {"label": "STATE_VARIABLE", "pattern": [{"LIKE_NUM": True}, {"LEMMA": {"IN": ["m3", "liter", "gal", "tonne", "ton"]}}, {"TEXT": {"IN": ["/day", "/year", "capita"]}, "OP": "?"}]},
    {"label": "STATE_VARIABLE", "pattern": [{"LIKE_NUM": True}, {"LEMMA": {"IN": ["year", "month", "day", "week"]}}]},

    {"label": "STATE_VARIABLE", "pattern": [{"LEMMA": {"IN": ["temperature", "precipitation", "humidity", "population", "gdp", "employment", "growth"]}}, {"LEMMA": "rate", "OP": "?"}]},
    {"label": "STATE_VARIABLE", "pattern": [{"LEMMA": {"IN": ["air", "water"]}}, {"LEMMA": {"IN": ["pollution", "quality"]}}, {"LEMMA": "index"}]},
    {"label": "STATE_VARIABLE", "pattern": [{"LEMMA": "carbon"}, {"LEMMA": "dioxide"}, {"LEMMA": "emission"}]},
    {"label": "STATE_VARIABLE", "pattern": [{"LEMMA": "renewable"}, {"LEMMA": "share"}]},

    {"label": "ENERGY_VARIABLE", "pattern": [{"LIKE_NUM": True}, {"LEMMA": {"IN": ["mw", "gw", "kw", "kwh", "mwh", "gj"]}}]},
    {"label": "ENERGY_VARIABLE", "pattern": [{"LEMMA": "energy"}, {"LEMMA": {"IN": ["type", "capacity", "production", "intensity", "consumption"]}}]},

    {"label": "ACTION", "pattern": [{"LEMMA": {"IN": ["carbon", "water"]}}, {"LEMMA": "tax"}]},
    {"label": "ACTION", "pattern": [{"LEMMA": {"IN": ["renewable", "electric", "vehicle", "ev", "energy"]}}, {"LEMMA": {"IN": ["subsidy", "incentive", "standard"]}}]},
    {"label": "ACTION", "pattern": [{"LEMMA": "fossil"}, {"LEMMA": "fuel"}, {"LEMMA": "phase-out"}]},
    {"label": "ACTION", "pattern": [{"LEMMA": {"IN": ["urban", "public", "transport"]}}, {"LEMMA": "expansion"}]},
    {"label": "ACTION", "pattern": [{"LEMMA": {"IN": ["waste", "land-use", "flood", "heatwave"]}}, {"LEMMA": {"IN": ["reform", "zoning", "defense", "resilience", "management"]}}]},
    {"label": "ACTION", "pattern": [{"LEMMA": "single-use"}, {"LEMMA": "plastic"}, {"LEMMA": "ban"}]},

    {"label": "BUSINESS_VARIABLE", "pattern": [{"LEMMA": "business"}, {"LEMMA": {"IN": ["name", "type", "investment"]}}]},
    {"label": "BUSINESS_VARIABLE", "pattern": [{"LEMMA": {"IN": ["valuation", "revenue", "wealth"]}}]},
    {"label": "BUSINESS_VARIABLE", "pattern": [{"IS_CURRENCY": True}, {"LIKE_NUM": True}]},

    {"label": "DECISION_VARIABLE", "pattern": [{"LEMMA": {"IN": ["authority", "budget", "sector", "regulation", "penalty"]}}, {"LEMMA": {"IN": ["strictness", "severity", "capacity", "ceiling", "floor"]}, "OP": "?"}]},
    {"label": "DECISION_VARIABLE", "pattern": [{"LEMMA": {"IN": ["economic", "emission", "sustainability", "social"]}}, {"LEMMA": "priority"}]},
    {"label": "CONSUMER_VARIABLE", "pattern": [{"LEMMA": "consumer"}, {"LEMMA": "class"}]},
    {"label": "CONSUMER_VARIABLE", "pattern": [{"LEMMA": "public"}, {"LEMMA": "satisfaction"}]}
]

In [27]:
params = ["production_level", "net_emissions", "net_carbon_intensity", "energy_consumption",
"water_consumption", "resources_consumption", "waste_creation", "waste_recycling",
"Temperature", "Precipitation", "Humidity", "Air Pollution Index",
"Water Quality Index", "Carbon Dioxide Emissions Per Capita", "Electricity Consumption Per Capita", \
"Renewable Share", "Water Consumption Per Capita", "Population", "GDP",
"Employment Rate", "Waste Management Efficiency", "Energy Efficiency",
"Urban Green Space Expansion", "Flood defense infrastructure", "Heatwave resilience",
"Sustainable Land-Use Zoning", "Single-use plastics bans", "Green Business Investments",
"Carbon Tax", "Climate-Resilient Infrastructure Investment",
"Electric Vehicle (EV) Subsidies", "Energy Efficiency Incentives",
"Fossil Fuel Phase-Out Regulations", "Fuel Economy Standards", "Public transport expansion",
"Recycling Rate", "Renewable energy subsidies", "Vehicle emission standards",
"Waste Management Reforms", "Water Consumption Tax", "Water conservation measures",
"budget", "tax_capacity", "subsidy_capacity", "investment_ceiling", "investment_floor", "economic_growth_priority",
"emissions_priority", "sustainability_priority", "energy_priority", "social_welfare_priority",
"regulation_strictness", "penalty_severity", "incentive_intensity", "valuation", "revenue",
"growth_rate", "sustainability_index", "capacity", "production", "efficiency", "carbon_intensity",
"emissions", "class_population", "class_wealth", "public_satisfaction"]

In [83]:
import spacy
from spacy.pipeline import EntityRuler
from spacy.matcher import DependencyMatcher

def feature_extraction(text):
    nlp = spacy.load("en_core_web_sm")
    if "entity_ruler" not in nlp.pipe_names:
        ruler = nlp.add_pipe("entity_ruler", before="ner", config={"overwrite_ents": True})
        ruler.add_patterns(patterns)

    doc = nlp(text)
    unique_res = {}
    target_params = [p.lower() for p in params]

    for sent in doc.sents:
        sent_text = sent.text.lower()

        sent_ents = [
            ent for ent in sent.ents
            if ent.label_ in ["MONEY", "QUANTITY", "PERCENT", "CARDINAL"]
            or any(char.isdigit() for char in ent.text)
        ]

        if not sent_ents:
            continue

        for p in target_params:
            if p in sent_text:
                param_start_pos = sent_text.find(p)

                best_ent = None
                min_dist = float('inf')

                for ent in sent_ents:
                    ent_text = ent.text.lower()
                    if ent_text == p or ent_text.isdigit():
                        continue

                    ent_start_pos = sent.text.lower().find(ent_text)
                    dist = abs(ent_start_pos - param_start_pos)

                    if dist < min_dist:
                        min_dist = dist
                        best_ent = ent.text.strip()

                if best_ent:
                    unique_res[p] = best_ent

    return unique_res

In [29]:
import re

def quantify(text):
    if not text:
        return 0.0
    text = str(text).lower().replace(',', '').strip()

    number_match = re.search(r"[-+]?\d*\.\d+|\d+", text)
    if not number_match:
        return 0.0
    val = float(number_match.group())

    if "trillion" in text or "t" in text.split():
        val *= 1e12
    elif "billion" in text or "b" in text.split():
        val *= 1e9
    elif "million" in text or "m" in text.split():
        val *= 1e6
    if "%" in text or "percent" in text:
        val /= 100.0
    if "gw" in text:
        val *= 1000
    elif "kw" in text:
        val /= 1000
    elif "kwh" in text:
        val /= 1000
    if "ton" in text or "t" in text.split():
        pass
    if "mm" in text:
        pass
    if "sqkm" in text or "km2" in text:
        val = val *10000
    elif "hectare" in text or "ha" in text:
        val = val
    if "f" in text or "fahrenheit" in text:
        val = (val - 32) * 5/9
    elif "k" in text or "kelvin" in text:
        val = val - 273.15

    return val

In [43]:
from difflib import SequenceMatcher
MAPPING = {
    'Air Pollution Index': 'state_Air Pollution Index',
    'Carbon Dioxide Emissions Per Capita': 'state_Carbon Dioxide Emissions Per Capita',
    'Employment Rate': 'state_Employment Rate',
    'Energy Efficiency': 'state_Energy Efficiency',
    'Flood defense infrastructure': 'state_Flood defense infrastructure',
    'GDP': 'state_GDP',
    'Green Business Investments': 'state_Green Business Investments',
    'Humidity': 'state_Humidity',
    'Precipitation': 'state_Precipitation',
    'Population': 'state_Population',
    'Single-use plastics bans': 'state_Single-use plastics bans',
    'Sustainable Land-Use Zoning': 'state_Sustainable Land-Use Zoning',
    'Temperature': 'state_Temperature',
    'Urban Green Space Expansion': 'state_Urban Green Space Expansion',
    'Waste Management Efficiency': 'state_Waste Management Efficiency',
    'budget': 'state_budget',
    'capacity': 'state_capacity',
    'carbon_intensity': 'state_carbon_intensity',
    'class_population': 'state_class_population',
    'class_wealth': 'state_class_wealth',
    'economic_growth_priority': 'state_economic_growth_priority',
    'efficiency': 'state_efficiency',
    'emissions': 'state_emissions',
    'incentive_intensity': 'state_incentive_intensity',
    'investment_ceiling': 'state_investment_ceiling',
    'investment_floor': 'state_investment_floor',
    'net_carbon_intensity': 'state_net_carbon_intensity',
    'net_emissions': 'state_net_emissions',
    'penalty_severity': 'state_penalty_severity',
    'production': 'state_production',
    'production_level': 'state_production_level',
    'revenue': 'state_revenue',
    'valuation': 'state_valuation',
    'sustainability_index': 'state_sustainability_index',
    'tax_capacity': 'state_tax_capacity',
    'water_consumption': 'state_water_consumption',
    'waste_creation': 'state_waste_creation',
    'waste_recycling': 'state_waste_recycling',
    "tax rebates": "state_incentive_intensity",
    "green investment": "state_Green Business Investments",
    "factory output": "state_production_level"
}

POLICY_STATE_FEATURES_DICT = {
    'Urban Green Space Expansion': 'state_Urban Green Space Expansion',
    'Flood defense infrastructure': 'state_Flood defense infrastructure',
    'Heatwave resilience': 'state_Heatwave resilience',
    'Sustainable Land-Use Zoning': 'state_Sustainable Land-Use Zoning',
    'Single-use plastics bans': 'state_Single-use plastics bans',
    'Green Business Investments': 'state_Green Business Investments'
}

action_mapping = {
    "carbon_tax": "action_Carbon Tax",
    "climate_resilient_infrastructure_investment": "action_Climate-Resilient Infrastructure Investment",
    "electric_vehicle_subsidies": "action_Electric Vehicle (EV) Subsidies",
    "energy_efficiency_incentives": "action_Energy Efficiency Incentives",
    "flood_defense_infrastructure": "action_Flood defense infrastructure",
    "fossil_fuel_phase_out_regulations": "action_Fossil Fuel Phase-Out Regulations",
    "fuel_economy_standards": "action_Fuel Economy Standards",
    "green_business_investments": "action_Green Business Investments",
    "heatwave_resilience": "action_Heatwave resilience",
    "public_transport_expansion": "action_Public transport expansion",
    "recycling_rate": "action_Recycling Rate",
    "renewable_energy_subsidies": "action_Renewable energy subsidies",
    "single_use_plastics_bans": "action_Single-use plastics bans",
    "sustainable_land_use_zoning": "action_Sustainable Land-Use Zoning",
    "urban_green_space_expansion": "action_Urban Green Space Expansion",
    "vehicle_emission_standards": "action_Vehicle emission standards",
    "waste_management_reforms": "action_Waste Management Reforms",
    "water_consumption_tax": "action_Water Consumption Tax",
    "water_conservation_measures": "action_Water conservation measures"
}


def col_match(nlp_key,dataset_cols):
  max_match=None
  highest_score=0
  nlp_key_c=nlp_key.lower()
  for col in dataset_cols:
    if col.startswith('state_'):
      col_c=col.replace('state_',"").replace('_'," ").lower()
    elif col.startswith('action_'):
      col_c=col.replace('action_',"").replace('_'," ").lower()
    elif col.startswith('next_'):
      col_c=col.replace('next_',"").replace('_'," ").lower()

    if nlp_key_c in col_c or col_c in nlp_key_c:
      return col

    score=SequenceMatcher(None,nlp_key_c,col_c).ratio()
    if score>highest_score:
      highest_score=score
      max_match=col

  if highest_score>0.55:
    return max_match

def input_vector(nlp_res, agent_state_cols, agent_action_cols):
    final_states = {col: 0.0 for col in agent_state_cols}
    final_actions = {col: 0.0 for col in agent_action_cols}

    for k, v in nlp_res.items():
        k_lower = k.lower()
        target_state = MAPPING.get(k_lower)
        if not target_state:
            target_state = POLICY_STATE_FEATURES_DICT.get(k.title())

        target_action = action_mapping.get(k_lower.replace(" ", "_"))
        if not target_state and not target_action:
            potential_col = col_match(k, list(agent_state_cols) + list(agent_action_cols))
            if potential_col:
                if potential_col.startswith('state_'):
                    target_state = potential_col
                elif potential_col.startswith('action_'):
                    target_action = potential_col

        if target_state and target_state in final_states:
            final_states[target_state] = v
        elif target_action and target_action in final_actions:
            final_actions[target_action] = v

    return final_states, final_actions

def prepare_for_model(vector_data, state_scaler, log_cols):
  if isinstance(vector_data, dict):
        vector_df = pd.DataFrame([vector_data])
  else:
        vector_df = vector_data.copy()
  for col in log_cols:
      if col in vector_df.columns:
          vector_df[col] = np.log1p(vector_df[col].clip(0, 1e6))
  scaled_vector = state_scaler.transform(vector_df)
  return scaled_vector

def get_cont_info(agent_class):
    a_scaler = joblib.load(f"{agent_class}_action_scaler.joblib")
    cont_actions = a_scaler.feature_names_in_

    action_info = []
    for i, name in enumerate(cont_actions):
        a_min = a_scaler.data_min_[i] if hasattr(a_scaler, 'data_min_') else -1.0
        a_max = a_scaler.data_max_[i] if hasattr(a_scaler, 'data_max_') else 1.0
        if a_min == a_max:
            a_min -= 1.0
            a_max += 1.0

        action_info.append((name, a_min, a_max))

    return action_info

In [113]:
import pandas as pd
import numpy as np
import tensorflow as tf
import joblib

def extract_and_analyze(policy_text, agent_classes):
    extracted_features = feature_extraction(policy_text)
    print(extracted_features)
    nlp_res = {k: quantify(v) for k, v in extracted_features.items()}
    analysis_report = []

    for agent_class in agent_classes:
        agent_graph = tf.Graph()
        with agent_graph.as_default():
            try:
                s_scaler = joblib.load(f"{agent_class}_state_scaler.joblib")
                a_scaler = joblib.load(f"{agent_class}_action_scaler.joblib")
            except:
                continue

            state_cols = s_scaler.feature_names_in_.tolist()
            action_cols = a_scaler.feature_names_in_.tolist()
            state_dict, action_dict = input_vector(nlp_res, state_cols, action_cols)

            log_keywords = ['budget', 'gdp', 'emissions', 'population', 'revenue', 'valuation', 'infrastructure', 'capacity']

            s_df = pd.DataFrame([state_dict])[state_cols]
            for col in state_cols:
                if any(k in col.lower() for k in log_keywords):
                    s_df[col] = np.log1p(s_df[col].clip(0, None))
            s_scaled = s_scaler.transform(s_df).astype(np.float32)
            s_scaled = np.clip(s_scaled, -3.0, 3.0)

            a_df = pd.DataFrame([action_dict])[action_cols]
            for col in action_cols:
                if any(k in col.lower() for k in log_keywords):
                    a_df[col] = np.log1p(a_df[col].clip(0, None))
            a_scaled = a_scaler.transform(a_df).astype(np.float32)

            train_action_mean = a_scaler.mean_.reshape(1, -1).astype(np.float32)
            train_action_std = np.sqrt(a_scaler.var_).reshape(1, -1).astype(np.float32)

            sess = tf.compat.v1.Session()
            action_info = get_cont_info(agent_class)
            dis_actions = [
                ('action_Fossil Fuel Phase-Out Regulations', 3),
                ('action_Fuel Economy Standards', 4),
                ('state_Single-use plastics bans', 3),
                ('action_Vehicle emission standards', 4)
            ]

            model = A3C(scope="TrainNet", session=sess,
                        MAX_STATE_NO=len(state_cols),
                        action_continuous=action_info,
                        action_discrete=dis_actions)

            checkpoint_path = f"./models/{agent_class}_policy_model.ckpt"
            reader = tf.compat.v1.train.NewCheckpointReader(checkpoint_path)
            ckpt_vars = reader.get_variable_to_shape_map().keys()

            graph_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope="TrainNet")
            vars_to_restore = [v for v in graph_vars if v.name.split(':')[0] in ckpt_vars and "Adam" not in v.name]

            sess.run(tf.compat.v1.global_variables_initializer())
            if vars_to_restore:
                saver = tf.compat.v1.train.Saver(var_list=vars_to_restore)
                saver.restore(sess, checkpoint_path)

            v_val, a_mean = sess.run(
                [model.v, model.mean],
                feed_dict={
                    model.s: s_scaled,
                    model.current_action: a_scaled,
                    model.dataset_mean: train_action_mean,
                    model.dataset_std: train_action_std
                }
            )

            rec_unscaled = a_scaler.inverse_transform(a_mean)
            res = {"Agent": agent_class, "Overall_Policy_Score": float(v_val[0][0])}

            for i, col in enumerate(action_cols):
                val = rec_unscaled[0, i]
                if any(k in col.lower() for k in log_keywords):
                    val = np.expm1(val)
                res[f"Rec_{col}"] = float(val)
                res[f"Original_{col}"] = action_dict[col]

            analysis_report.append(res)
            sess.close()

    return pd.DataFrame(analysis_report)

df_results = extract_and_analyze(sample_policy_text, agents)
