# Example of using SpecialtyInsurance Simulator


In [1]:
### 1. Set Simulation Parameters

import os
from logger.arguments import get_arguments

# Read the argument groups from logger.arguments
sim_args, manager_args, broker_args, syndicate_args, reinsurancefirm_args, shareholder_args, risk_args = get_arguments()

# Override selected arguments for this example run
sim_args["max_time"] = 30   # Simulation time span, in days
manager_args["lead_top_k"] = 3   # Number of syndicates competing for the lead quote
manager_args["follow_top_k"] = 2   # Number of syndicates following the lead strategy
broker_args["num_brokers"] = 1   # Number of brokers in the insurance market
syndicate_args["num_syndicates"] = 3   # Number of syndicates in the insurance market
shareholder_args["num_shareholders"] = 1   # Number of shareholders in the insurance market
risk_args["num_risks"] = 1  # Number of risks
risk_args["num_categories"] = 4  # Number of risk categories

# No reinsurance mechanism is included at this stage
with_reinsurance = False   

# Number of risk models loaded to all syndicates
num_risk_models = 1   


In [2]:
### 2. Generate Catastrophes

from environment.risk_generator import RiskGenerator

# Create the catastrophe list and the risk-model configurations.
# generate_risks() returns both in one call; they are reused by every later cell.
catastrophes, risk_model_configs = RiskGenerator(num_risk_models, sim_args, risk_args).generate_risks()
# NOTE(review): assumes at least one catastrophe was generated and that each
# entry is dict-like (supports .get) — confirm against RiskGenerator.
print("catastrophe starts at time", catastrophes[0].get("risk_start_time"))


catastrophe starts at time 20


In [3]:
### 3. Generate Insurance Market

from environment.market_generator import MarketGenerator

# Build the market participants: brokers, syndicates, reinsurance firms and
# shareholders. Arguments are positional, in the order MarketGenerator expects.
market_generator = MarketGenerator(
    with_reinsurance,
    num_risk_models,
    sim_args,
    broker_args,
    syndicate_args,
    reinsurancefirm_args,
    shareholder_args,
    risk_model_configs,
)
brokers, syndicates, reinsurancefirms, shareholders = market_generator.generate_agents()

# Show the initial state of every broker, then of every syndicate
for broker in brokers:
    print(broker.data())
for syndicate in syndicates:
    print(syndicate.data())
    

{'broker_id': '0', 'broker_risk': [], 'broker_quote': []}
{'syndicate_id': '0', 'initial_capital': 10000000, 'current_capital': 10000000, 'premium_internal_weight': 0.5, 'interest_rate': 0.001, 'play_leader_in_contracts': [], 'play_follower_in_contracts': [], 'loss_experiency_weight': 0.2, 'volatility_weight': 0, 'underwriter_markup_recency_weight': 0.2, 'upper_premium_limit': 1.2, 'lower_premium_limit': 0.85, 'premium_reserve_ratio': 0.5, 'minimum_capital_reserve_ratio': 1, 'maximum_scaling_factor': 1, 'market_entry_probability': 0.3, 'exit_capital_threshold': 0.6, 'exit_time_limit': 24, 'premium_sensitivity': 5, 'acceptance_threshold_friction': 0.9}
{'syndicate_id': '1', 'initial_capital': 10000000, 'current_capital': 10000000, 'premium_internal_weight': 0.5, 'interest_rate': 0.001, 'play_leader_in_contracts': [], 'play_follower_in_contracts': [], 'loss_experiency_weight': 0.2, 'volatility_weight': 0, 'underwriter_markup_recency_weight': 0.2, 'upper_premium_limit': 1.2, 'lower_premiu

In [4]:
### 4. Input risk from broker

from environment.event_generator import EventGenerator

current_time = 0

# Risk events brought by the brokers, and the catastrophe events themselves.
# Each list is produced by its own EventGenerator instance.
broker_risk_events = EventGenerator(risk_model_configs).generate_risk_events(sim_args, brokers, catastrophes)
catastrophe_events = EventGenerator(risk_model_configs).generate_catastrophe_events(catastrophes)

# Print one summary line per broker risk event
for i, event in enumerate(broker_risk_events):
    print(
        "risk_id:", event.risk_id,
        "broker_id:", event.broker_id,
        "risk_start_time:", event.risk_start_time,
        "risk_end_time:", event.risk_end_time,
        "risk_factor:", event.risk_factor,
        "risk_category:", event.risk_category,
        "risk_value:", event.risk_value,
    )


risk_id: 0 broker_id: 0 risk_start_time: 0 risk_end_time: 360 risk_factor: 0.5813765128858709 risk_category: 1 risk_value: 1000.0
risk_id: 1 broker_id: 0 risk_start_time: 1 risk_end_time: 361 risk_factor: 0.5813765128858709 risk_category: 1 risk_value: 1000.0
risk_id: 2 broker_id: 0 risk_start_time: 2 risk_end_time: 362 risk_factor: 0.5813765128858709 risk_category: 1 risk_value: 1000.0
risk_id: 3 broker_id: 0 risk_start_time: 3 risk_end_time: 363 risk_factor: 0.5813765128858709 risk_category: 1 risk_value: 1000.0
risk_id: 4 broker_id: 0 risk_start_time: 4 risk_end_time: 364 risk_factor: 0.5813765128858709 risk_category: 1 risk_value: 1000.0
risk_id: 5 broker_id: 0 risk_start_time: 5 risk_end_time: 365 risk_factor: 0.5813765128858709 risk_category: 1 risk_value: 1000.0
risk_id: 6 broker_id: 0 risk_start_time: 6 risk_end_time: 366 risk_factor: 0.5813765128858709 risk_category: 1 risk_value: 1000.0
risk_id: 7 broker_id: 0 risk_start_time: 7 risk_end_time: 367 risk_factor: 0.5813765128858

In [5]:
from __future__ import annotations
import warnings
from environment.event.add_catastrophe import AddCatastropheEvent
from environment.event.add_attritionalloss import AddAttritionalLossEvent
from environment.event.add_risk import AddRiskEvent
from environment.event.add_premium import AddPremiumEvent
from environment.event.add_claim import AddClaimEvent
import numpy as np
from environment.market import NoReinsurance_RiskOne, NoReinsurance_RiskFour, Reinsurance_RiskOne, Reinsurance_RiskFour
from manager.event_handler import EventHandler

class MarketManager:
    """
    Manage and evolve the market.

    The manager owns the market object, receives the syndicates' quotes
    (actions) for the current risk, decides which of them are accepted, and
    applies the accepted ones when the event handler reports that the
    corresponding broker risk event has started.
    """

    def __init__(self, maxstep, manager_args, brokers, syndicates, reinsurancefirms, shareholders, risks, risk_model_configs, with_reinsurance, num_risk_models, catastrophe_events, broker_risk_events, event_handler, logger = None, time = 0):
        """
        Store the market participants and build the market.

        Parameters
        ----------
        maxstep: int
            Maximum simulation time, forwarded to the market.
        manager_args: dict
            Manager configuration, forwarded to the market.
        brokers, syndicates, reinsurancefirms, shareholders: list
            Market participants.
        risks: list
            Catastrophes handed to the market.
        risk_model_configs: list
            Risk-model configurations.
        with_reinsurance: bool
            Stored only; see the note on market construction below.
        num_risk_models: int
            Stored only; see the note on market construction below.
        catastrophe_events, broker_risk_events: list
            Pre-generated events.
        event_handler: EventHandler
            Object whose ``forward`` advances the market in time.
        logger: optional
            When given, receives the initial market metadata.
        time: int
            Initial market time.
        """
        self.maxstep = maxstep
        self.manager_args = manager_args
        self.brokers = brokers
        self.syndicates = syndicates
        self.reinsurancefirms = reinsurancefirms
        self.shareholders = shareholders
        self.risks = risks
        self.risk_model_configs = risk_model_configs
        self.with_reinsurance = with_reinsurance
        self.num_risk_models = num_risk_models
        self.catastrophe_events = catastrophe_events
        # Attritional-loss, premium and claim events are not wired in yet.
        self.broker_risk_events = broker_risk_events
        self.event_handler = event_handler

        # NOTE(review): the market type is fixed to NoReinsurance_RiskOne even
        # though with_reinsurance / num_risk_models are accepted — confirm
        # whether the other imported market classes should be selected here.
        self.market = NoReinsurance_RiskOne(time, self.maxstep, self.manager_args, self.brokers, self.syndicates, self.shareholders, self.risks, self.risk_model_configs, self.broker_risk_events)

        self.min_step_time = 1  # Day Event

        self.actions_to_apply = []
        # For logging keep track of all Actions ever received and whether they were accepted or refused by the manager
        self.actions_accepted = {}
        self.actions_refused = {}

        # Logging
        self.logger = logger
        if self.logger is not None:
            # NOTE(review): assumes the market exposes `reinsurancefirms`
            # although none are passed to its constructor — confirm.
            self.logger._store_metadata(
                self.market.time, self.market.brokers, self.market.syndicates, self.market.reinsurancefirms, self.market.shareholders, self.event_handler
            )

    def evolve_action_market(self, starting_broker_risk):
        """
        Apply the accepted actions for one risk to its broker and syndicates.

        The first accepted action is the lead quote, the remaining ones are
        followers. When no action was accepted the risk is recorded on the
        broker as not underwritten.

        Parameters
        ----------
        starting_broker_risk: AddRiskEvent
            The risk event that starts at the current market time.
        """
        broker_id = starting_broker_risk.broker_id
        risks = {"risk_id": starting_broker_risk.risk_id,
                "risk_start_time": starting_broker_risk.risk_start_time,
                "risk_factor": starting_broker_risk.risk_factor,
                "risk_category": starting_broker_risk.risk_category,
                "risk_value": starting_broker_risk.risk_value}
        # Always address the broker that brought the risk (the refusal branch
        # used to hard-code broker 0).
        broker = self.market.brokers[int(broker_id)]

        if not self.actions_to_apply:
            broker.not_underwritten_risk(risks)
            return

        lead_action, *follow_actions = self.actions_to_apply
        lead_syndicate_id = lead_action.syndicate
        follow_syndicates_id = [action.syndicate for action in follow_actions]
        premium = starting_broker_risk.risk_value # TODO: will be changed in the future

        broker.add_contract(risks, lead_syndicate_id, follow_syndicates_id, premium)

        lead_syndicate = self.market.syndicates[int(lead_syndicate_id)]
        lead_syndicate.add_leader(risks, lead_action.line_size, premium)
        lead_syndicate.add_contract(risks, broker_id, premium)

        for action in follow_actions:
            follower = self.market.syndicates[int(action.syndicate)]
            follower.add_follower(risks, action.line_size, premium)
            follower.add_contract(risks, broker_id, premium)

    def evolve(self, step_time):
        """
        Advance the market by step_time and apply the pending actions.

        Parameters
        ----------
        step_time: float
            Amount of time in days to evolve the market for.
        """
        # The time the market will have after being evolved
        market_end_time = self.market.time + step_time

        upcoming_broker_risk = [
            e.risk_id for e in self.event_handler.upcoming_broker_risk.values() if isinstance(e, AddRiskEvent)
        ]

        # Enact the events
        self.market = self.event_handler.forward(self.market, step_time)

        # Risk events that were upcoming before the step and are completed now
        newly_added_broker_risk_events = {
            e.risk_id: e.risk_start_time
            for e in self.event_handler.completed_broker_risk.values()
            if isinstance(e, AddRiskEvent) and (e.risk_id in upcoming_broker_risk)
        }

        broker_risk_event_start_times = np.array(
            [
                newly_added_broker_risk_events[risk_id]
                for risk_id in upcoming_broker_risk
                if newly_added_broker_risk_events.get(risk_id) is not None
            ]
        )

        # np.unique already returns the distinct start times in sorted order
        sorted_unique_start_times = np.unique(broker_risk_event_start_times)

        # Update all the agents, run the event at the same start time
        for start_time in sorted_unique_start_times:
            # Move along the market's time
            self.market.time = start_time

            # Pick the risk event starting at this time; as before, the last
            # matching event wins when several share a start time.
            starting_broker_risk = None
            for event in self.broker_risk_events:
                if event.risk_start_time == start_time:
                    starting_broker_risk = event

            # Nothing to apply if the handler reported a start time that no
            # known risk event has (previously an AttributeError on None).
            if starting_broker_risk is not None:
                self.evolve_action_market(starting_broker_risk)

            # Empty all the actions to apply to syndicates
            self.actions_to_apply = []

        self.market.time = market_end_time

    def receive_actions(self, actions):
        """
        Decide which quotes are accepted for the current risk.

        The quote is refused (no accepted actions) when the line sizes of all
        syndicates sum to less than 1. Otherwise the first syndicate with the
        largest line size becomes the leader and the remaining share is handed
        out to the other syndicates in index order; the follower that would
        exceed the remaining share has its ``line_size`` capped in place.

        Parameters
        ----------
        actions: list
            One action per syndicate, indexed like ``self.market.syndicates``;
            each exposes a mutable ``line_size``.
        """
        # TODO: will add selection algorithm in the future
        num_syndicates = len(self.market.syndicates)

        sum_line_size = 0
        for sy in range(num_syndicates):
            sum_line_size += actions[sy].line_size

        if sum_line_size < 1:
            # Refuse the quote TODO: will add refuse in the future, action space from 0.0 to 0.9
            self.actions_to_apply = []
            return

        # Find the leader: first syndicate with the strictly largest line size
        leader_id = 0
        leader_line_size = 0
        for sy in range(num_syndicates):
            if actions[sy].line_size > leader_line_size:
                leader_line_size = actions[sy].line_size
                leader_id = sy
        accept_actions = [actions[leader_id]]

        # Assign the rest of the line to the other syndicates, FIFO.
        # A single pass always terminates, even if floating-point rounding
        # leaves a tiny positive remainder after every syndicate took part.
        rest_line_size = 1 - leader_line_size
        for sy in range(num_syndicates):
            if rest_line_size <= 0:
                break
            if sy == leader_id:
                continue
            if actions[sy].line_size > rest_line_size:
                actions[sy].line_size = rest_line_size
            accept_actions.append(actions[sy])
            # Rebind instead of `-=`: line sizes may be numpy arrays, and an
            # in-place subtraction would also zero the line size just capped.
            rest_line_size = rest_line_size - actions[sy].line_size

        # Save Actions to issue
        self.actions_to_apply = accept_actions


In [6]:
### 5. Create Multi-agent Environment to get access to the market performance

import gym
from environment.environment import SpecialtyInsuranceMarketEnv
from environment.event_generator import EventGenerator
from manager.ai_model.action import Action
from manager import EventHandler

class MultiAgentBasedModel(SpecialtyInsuranceMarketEnv):
    """
    Multi-agent environment in which every syndicate is one agent.

    Each step every syndicate submits a line size for the risk that starts at
    the next time step; the MarketManager decides which quotes are accepted
    and evolves the market by ``dt``.
    """

    def __init__(self, sim_args, manager_args, brokers, syndicates, reinsurancefirms, shareholders, catastrophes, risk_model_configs, with_reinsurance, num_risk_models, dt = 1):
        """
        Store the market set-up, declare the spaces and reset the environment.

        Parameters
        ----------
        sim_args: dict
            Simulation arguments; ``sim_args["max_time"]`` is the episode length.
        manager_args: dict
            Arguments forwarded to the MarketManager.
        brokers, syndicates, reinsurancefirms, shareholders: list
            Market participants.
        catastrophes: list
            Catastrophes, stored as ``self.risks``.
        risk_model_configs: list
            Risk-model configurations.
        with_reinsurance: bool
            Forwarded to the parent environment and the MarketManager.
        num_risk_models: int
            Forwarded to the parent environment and the MarketManager.
        dt: int
            Time, in days, the market is evolved per step.
        """
        self.sim_args = sim_args
        self.maxstep = self.sim_args["max_time"]
        self.manager_args = manager_args
        # NOTE(review): the initial_* attributes alias the live objects, so
        # reset() does not undo mutations made to the agents during an
        # episode — confirm whether copies are needed.
        self.brokers = brokers
        self.initial_brokers = brokers
        self.syndicates = syndicates
        self.initial_syndicates = syndicates
        self.reinsurancefirms = reinsurancefirms
        self.initial_reinsurancefirms = reinsurancefirms
        self.shareholders = shareholders
        self.initial_shareholders = shareholders
        self.risks = catastrophes
        self.initial_risks = catastrophes
        self.risk_model_configs = risk_model_configs
        self.with_reinsurance = with_reinsurance
        self.num_risk_models = num_risk_models
        self.dt = dt
        self.mm = None
        self.event_handler = None

        # Active syndicate list
        self.syndicate_active_list = []
        # Initialise events, actions, and states
        self.catastrophe_events = []
        self.broker_risk_events = []
        self.action_map_dict = {}
        self.state_encoder_dict = {}

        # Define Action Space, Define Observation Space
        self.n = len(self.syndicates)
        self.agents = {syndicate.syndicate_id for syndicate in self.syndicates}
        self._agent_ids = set(self.agents)
        self.dones = set()
        self._spaces_in_preferred_format = True
        self.observation_space = gym.spaces.Dict({
            syndicate.syndicate_id: gym.spaces.Box(low=np.array([-1000000,-1000000,-1000000,-1000000,-1000000,-1000000]),
                                                   high=np.array([1000000,1000000,3000000,3000000,3000000,3000000]), dtype = np.float32)
            for syndicate in self.syndicates
        })
        self.action_space = gym.spaces.Dict({
            syndicate.syndicate_id: gym.spaces.Box(0.5, 0.9, dtype = np.float32) for syndicate in self.syndicates})

        super(MultiAgentBasedModel, self).__init__(sim_args = self.sim_args,
                                                   manager_args = self.manager_args,
                                                   brokers = self.brokers,
                                                   syndicates = self.syndicates,
                                                   reinsurancefirms = self.reinsurancefirms,
                                                   shareholders = self.shareholders,
                                                   risks = self.risks,
                                                   risk_model_configs = self.risk_model_configs,
                                                   with_reinsurance = self.with_reinsurance,
                                                   num_risk_models = self.num_risk_models,
                                                   dt = self.dt)
        # Reset the environment
        self.reset()

    def reset(self, seed = None, options = None):
        """
        Reset the environment and return the initial observations.

        Returns
        -------
        tuple
            ``(observations, infos)``, both keyed by syndicate id.
        """
        super().reset(seed = seed)

        # Reset the environment to an initial state
        self.brokers = self.initial_brokers
        self.syndicates = self.initial_syndicates
        self.reinsurancefirms = self.initial_reinsurancefirms
        self.shareholders = self.initial_shareholders
        self.risks = self.initial_risks

        # Generate the events here instead of reading notebook-level globals,
        # so the environment also works where those globals do not exist.
        # Broker risk event daily: TODO: broker generate risk according to poisson distribution
        self.catastrophe_events = EventGenerator(self.risk_model_configs).generate_catastrophe_events(self.risks)
        self.broker_risk_events = EventGenerator(self.risk_model_configs).generate_risk_events(self.sim_args, self.brokers, self.risks)
        self.event_handler = EventHandler(self.maxstep, self.catastrophe_events, self.broker_risk_events)
        # Initiate market manager
        self.mm = MarketManager(self.maxstep, self.manager_args, self.brokers, self.syndicates, self.reinsurancefirms, self.shareholders, self.risks,
                                self.risk_model_configs, self.with_reinsurance, self.num_risk_models, self.catastrophe_events, self.broker_risk_events,
                                self.event_handler)

        # Initiate time step BEFORE encoding: the action map and the state
        # encoder both look up the risk starting at self.timestep + 1.
        self.timestep = -1
        self.step_track = 0
        self.log = []
        self.dones = set()

        # Set per syndicate active status and build status list
        self.syndicate_active_list = [
            syndicate.syndicate_id
            for syndicate in self.mm.market.syndicates
            if syndicate.status == True
        ]

        # Create action map and state list
        info_dict = {}
        for syndicate in self.mm.market.syndicates:
            self.action_map_dict[syndicate.syndicate_id] = self.action_map_creator(syndicate, 0)
            self.state_encoder_dict[syndicate.syndicate_id] = self.state_encoder(syndicate.syndicate_id)
            # Same shape as the infos returned by step()
            info_dict[syndicate.syndicate_id] = {}

        return self.state_encoder_dict, info_dict

    def step(self, action_dict):
        """
        Apply one action per syndicate and evolve the market by ``self.dt``.

        Parameters
        ----------
        action_dict: dict
            Maps syndicate id to the line size chosen by that syndicate.

        Returns
        -------
        tuple
            ``(observations, rewards, terminated, flags, infos)``, each keyed
            by syndicate id; ``terminated`` and ``flags`` also carry
            ``"__all__"``.
        """
        obs_dict, reward_dict, terminated_dict, info_dict = {}, {}, {}, {}
        flag_dict = {}

        # Update environment after actions
        parsed_actions = []
        for syndicate_id, action in action_dict.items():
            # update action map
            self.action_map = self.action_map_creator(self.mm.market.syndicates[int(syndicate_id)], action)
            parsed_actions.append(self.action_map)

        self.send_action2env(parsed_actions)

        # TODO: premium and claim events are not generated/handled yet.

        self.mm.evolve(self.dt)
        # Print every syndicate's capital, whatever the number of syndicates
        for syndicate in self.mm.market.syndicates:
            print(syndicate.current_capital_category)
        self.timestep += 1

        # Compute rewards and get next observation
        for syndicate_id, action in action_dict.items():
            reward_dict[syndicate_id] = self.compute_reward(action, syndicate_id)
            obs_dict[syndicate_id] = self.state_encoder(syndicate_id)
            info_dict[syndicate_id] = {}
            flag_dict[syndicate_id] = False
            terminated_dict[syndicate_id] = self.check_termination(syndicate_id)
            if terminated_dict[syndicate_id]:
                # Record the syndicate that finished (was an unrelated `i`)
                self.dones.add(syndicate_id)
        # Update plot
        self.draw2file(self.mm.market)

        # All done termination check
        all_terminated = all(terminated_dict.values())

        terminated_dict["__all__"] = all_terminated
        flag_dict["__all__"] = all_terminated

        return obs_dict, reward_dict, terminated_dict, flag_dict, info_dict

    def check_termination(self, syndicate_id):
        """
        Return True once the maximum time step has been reached.

        ``syndicate_id`` is accepted for interface symmetry; exit and
        bankruptcy of individual syndicates are not checked yet.
        """
        return self.timestep >= self.maxstep

    def compute_reward(self, action, syndicate_id):
        """
        Compute the reward of one syndicate for the current step.

        The reward sums four terms: risk acceptance, paid claims, profit
        (current minus initial capital) and a penalty of 10000 when the
        profit is negative. ``action`` is currently unused.
        """
        market = self.mm.market
        syndicate = market.syndicates[int(syndicate_id)]
        # calculate reward function
        r = [0.0] * 4

        if self.timestep <= self.maxstep:
            # For each insurable risk being accepted +1 or refused -1
            # NOTE(review): every (risk, contract) pair is compared, so
            # non-matching pairs also count as -1 — confirm this is intended.
            for broker in market.brokers:
                for risk in broker.risks:
                    for contract in broker.underwritten_contracts:
                        if risk["risk_id"] == contract["risk_id"]:
                            r[0] += 1
                        else:
                            r[0] -= 1

            # For each claim being paid +1 or refused -1
            for claim in syndicate.paid_claim:
                if claim["status"] == True:
                    r[1] += 1
                else:
                    r[1] -= 1

            # Profit and Bankruptcy
            profit = syndicate.current_capital - syndicate.initial_capital
            r[2] += profit
            if profit < 0:
                r[3] -= 10000

        # Sum reward
        reward = 0.0
        reward += np.sum(r)

        return reward

    def send_action2env(self, parsed_actions):
        """Forward the parsed actions to the market manager, if there are any."""
        # Apply action
        if len(parsed_actions) > 0:
            self.mm.receive_actions(actions=parsed_actions)

    def state_encoder(self, syndicate_id):
        """
        Build the observation of one syndicate.

        The observation holds the category and value of every risk starting
        at the next time step, followed by the syndicate's
        ``current_capital_category`` entries.
        """
        obs = []
        for event in self.broker_risk_events:
            if event.risk_start_time == self.timestep+1:
                # Catastrophe risk category and risk value
                obs.append(event.risk_category)
                obs.append(event.risk_value)

        # Syndicate status: current capital per category
        syndicate = self.mm.market.syndicates[int(syndicate_id)]
        for capital in syndicate.current_capital_category:
            obs.append(capital)

        return obs

    def action_map_creator(self, syndicate, line_size):
        """
        Wrap a line size into an Action for the risk starting next step.

        Returns None when no risk event starts at ``self.timestep + 1``; when
        several do, the last one is used.
        """
        action_map = None
        for event in self.broker_risk_events:
            if event.risk_start_time == self.timestep+1:
                action_map = Action(syndicate.syndicate_id, line_size, event.risk_id, event.broker_id)

        return action_map
   

In [7]:
### 6. Register environment and train the model

import gymnasium as gym
import numpy as np
import ray
from ray.tune.registry import register_env
from ray import air, tune
from ray.rllib.algorithms.ppo import PPO
from ipywidgets import IntProgress
from gym.spaces import Box
from ray.rllib.policy.policy import PolicySpec
from ray.rllib.examples.policy.random_policy import RandomPolicy

# Keyword arguments used to construct MultiAgentBasedModel
insurance_args = dict(
    sim_args=sim_args,
    manager_args=manager_args,
    brokers=brokers,
    syndicates=syndicates,
    reinsurancefirms=reinsurancefirms,
    shareholders=shareholders,
    catastrophes=catastrophes,
    risk_model_configs=risk_model_configs,
    with_reinsurance=with_reinsurance,
    num_risk_models=num_risk_models,
)

def env_creator(env_config):
    """Create the multi-agent environment from an env_config mapping of constructor keywords."""
    environment = MultiAgentBasedModel(**env_config)
    return environment

def policy_mapping_fn(agent_id, episode, worker, **kwargs):
    """
    Map an agent id to the name of the policy that controls it.

    The trailing number of the agent id selects the policy, e.g.
    "0" or "agent0" -> "main0" and "1" or "agent1" -> "main1". Agents
    without a dedicated "main<N>" policy are mapped to "random", so no
    agent is ever sent to a policy name that is not configured.
    """
    dedicated_policies = ("main0", "main1")

    agent_name = str(agent_id)
    # Use the whole trailing number, not only the last character, so that
    # agent "10" is not mistaken for agent "0".
    number = agent_name[len(agent_name.rstrip("0123456789")):]
    policy_id = f"main{number}"
    return policy_id if policy_id in dedicated_policies else "random"

def ppo_trainer_creator(insurance_args):
    """
    Build a PPO trainer for the "SpecialtyInsuranceMarket-validation" environment.

    Two learnable policy specs ("main0", "main1") and one random policy are
    declared; only "main0" is trained. ``insurance_args`` is passed to the
    environment as its ``env_config``.
    """

    def observation_box():
        # Six-dimensional observation bounds shared by all policies
        return gym.spaces.Box(low=np.array([-1000000,-1000000,-1000000,-1000000,-1000000,-1000000]),
                              high=np.array([1000000,1000000,3000000,3000000,3000000,3000000]), dtype = np.float32)

    def action_box():
        # Line size offered by a syndicate
        return gym.spaces.Box(0.5, 0.9, dtype = np.float32)

    policies = {
        # The Policy we are actually learning.
        "main0": PolicySpec(observation_space=observation_box(), action_space=action_box()),
        "main1": PolicySpec(observation_space=observation_box(), action_space=action_box()),
        "random": PolicySpec(policy_class=RandomPolicy),
    }

    multi_agent = {
        "policies": policies,
        "policy_mapping_fn": policy_mapping_fn,
        "policies_to_train": ["main0"],
    }

    config = {
        "env": "SpecialtyInsuranceMarket-validation",
        "framework": "tf",
        "multi_agent": multi_agent,
        "observation_space": observation_box(),
        "action_space": action_box(),
        "env_config": insurance_args,
        "evaluation_interval": 2,
        "evaluation_duration": 20,
    }

    return PPO(config=config)

# Folder in which trained models are recorded
top_dir = "noreinsurance_" + "_model_" + str(num_risk_models)

# Register the environment under the name used in the trainer config;
# this must happen before the trainer is created.
register_env("SpecialtyInsuranceMarket-validation", env_creator)

# The number of training iterations for the RL agent
num_training = 2

# Build the PPO trainer on top of the registered environment
trainer = ppo_trainer_creator(insurance_args)
# Number of training iterations

"""for n in range(num_training):
    # Create a path to store the trained agent for each iteration
    model_filepath = f"{top_dir}/{str(n)}/saved_models"
        
    num_episode = 10

    # A training iteration includes parallel sample collection by the environment workers 
    # as well as loss calculation on the collected batch and a model update.

    bar = IntProgress(min=0, max=num_episode)
    display(bar)
    list_mean_rewards = []
    list_min_rewards = []
    list_max_rewards = []
    list_train_step = []

    for i in range(num_episode):
        trainer.train()     
        print("Progress:", i+1, "/", num_episode, end="\r")
        bar.value += 1
        if (i+1) % 2 == 0:
            list_mean_rewards.append(trainer.evaluation_metrics["evaluation"]["episode_reward_mean"])
            list_min_rewards.append(trainer.evaluation_metrics["evaluation"]["episode_reward_min"])
            list_max_rewards.append(trainer.evaluation_metrics["evaluation"]["episode_reward_max"])
            list_train_step.append(i+1)
        if i % 10 == 0:
            trainer.save(model_filepath)"""
        
    

# Roll out one episode with the trainer's current policies.
# Can be used for game model
env = MultiAgentBasedModel(**insurance_args)

total_steps = 0
terminated_dict = {"__all__": False}

obs_dict, info_dict = env.reset()

while not terminated_dict["__all__"]:
    # Progress marker every 20 steps
    if total_steps % 20 == 0:
        print(".", end="")

    # One action per syndicate, then advance the environment by one step
    action_dict = trainer.compute_actions(obs_dict)
    print(action_dict)
    total_steps += 1

    obs_dict, reward_dict, terminated_dict, flag_dict, info_dict = env.step(action_dict)


  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
`UnifiedLogger` will be removed in Ray 2.7.
  return UnifiedLogger(config, logdir, loggers=None)
The `JsonLogger interface is deprecated in favor of the `ray.tune.json.JsonLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `CSVLogger interface is deprecated in favor of the `ray.tune.csv.CSVLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `TBXLogger interface is deprecated in favor of the `ray.tune.tensorboardx.TBXLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
2024-03-13 09:14:38,955	INFO worker.py:1715 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m
[36m(RolloutWorker pid=15562)[0m   gym.logger.warn(f"Box bound precision lowered by

.{'0': array([0.70956796], dtype=float32), '1': array([0.7311196], dtype=float32), '2': array([0.9], dtype=float32)}
[2500000.0, 2499000.0, 2500000.0, 2500000.0]
[2500000.0, 2500000.0, 2500000.0, 2500000.0]
[2500000.0, 2499000.0, 2500000.0, 2500000.0]
{'0': array([0.512263], dtype=float32), '1': array([0.5], dtype=float32), '2': array([0.9], dtype=float32)}
[2500000.0, 2498000.0, 2500000.0, 2500000.0]
[2500000.0, 2500000.0, 2500000.0, 2500000.0]
[2500000.0, 2498000.0, 2500000.0, 2500000.0]
{'0': array([0.837165], dtype=float32), '1': array([0.9], dtype=float32), '2': array([0.5], dtype=float32)}
[2500000.0, 2497000.0, 2500000.0, 2500000.0]
[2500000.0, 2499000.0, 2500000.0, 2500000.0]
[2500000.0, 2498000.0, 2500000.0, 2500000.0]
{'0': array([0.9], dtype=float32), '1': array([0.6681217], dtype=float32), '2': array([0.7971457], dtype=float32)}
[2500000.0, 2496000.0, 2500000.0, 2500000.0]
[2500000.0, 2498000.0, 2500000.0, 2500000.0]
[2500000.0, 2498000.0, 2500000.0, 2500000.0]
{'0': array(

TypeError: list indices must be integers or slices, not dict

In [None]:
### 7. Test the trained model performance

def trainer_restore(self, top_dir, n):
    """
    Restore ``self.trainer`` from the checkpoint written at iteration ``n``.

    The checkpoint is expected at
    ``<top_dir>/<n-1>/saved_models/checkpoint_<n, zero-padded to 6 digits>/rllib_checkpoint.json``.

    Parameters
    ----------
    self:
        Object holding the trainer in its ``trainer`` attribute.
    top_dir: str
        Top-level recording folder.
    n: int
        Checkpoint number; any value is supported, not only n <= 999.
    """
    # Zero-padding replaces the three hand-written digit-count branches and
    # also covers n > 999, which previously left the path parts undefined.
    checkpoint_path = os.path.join(
        top_dir,
        str(n - 1),
        "saved_models",
        f"checkpoint_{n:06d}",
        "rllib_checkpoint.json",
    )
    self.trainer.restore(checkpoint_path)

# Keyword arguments used to construct the validation environment
insurance_args = dict(
    sim_args=sim_args,
    manager_args=manager_args,
    brokers=brokers,
    syndicates=syndicates,
    reinsurancefirms=reinsurancefirms,
    shareholders=shareholders,
    catastrophes=catastrophes,
    risk_model_configs=risk_model_configs,
    with_reinsurance=with_reinsurance,
    num_risk_models=num_risk_models,
)

validation_episodes = 1
# Rewards collected per episode: {episode: {syndicate_id: [reward, ...]}}
all_rewards = {}

for epi in range(validation_episodes):
    env = MultiAgentBasedModel(**insurance_args)

    print(f"\nepisode: {epi} | ")
    total_steps = 0
    terminated_dict = {"__all__": False}
    all_rewards[epi] = {}

    obs_dict, info_dict = env.reset()

    while not terminated_dict["__all__"]:
        # Progress marker every 20 steps
        if total_steps % 20 == 0:
            print(".", end="")

        action_dict = trainer.compute_actions(obs_dict)
        total_steps += 1

        obs_dict, reward_dict, terminated_dict, flag_dict, info_dict = env.step(action_dict)
        print(total_steps)
        print(action_dict)
        # Append this step's reward to each syndicate's reward history
        for k, v in reward_dict.items():
            all_rewards[epi].setdefault(k, []).append(v)




In [None]:
### Main function run the simulation, two syndicates will be chosen to compete for the leader position
from manager.ai_model.runner import AIRunner
from manager.game_model.runner import GameRunner

# 0 selects AIRunner, 1 selects GameRunner
model = 0

# Both runners take the same positional arguments
runner_args = (
    sim_args,
    manager_args,
    brokers,
    syndicates,
    reinsurancefirms,
    shareholders,
    catastrophes,
    risk_model_configs,
    with_reinsurance,
    num_risk_models,
)

if model == 0:
    runner = AIRunner(*runner_args)
elif model == 1:
    runner = GameRunner(*runner_args)
runner.run()
