# Tutorial - L1BR and same and cross-play performance evaluation

## Install the toolbox (and Ray, Tune, RLLib, PyTorch, etc.)

In [None]:
# Remove pyarrow and install bs4 before installing the toolbox.
!pip uninstall -y pyarrow
!pip install bs4
## If you are NOT running on Google Colab (using Colab is recommended), comment out the lines below to skip installing the dependencies.
print("Setting up colab environment")
!git clone https://github.com/longtermrisk/marltoolbox.git

## Several installation variants exist to support different algorithms.
### Default install (editable install of the cloned repository)
!pip install -e marltoolbox/.

# Needed for TensorBoard
!pip install tensorflow

# A hack to force the runtime to restart, which is needed for the packages installed above to be picked up.
# `os._exit(0)` terminates the kernel process immediately; the notebook front-end then restarts it.
print("Done installing! Restarting via forced crash (this is not an issue).")
import os
os._exit(0)

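After you have run this cell, comment out all of its lines so that re-running the notebook does not reinstall the dependencies and force another restart.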

## Plan  

3. Using the experimentation features in the toolbox  
  a. Evaluate the same and cross-play performances of MA algorithms  
  b. Evaluate the exploitability of an algorithm using Level 1 Best-Response (L1BR)  
  
(Section 1 and 2 are in the tutorial: Basics - How to use the toolbox)  


## Requirements

You have done the first tutorial (Basics - How to use the toolbox).

 # 3. Using the experimentation features in the toolbox

 ## a. Evaluate the same and cross-play performances of MA algorithms  

In [None]:
import os
import copy

import numpy as np 

import ray
from ray import tune
from ray.rllib.evaluation.sample_batch_builder import MultiAgentSampleBatchBuilder
from ray.rllib.agents.callbacks import DefaultCallbacks
from ray.rllib.agents.ppo import PPOTrainer, PPOTorchPolicy
from ray.rllib.agents.ppo.ppo_torch_policy import setup_mixins

from marltoolbox.utils import log, miscellaneous
from marltoolbox.envs.matrix_sequential_social_dilemma import IteratedBoS
from marltoolbox.utils import same_and_cross_perf, restore
from marltoolbox.utils.plot import PlotConfig
from marltoolbox.envs.utils.wrappers import add_RewardUncertaintyEnvClassWrapper

We need to train some agents with different seeds to then compute their same and cross-play performances after deployment.

We are going to train PPO agents on the Bach or Stravinsky (BoS) game using the RLLib API.


In [None]:
# Print the (row player, column player) payoffs of every joint action in BoS.
bos_env_payoffs = IteratedBoS({}).PAYOUT_MATRIX
action_names = ['Bach', 'Stravinsky']
for a_1 in range(len(action_names)):
    for a_2 in range(len(action_names)):
        # The last axis of the payoff matrix is indexed by player (0, then 1).
        payoff_player_1 = bos_env_payoffs[a_1][a_2][0]
        payoff_player_2 = bos_env_payoffs[a_1][a_2][1]
        print(f"payoffs for action pair ({action_names[a_1]},{action_names[a_2]}): "
              f"({payoff_player_1},{payoff_player_2})")

 Here is the configuration for this training (we will not go through it in detail):

In [None]:
def get_trainer_config(hp):
    """Assemble the Tune/RLLib configuration used to train PPO agents on BoS.

    Args:
        hp: dict of hyperparameters. The keys read here are
            "train_n_replicates", "episodes_total", "steps_per_epi" and
            "base_lr".

    Returns:
        A tuple ``(trainer_config, env_config, stop_config)``:
        the full RLLib config (with a grid search over the seeds), the
        environment config embedded in it, and the Tune stopping criterion.
    """
    seeds = miscellaneous.get_random_seeds(hp["train_n_replicates"])
    # The returned values are not used in this function.
    _exp_name, _ = log.log_in_current_day_dir("PPO_BoS")

    # The policy's `after_init` hook first runs the regular PPO mixin setup and
    # then the toolbox hook that loads a policy checkpoint. This is what lets
    # each policy be restored from a different checkpoint during evaluation.
    def _setup_mixins_then_load_checkpoint(*args, **kwargs):
        setup_mixins(*args, **kwargs)
        restore.after_init_load_policy_checkpoint(*args, **kwargs)

    checkpoint_loading_ppo_policy = PPOTorchPolicy.with_updates(
        after_init=_setup_mixins_then_load_checkpoint)

    steps_per_episode = hp["steps_per_epi"]
    n_episodes = hp["episodes_total"]
    initial_lr = hp["base_lr"]

    stop_config = {"episodes_total": n_episodes}

    env_config = {
        "players_ids": ["player_row", "player_col"],
        "max_steps": steps_per_episode,
        "get_additional_info": True,
    }

    # One policy per player; the policy ids are the players' ids.
    policies = {
        player_id: (checkpoint_loading_ppo_policy,
                    IteratedBoS.OBSERVATION_SPACE,
                    IteratedBoS.ACTION_SPACE,
                    {})
        for player_id in env_config["players_ids"]
    }

    # Timestep at which the learning-rate schedule reaches its final value.
    last_scheduled_timestep = int(steps_per_episode * n_episodes)

    trainer_config = {
        "env": add_RewardUncertaintyEnvClassWrapper(
            IteratedBoS,
            reward_uncertainty_std=0.1),
        "env_config": env_config,

        "multiagent": {
            "policies": policies,
            # Agent ids and policy ids are identical.
            "policy_mapping_fn": lambda agent_id: agent_id,
        },

        #### PPO config ####
        # Each rollout fragment covers one episode worth of steps.
        "rollout_fragment_length": steps_per_episode,
        # Timesteps collected per SGD round (three episodes worth of steps).
        "train_batch_size": steps_per_episode * 3,
        # Minibatch size used within each SGD epoch.
        "sgd_minibatch_size": steps_per_episode,
        # Number of SGD epochs executed per train batch.
        "num_sgd_iter": 3,
        "model": {
            # Sizes of the hidden layers of the fully connected net
            "fcnet_hiddens": [4, 2],
            # Nonlinearity of the fully connected net (tanh, relu)
            "fcnet_activation": "relu",
        },

        "lr": initial_lr,
        # Schedule from the base learning rate down to base_lr / 1e9.
        "lr_schedule": [(0, initial_lr),
                        (last_scheduled_timestep, initial_lr / 1e9)],

        # One Tune trial per seed.
        "seed": tune.grid_search(seeds),
        "callbacks": log.get_logging_callbacks_class(),
        "framework": "torch",
        "num_workers": 0,
    }

    return trainer_config, env_config, stop_config

Let's train 16 PPO agents (8 seeds x 2 players): 

In [None]:
# Hyperparameters of the PPO training on BoS.
hyperparameters = dict(
    steps_per_epi=20,
    train_n_replicates=8,
    episodes_total=200,
    exp_name="PPO_BoS",
    base_lr=5e-1,
)

trainer_config, _, stop_config = get_trainer_config(hyperparameters)

# Shut down any Ray instance left over from a previous run before starting a new one.
ray.shutdown()
ray.init(num_cpus=os.cpu_count(), num_gpus=0, local_mode=False)

# Only one checkpoint is written per trial, at the end of training.
tune_analysis = tune.run(
    PPOTrainer,
    config=trainer_config,
    stop=stop_config,
    checkpoint_freq=0,
    checkpoint_at_end=True,
    name=hyperparameters["exp_name"],
    metric="episode_reward_mean",
    mode="max",
)
ray.shutdown()

We now have 16 PPO agents trained with 8 different random seeds, which perform well on BoS.  
We will be able to load these agents using the checkpoints created.

In [None]:
# Show where the best checkpoint (according to the metric given to tune.run) was saved.
best_checkpoint = tune_analysis.best_checkpoint
print("location of the best checkpoint", best_checkpoint)

# Analyses are passed around in a dict keyed by experiment name; here there is a
# single experiment, stored under the empty name.
tune_analysis_per_exp = {"": tune_analysis}

We will use the SameAndCrossPlayEvaluator (from our toolbox) to evaluate the same and cross-play performances.

In [None]:
def evaluate_same_and_cross_perf(tune_analysis_per_exp, hp):
    """Evaluate and plot the same-play and cross-play performances.

    Args:
        tune_analysis_per_exp: dict mapping an experiment name to the Tune
            analysis object holding the checkpoints of the trained agents.
        hp: training hyperparameters; the evaluation hyperparameters are
            derived from them by `generate_eval_config`.
    """
    config_eval, env_config, stop_config, hp_eval = generate_eval_config(hp)

    evaluator = same_and_cross_perf.SameAndCrossPlayEvaluator(
        exp_name=hp_eval["exp_name"])

    evaluation_kwargs = dict(
        evaluation_config=config_eval,
        stop_config=stop_config,
        # A copy is passed so that the evaluator does not share the list with env_config.
        policies_to_load_from_checkpoint=copy.deepcopy(env_config["players_ids"]),
        tune_analysis_per_exp=tune_analysis_per_exp,
        TrainerClass=PPOTrainer,
        n_cross_play_per_checkpoint=2,
        to_load_path=None,
    )
    analysis_metrics_per_mode = evaluator.perform_evaluation_or_load_data(
        **evaluation_kwargs)

    plot_kwargs = dict(
        xlim=hp_eval["x_limits"],
        ylim=hp_eval["y_limits"],
        markersize=5,
        alpha=1.0,
        jitter=hp_eval["jitter"],
        xlabel="player 1 payoffs",
        ylabel="player 2 payoffs",
        title="cross and same-play performances: BoS",
        x_scale_multiplier=hp_eval["scale_multipliers"][0],
        y_scale_multiplier=hp_eval["scale_multipliers"][1],
    )
    plot_config = PlotConfig(**plot_kwargs)

    # Each axis shows the mean reward of one player's policy.
    x_axis_metric = f"policy_reward_mean/{env_config['players_ids'][0]}"
    y_axis_metric = f"policy_reward_mean/{env_config['players_ids'][1]}"
    evaluator.plot_results(analysis_metrics_per_mode,
                           plot_config=plot_config,
                           x_axis_metric=x_axis_metric,
                           y_axis_metric=y_axis_metric)

def generate_eval_config(hp):
    """Derive the evaluation configuration from the training hyperparameters.

    The input `hp` is not modified: the evaluation settings are written to a
    deep copy of it.

    Args:
        hp: training hyperparameters (as accepted by `get_trainer_config`).

    Returns:
        A tuple ``(trainer_config, env_config, stop_config, hp_eval)``.
    """
    eval_steps_per_epi = 20

    hp_eval = copy.deepcopy(hp)
    hp_eval.update({
        "steps_per_epi": eval_steps_per_epi,
        # A single episode per evaluation run.
        "episodes_total": 1,
        # Used by the plot to rescale the axes by the episode length.
        "scale_multipliers": (1/eval_steps_per_epi, 1/eval_steps_per_epi),
        # Learning rate of zero during evaluation.
        "base_lr": 0.0,
        "jitter": 0.05,
        "x_limits": (-0.5, 3.5),
        "y_limits": (-0.5, 3.5),
    })

    trainer_config, env_config, stop_config = get_trainer_config(hp_eval)

    # Evaluation-specific overrides: no exploration, a fixed seed (instead of
    # the grid search used for training) and one episode per train batch.
    trainer_config.update({
        "explore": False,
        "seed": 1111,
        "train_batch_size": hp_eval["steps_per_epi"],
    })

    return trainer_config, env_config, stop_config, hp_eval

# Shut down any running Ray instance before the evaluation, run the same-play
# and cross-play evaluation of the trained PPO agents, then shut Ray down again.
ray.shutdown()
evaluate_same_and_cross_perf(tune_analysis_per_exp, hyperparameters)
ray.shutdown()

We can see the cross-play and same-play performances in the plot. We should see some failures in cross-play (close to (0,0)). If there is no failure, you can restart the notebook and run all the cells above again to produce new evaluations with new agents. These failures are explained by the fact that the PPO agents only learned to coordinate on playing either Bach or Stravinsky. They have not learned to adapt to a change of behavior in the other player.

##  b. Evaluate the exploitability of an algorithm using Level 1 Best-Response (L1BR)


In [None]:
import ray
from ray import tune
from ray.rllib.agents.dqn import DQNTrainer
from ray.rllib.agents.dqn.dqn_torch_policy import DQNTorchPolicy, build_q_stats
from ray.rllib.policy.policy import Policy
from ray.rllib.utils import merge_dicts
from ray.rllib.utils.schedules import PiecewiseSchedule
from ray.rllib.utils.typing import TrainerConfigDict

import torch

from marltoolbox.envs.matrix_sequential_social_dilemma import IteratedPrisonersDilemma
from marltoolbox.algos.learning_tit_for_tat.ltft import LTFT_DEFAULT_CONFIG_UPDATE, LTFT, LTFTCallbacks
from marltoolbox.algos.supervised_learning import SPLTorchPolicy
from marltoolbox.utils import log, miscellaneous, exploration, lvl1_best_response
from marltoolbox.envs.utils.wrappers import add_RewardUncertaintyEnvClassWrapper
from marltoolbox.algos import population

We are going to see if `LTFT` is exploitable after deployment in the IPD. We will train two populations of agents. First, the Level 0 agents, which will be `LTFT` agents trained in self-play. Then we will freeze their weights as if they were deployed in production. Finally, we will train level 1 DQN agents against this population of level 0 agents.  

To train the Lvl0 `LTFT` agents, we will use the code from the first tutorial, section 2. 

In [None]:
def get_env_config(hp):
    """Return the environment part of the RLLib config.

    Args:
        hp: hyperparameters; only "n_steps_per_epi" is read here.

    Returns:
        A dict with the environment class under "env" and its settings
        (players' ids and episode length) under "env_config".
    """
    env_class = get_env_class()
    env_settings = {
        "players_ids": ["player_row", "player_col"],
        "max_steps": hp["n_steps_per_epi"],
    }
    return {"env": env_class, "env_config": env_settings}

def get_env_class():
    """Return the IPD environment class wrapped with reward uncertainty.

    The wrapper is configured with ``reward_uncertainty_std=0.1`` to add some
    randomness to the rewards returned at each step.
    """
    return add_RewardUncertaintyEnvClassWrapper(
        IteratedPrisonersDilemma, reward_uncertainty_std=0.1)

def get_policies_config(hp):
    """Return the "multiagent" and "callbacks" parts of the RLLib config.

    Both players use the LTFT policy from the toolbox instead of the default
    policy of the DQNTrainer (DQNTorchPolicy). The DQNTrainer is still used to
    manage the dataflow and to provide the default config of the base policy.

    Args:
        hp: hyperparameters, forwarded to `get_ltft_policy_config`.
    """

    def _ltft_policy_spec():
        # (policy class, observation space, action space, extra policy config).
        # Passing None as the policy class would select the trainer's default policy.
        # The extra config is merged with a copy of the config given to the trainer.
        return (LTFT,
                IteratedPrisonersDilemma.OBSERVATION_SPACE,
                IteratedPrisonersDilemma.ACTION_SPACE,
                get_ltft_policy_config(hp))

    # Every player gets its own, independently built, policy spec.
    policies = {player_id: _ltft_policy_spec()
                for player_id in ("player_row", "player_col")}

    # Callbacks required by the LTFT policy, merged with additional logging.
    logging_callbacks = log.get_logging_callbacks_class(
        log_env_step=True,
        log_from_policy=True)
    callbacks = miscellaneous.merge_callbacks(LTFTCallbacks, logging_callbacks)

    return {
        "multiagent": {
            "policies": policies,
            # The agent ids used by the environment are identical to the policy ids.
            "policy_mapping_fn": lambda agent_id: agent_id,
        },
        "callbacks": callbacks,
    }


def sgd_optimizer_dqn(policy: Policy, config: TrainerConfigDict) -> "torch.optim.Optimizer":
    """Build an SGD optimizer (with momentum) over the Q-function variables of `policy`.

    The learning rate is the policy's current learning rate and the momentum
    is read from ``config["sgd_momentum"]``.
    """
    q_params = policy.q_func_vars
    sgd_kwargs = {"lr": policy.cur_lr, "momentum": config["sgd_momentum"]}
    return torch.optim.SGD(q_params, **sgd_kwargs)


def sgd_optimizer_spl(policy: Policy, config: TrainerConfigDict) -> "torch.optim.Optimizer":
    """Build an SGD optimizer (with momentum) over all the parameters of `policy.model`.

    The learning rate is the policy's current learning rate and the momentum
    is read from ``config["sgd_momentum"]``.
    """
    model_params = policy.model.parameters()
    sgd_kwargs = {"lr": policy.cur_lr, "momentum": config["sgd_momentum"]}
    return torch.optim.SGD(model_params, **sgd_kwargs)


def get_rllib_config(hyperparameters: dict)-> dict:
    """Assemble the full RLLib config from its thematic sub-configs.

    The sub-configs are merged in order with `dict.update`, so a key defined
    by a later builder overrides the same key from an earlier one.
    """
    config_builders = (
        get_env_config,
        get_policies_config,
        get_default_DQN_config,
        get_exploration_config,
        get_optimization_and_general_config,
    )

    rllib_config = {}
    for build_partial_config in config_builders:
        rllib_config.update(build_partial_config(hyperparameters))
    return rllib_config

def get_ltft_policy_config(hp):
    """Build the additional config dict given to each LTFT policy.

    The LTFT defaults (`LTFT_DEFAULT_CONFIG_UPDATE`) are merged with the
    settings below: the SGD momentum and the list of nested policies (three
    DQN policies trained with SGD + momentum, followed by one supervised
    learning policy).

    Args:
        hp: hyperparameters. The keys read here are "n_steps_per_epi",
            "n_epi", "base_lr" and "spl_lr_mul".

    Returns:
        The merged LTFT policy config dict.
    """

    # DQN policy using SGD with momentum instead of its default optimizer.
    MyDQNTorchPolicy = DQNTorchPolicy.with_updates(optimizer_fn=sgd_optimizer_dqn)

    # The supervised learning policy uses a learning rate scaled by "spl_lr_mul".
    spl_lr = hp["base_lr"] * hp["spl_lr_mul"]

    ltft_config = merge_dicts(
      LTFT_DEFAULT_CONFIG_UPDATE,
      {
        "sgd_momentum": 0.9,
        # NOTE(review): the role of each nested policy is defined by the LTFT
        # implementation, which is not visible here.
        'nested_policies': [
            {"Policy_class": MyDQNTorchPolicy, "config_update": {}},
            {"Policy_class": MyDQNTorchPolicy, "config_update": {}},
            {"Policy_class": MyDQNTorchPolicy, "config_update": {}},
            {"Policy_class": SPLTorchPolicy.with_updates(optimizer_fn=sgd_optimizer_spl),
             "config_update": {
                "learn_action": True,
                "learn_reward": False,
                "sgd_momentum": 0.75,
                "explore": False,
                "timesteps_per_iteration": hp["n_steps_per_epi"],
                # === Optimization ===
                "lr": spl_lr,
                "lr_schedule": [(0, spl_lr),
                                (int(hp["n_steps_per_epi"] * hp["n_epi"]), hp["base_lr"] / 1e9)],
                # The deprecated `size_average` and `reduce` arguments are no
                # longer passed: they were None, so `reduction` alone already
                # determined the behavior.
                "loss_fn": torch.nn.CrossEntropyLoss(
                    weight=None,
                    ignore_index=-100,
                    reduction='mean')
              }
             },
          ],
      }
    )
    return ltft_config

def get_default_DQN_config(hp):
    """Return the DQN-specific part of the RLLib config.

    Args:
        hp: hyperparameters; "n_steps_per_epi" and "n_epi" are read here.

    Returns:
        A dict of DQN settings (iteration length, target network update
        frequency, replay buffer size and model architecture).
    """
    steps_per_episode = hp["n_steps_per_epi"]
    # The replay buffer can hold every step of the whole training.
    total_training_steps = int(steps_per_episode * hp["n_epi"])

    # Fully connected net: hidden layer sizes and nonlinearity (tanh, relu).
    model_config = {
        "fcnet_hiddens": [32, 2],
        "fcnet_activation": "relu",
    }

    return {
        # === DQN Models ===
        # Minimum env steps per train call: one episode. This only sets the
        # length of an iteration, it does not affect learning.
        "timesteps_per_iteration": steps_per_episode,
        # The target network is updated every episode worth of steps.
        "target_network_update_freq": steps_per_episode,
        # === Replay buffer ===
        # Size of the replay buffer (per worker if async_updates is set).
        "buffer_size": total_training_steps,
        # No dueling architecture.
        "dueling": False,
        # Dense layers used by the advantage/value branches of a dueling architecture.
        "hiddens": [32],
        # No double DQN.
        "double_q": False,
        # No prioritized replay buffer.
        "prioritized_replay": False,
        "model": model_config,
    }

def get_exploration_config(hp):
    """Return the exploration part of the RLLib config.

    Exploration uses the toolbox's `SoftQSchedule` with a temperature that
    goes from 1.0 at timestep 0 to 0.1 at 75% of the total number of training
    timesteps, and stays at 0.1 afterwards.

    Args:
        hp: hyperparameters; "n_steps_per_epi" and "n_epi" are read here.
    """
    # Timestep at which the temperature reaches its final value.
    end_of_decay = int(hp["n_steps_per_epi"] * hp["n_epi"] * 0.75)
    temperature_schedule = PiecewiseSchedule(
        endpoints=[(0, 1.0), (end_of_decay, 0.1)],
        outside_value=0.1,
        framework="torch")

    return {
        # === Exploration Settings ===
        # Set to False to disable exploration (e.g., for evaluation).
        "explore": True,
        # Config of the Exploration object.
        "exploration_config": {
            # The Exploration class to use: here the class is given directly.
            # RLLib also accepts the name (str) of a class from the
            # `rllib.utils.exploration` package or the full location of a class.
            "type": exploration.SoftQSchedule,
            # Constructor kwargs of the Exploration class.
            "temperature_schedule": temperature_schedule,
        },
    }

def get_optimization_and_general_config(hp: dict):
    """Return the optimization and general parts of the RLLib config.

    Args:
        hp: hyperparameters. The keys read here are "base_lr",
            "n_steps_per_epi", "n_epi", "bs_epi_mul" and "seeds".
    """
    base_lr = hp["base_lr"]
    steps_per_episode = hp["n_steps_per_epi"]
    # Timestep at which the learning-rate schedule reaches its final value.
    last_scheduled_timestep = int(steps_per_episode * hp["n_epi"])

    return {
        # === Optimization ===
        # Initial learning rate
        "lr": base_lr,
        # Learning rate schedule: from base_lr down to base_lr / 1e9
        "lr_schedule": [(0, base_lr),
                        (last_scheduled_timestep, base_lr / 1e9)],
        # Number of steps to sample before learning starts
        # ("bs_epi_mul" episodes worth of steps).
        "learning_starts": int(steps_per_episode * hp["bs_epi_mul"]),
        # The replay buffer is updated with one episode worth of samples at
        # once (per worker if num_workers > 1).
        "rollout_fragment_length": steps_per_episode,
        # Size of a batch sampled from the replay buffer for training
        # ("bs_epi_mul" episodes worth of steps).
        "train_batch_size": int(steps_per_episode * hp["bs_epi_mul"]),
        "gamma": 0.5,

        # === General config ===
        "framework": "torch",
        "batch_mode": "complete_episodes",
        # LTFT supports a single worker only, otherwise the trajectories of
        # several opponents would be mixed.
        "num_workers": 0,
        # LTFT supports a single env per worker only, otherwise several
        # episodes would be played at the same time.
        "num_envs_per_worker": 1,
        # One Tune trial per seed.
        "seed": tune.grid_search(hp["seeds"]),
    }

In [None]:
def get_stop_config(hp):
    """Return the Tune stopping criterion: stop after ``hp["n_epi"]`` episodes."""
    return {"episodes_total": hp["n_epi"]}

def train_lvl0_agents(lvl0_hparameters):
    """Train the level 0 LTFT agents and return the resulting Tune analysis.

    Ray is (re)started before the training and shut down afterwards. A single
    checkpoint is saved per trial, at the end of training.
    """
    tune_kwargs = dict(
        config=get_rllib_config(lvl0_hparameters),
        checkpoint_freq=0,
        stop=get_stop_config(lvl0_hparameters),
        checkpoint_at_end=True,
        metric="episode_reward_mean",
        mode="max",
        name="Lvl0_LTFT",
    )

    # Shut down any Ray instance left over from a previous run before starting a new one.
    ray.shutdown()
    ray.init(num_cpus=os.cpu_count(), num_gpus=0)
    tune_analysis_lvl0 = ray.tune.run(DQNTrainer, **tune_kwargs)
    ray.shutdown()

    return tune_analysis_lvl0

We train 4 lvl0 LTFT agents:

In [None]:
# Hyperparameters of the level 0 (LTFT) training.
lvl0_hparameters = dict(
    n_epi=400,
    n_steps_per_epi=20,
    bs_epi_mul=4,
    base_lr=0.04,
    spl_lr_mul=10.0,
    train_n_replicates=2,
    debug=False,
)
# One random seed per training replicate.
lvl0_hparameters["seeds"] = miscellaneous.get_random_seeds(
    lvl0_hparameters["train_n_replicates"])

tune_analysis_lvl0 = train_lvl0_agents(lvl0_hparameters)

We now have several pairs of `LTFT` agents trained in self-play. We are playing in `IteratedPrisonersDilemma` and thus we may fear that an opponent could exploit our agents after they have been deployed.  
We are going to look at precisely that. We will train lvl1 DQN agents that will learn while the `LTFT` agents are frozen (not learning any more). The DQN agents will learn by playing against a population of `LTFT` agents. This simulates the fact that, when training the exploiters, we may not know which `LTFT` agent will actually be deployed, and thus we want to produce an agent that would exploit any `LTFT` agent.

In [None]:
def train_lvl1_agents(hp_lvl1, tune_analysis_lvl0):
    """Train level 1 DQN agents against the checkpointed level 0 agents.

    Args:
        hp_lvl1: hyperparameters of the level 1 training.
        tune_analysis_lvl0: Tune analysis of the level 0 training, from which
            the level 0 checkpoints are extracted.

    Returns:
        The Tune analysis of the level 1 training.
    """
    base_rllib_config = get_rllib_config(hp_lvl1)
    stop_config = get_stop_config(hp_lvl1)

    # Helper extracting all the checkpoints saved in tune_analysis_lvl0.
    checkpoints_lvl0 = miscellaneous.extract_checkpoints(tune_analysis_lvl0)

    env_config = get_env_config(hp_lvl1)["env_config"]
    rllib_config = modify_conf_for_lvl1_training(
        hp_lvl1, env_config, base_rllib_config, checkpoints_lvl0)

    tune_kwargs = dict(
        config=rllib_config,
        stop=stop_config,
        checkpoint_at_end=True,
        metric="episode_reward_mean",
        mode="max",
        name="Lvl1_DQN",
    )

    # Shut down any Ray instance left over from a previous run before starting a new one.
    ray.shutdown()
    ray.init(num_cpus=os.cpu_count(), num_gpus=0)
    tune_analysis_lvl1 = ray.tune.run(DQNTrainer, **tune_kwargs)
    ray.shutdown()

    return tune_analysis_lvl1

def modify_conf_for_lvl1_training(hp_lvl1, env_config, rllib_config_lvl1, lvl0_checkpoints):
    """Adapt an LTFT self-play config to train level 1 agents against level 0 agents.

    `rllib_config_lvl1` is modified in place and also returned.

    Args:
        hp_lvl1: hyperparameters; "n_seeds_lvl0" and "n_seeds_lvl1" are read here.
        env_config: environment config providing "players_ids".
        rllib_config_lvl1: the RLLib config to adapt.
        lvl0_checkpoints: checkpoints of the trained level 0 agents.

    Returns:
        The adapted RLLib config.
    """
    # The lvl1 agents are player 1 (index 0); the lvl0 agents are player 2 (index 1).
    players_ids = env_config["players_ids"]
    lvl1_policy_id, lvl0_policy_id = players_ids[0], players_ids[1]

    # The lvl1 agent is a plain DQN policy (with SGD + momentum optimizer)
    # instead of LTFT with nested DQN policies.
    dqn_policy_with_sgd = DQNTorchPolicy.with_updates(optimizer_fn=sgd_optimizer_dqn)
    lvl1_policy_spec = (
        dqn_policy_with_sgd,
        IteratedPrisonersDilemma.OBSERVATION_SPACE,
        IteratedPrisonersDilemma.ACTION_SPACE,
        {"sgd_momentum": 0.9},
    )
    rllib_config_lvl1["multiagent"]["policies"][lvl1_policy_id] = lvl1_policy_spec

    # Add the callback needed by the PopulationOfIdenticalAlgo to the LTFT and
    # logging callbacks.
    logging_callbacks = log.get_logging_callbacks_class(
        log_env_step=True, log_from_policy=True)
    rllib_config_lvl1["callbacks"] = miscellaneous.merge_callbacks(
        LTFTCallbacks,
        population.PopulationOfIdenticalAlgoCallBacks,
        logging_callbacks)

    # Finally, replace player 2's policy (LTFT) by a PopulationOfIdenticalAlgo
    # policy that uses nested LTFT policies.
    # Each lvl1 agent is trained against a population of
    # n_seeds_lvl0 // n_seeds_lvl1 lvl0 agents.
    n_lvl0_per_population = hp_lvl1["n_seeds_lvl0"] // hp_lvl1["n_seeds_lvl1"]
    lvl1_best_response.prepare_config_for_lvl1_training(
        config=rllib_config_lvl1,
        lvl0_policy_id=lvl0_policy_id,
        lvl1_policy_id=lvl1_policy_id,
        select_n_lvl0_from_population=n_lvl0_per_population,
        # Number of lvl1 agents to train
        n_lvl1_to_train=hp_lvl1["n_seeds_lvl1"],
        # Each lvl0 agent is used only once
        overlapping_population=False,
        lvl0_checkpoints=lvl0_checkpoints)
    # NOTE(review): a class-based alternative (lvl1_best_response.L1BRConfigurationHelper)
    # was previously sketched here as commented-out code; confirm whether it should be used.

    return rllib_config_lvl1


In [None]:
# The lvl1 training reuses the lvl0 hyperparameters, on an independent copy.
lvl1_hparameters = copy.deepcopy(lvl0_hparameters)
n_lvl0_replicates = lvl0_hparameters["train_n_replicates"]
lvl1_hparameters["n_seeds_lvl0"] = n_lvl0_replicates
# At most 2 lvl1 agents, each trained against at least 2 lvl0 agents.
# NOTE(review): with fewer than 2 lvl0 replicates this is 0, and
# modify_conf_for_lvl1_training divides n_seeds_lvl0 by it.
lvl1_hparameters["n_seeds_lvl1"] = min(n_lvl0_replicates//2, 2)

tune_analysis_lvl1 = train_lvl1_agents(lvl1_hparameters, tune_analysis_lvl0)

# Inspect the results of the lvl1 training.
lvl1_results_df = tune_analysis_lvl1.results_df
print(lvl1_results_df.columns)
print(lvl1_results_df.head())
print(lvl1_results_df["episode_reward_mean"])

##  c. Use TensorBoard to visualize the trainings


You can use TensorBoard to view trial performances.

In [None]:
# Load the TensorBoard notebook extension, which provides the `%tensorboard` magic used in the next cell.
%load_ext tensorboard

In [None]:
%tensorboard --logdir /root/ray_results/ # On Google Colab
# On your own machine, use this line instead of the one above:
# %tensorboard --logdir ~/ray_results/ # On your machine

# Tip: in the TensorBoard UI, you can filter the graphs with the regex "mean_welfare|defection_metric|entropy|CC"