143 changes: 143 additions & 0 deletions docs/source/Examples/a2c_three_columns.rst
@@ -183,6 +183,128 @@ where :math:`MSE` is the mean square error function and :math:`y_i` are the stat
Code
----

.. code-block:: python

import random
from pathlib import Path
import numpy as np
import torch

from src.algorithms.a2c import A2C, A2CConfig
from src.networks.a2c_networks import A2CNetSimpleLinear
from src.examples.helpers.load_full_mock_dataset import load_discrete_env, get_ethinicity_hierarchy, \
get_gender_hierarchy, get_salary_bins, load_mock_subjects
from src.datasets import ColumnType
from src.spaces.env_type import DiscreteEnvType
from src.spaces.action_space import ActionSpace
from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
from src.utils.iteration_control import IterationControl
from src.examples.helpers.plot_utils import plot_running_avg
from src.spaces.multiprocess_env import MultiprocessEnv
from src.trainers.pytorch_trainer import PyTorchTrainer, PyTorchTrainerConfig
from src.maths.optimizer_type import OptimizerType
from src.maths.pytorch_optimizer_config import PyTorchOptimizerConfig
from src.utils import INFO

.. code-block:: python

N_STATES = 10
N_ITRS_PER_EPISODE = 400
ACTION_SPACE_SIZE = 10
N_WORKERS = 3
N_EPISODES = 1001
GAMMA = 0.99
ALPHA = 0.1
PUNISH_FACTOR = 2.0
MAX_DISTORTION = 0.7
MIN_DISTORTION = 0.4
SAVE_DISTORTED_SETS_DIR = "/home/alex/qi3/drl_anonymity/src/examples/a2c_all_cols_multi_state_results/distorted_set"
USE_IDENTIFYING_COLUMNS_DIST = True
IDENTIFY_COLUMN_DIST_FACTOR = 0.1
OUT_OF_MAX_BOUND_REWARD = -1.0
OUT_OF_MIN_BOUND_REWARD = -1.0
IN_BOUNDS_REWARD = 5.0
OUTPUT_MSG_FREQUENCY = 100
N_ROUNDS_BELOW_MIN_DISTORTION = 10
N_COLUMNS = 11

.. code-block:: python

def env_loader(kwargs):

column_types = {"NHSno": ColumnType.IDENTIFYING_ATTRIBUTE,
"given_name": ColumnType.IDENTIFYING_ATTRIBUTE,
"surname": ColumnType.IDENTIFYING_ATTRIBUTE,
"gender": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
"dob": ColumnType.SENSITIVE_ATTRIBUTE,
"ethnicity": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
"education": ColumnType.SENSITIVE_ATTRIBUTE,
"salary": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
"mutation_status": ColumnType.SENSITIVE_ATTRIBUTE,
"preventative_treatment": ColumnType.SENSITIVE_ATTRIBUTE,
"diagnosis": ColumnType.INSENSITIVE_ATTRIBUTE}

# define the action space
action_space = ActionSpace(n=ACTION_SPACE_SIZE)

# all the columns of type SENSITIVE_ATTRIBUTE are kept as they are,
# because currently we have no model for them;
# INSENSITIVE_ATTRIBUTE columns are also kept as is.
# To declare this we use an ActionIdentity
action_space.add_many(ActionIdentity(column_name="dob"),
ActionIdentity(column_name="education"),
ActionIdentity(column_name="salary"),
ActionIdentity(column_name="diagnosis"),
ActionIdentity(column_name="mutation_status"),
ActionIdentity(column_name="preventative_treatment"),
ActionIdentity(column_name="ethnicity"),
ActionStringGeneralize(column_name="ethnicity",
generalization_table=get_ethinicity_hierarchy()),
ActionStringGeneralize(column_name="gender",
generalization_table=get_gender_hierarchy()),
ActionNumericBinGeneralize(column_name="salary",
generalization_table=get_salary_bins(ds=load_mock_subjects(),
n_states=N_STATES)))
# shuffle the action space
# using different seeds
action_space.shuffle(seed=kwargs["rank"] + 1)

env = load_discrete_env(env_type=DiscreteEnvType.MULTI_COLUMN_STATE, n_states=N_STATES,
min_distortion={"ethnicity": 0.133, "salary": 0.133, "gender": 0.133,
"dob": 0.0, "education": 0.0, "diagnosis": 0.0,
"mutation_status": 0.0, "preventative_treatment": 0.0,
"NHSno": 0.0, "given_name": 0.0, "surname": 0.0},
max_distortion={"ethnicity": 0.133, "salary": 0.133, "gender": 0.133,
"dob": 0.0, "education": 0.0, "diagnosis": 0.0,
"mutation_status": 0.0, "preventative_treatment": 0.0,
"NHSno": 0.1, "given_name": 0.1, "surname": 0.1},
total_min_distortion=MIN_DISTORTION, total_max_distortion=MAX_DISTORTION,
out_of_max_bound_reward=OUT_OF_MAX_BOUND_REWARD,
out_of_min_bound_reward=OUT_OF_MIN_BOUND_REWARD,
in_bounds_reward=IN_BOUNDS_REWARD,
punish_factor=PUNISH_FACTOR,
column_types=column_types,
action_space=action_space,
save_distoreted_sets_dir=SAVE_DISTORTED_SETS_DIR,
use_identifying_column_dist_in_total_dist=USE_IDENTIFYING_COLUMNS_DIST,
use_identifying_column_dist_factor=IDENTIFY_COLUMN_DIST_FACTOR,
gamma=GAMMA,
n_rounds_below_min_distortion=N_ROUNDS_BELOW_MIN_DISTORTION)

# we want to get the distances as states
# not bin indices
env.config.state_as_distances = True

return env

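Each worker builds its own copy of the environment through ``env_loader``; the ``rank``-based seed above ensures that every worker explores a differently shuffled action space. The snippet below is only a sketch of how the loader might be handed to ``MultiprocessEnv``; the constructor arguments shown are assumptions and may not match the actual class signature.

.. code-block:: python

# a minimal sketch, NOT the actual MultiprocessEnv API: we assume here that it
# accepts the builder callable and the number of workers, and that it forwards a
# per-worker "rank" keyword argument to env_loader
env = MultiprocessEnv(env_builder=env_loader, n_workers=N_WORKERS)
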
.. code-block:: python

def action_sampler(logits: torch.Tensor) -> torch.distributions.Distribution:

action_dist = torch.distributions.Categorical(logits=logits)
return action_dist

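The sampler simply wraps the policy logits in a ``Categorical`` distribution. As an illustration only, the snippet below shows how an action and its log-probability could be drawn from it during a rollout; the variable names are hypothetical and not taken from the trainer code.

.. code-block:: python

# illustrative usage only; logits stands in for a hypothetical policy-head output
logits = torch.zeros(1, ACTION_SPACE_SIZE)

action_dist = action_sampler(logits)      # a torch.distributions.Categorical
action = action_dist.sample()             # sampled action index
log_prob = action_dist.log_prob(action)   # log-probability used by the policy loss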

.. code-block:: python

if __name__ == '__main__':
@@ -230,6 +352,15 @@ Code
plot_running_avg(avg_episode_dist, steps=100,
xlabel="Episodes", ylabel="Distortion",
title="Running distortion average over 100 episodes")


# play the agent on the environment.
# call the environment builder to create
# an instance of the environment
discrete_env = env.env_builder()

stop_criterion = IterationControl(n_itrs=10, min_dist=MIN_DISTORTION, max_dist=MAX_DISTORTION)
agent.play(env=discrete_env, criteria=stop_criterion)

except Exception as e:
print("An excpetion was thrown...{0}".format(str(e)))
@@ -239,6 +370,18 @@ Code
Results
--------

The following figures show the performance of the learning process.

.. figure:: images/a2c_multi_cols_multi_state_rewards.png

Running average reward.


.. figure:: images/a2c_multi_cols_multi_state_distortion.png

Running average total distortion.


References
----------

16 changes: 10 additions & 6 deletions src/algorithms/a2c.py
@@ -39,11 +39,10 @@ class A2CConfig(object):
max_grad_norm: float = 1.0
n_iterations_per_episode: int = 100
n_workers: int = 1
action_sampler: Callable = None
value_function: LossFunction = None
policy_loss: LossFunction = None
batch_size: int = 0
normalize_advantages: bool = True
device: str = 'cpu'
action_sampler: Callable = None
a2cnet: nn.Module = None
save_model_path: Path = None
optimizer_config: PyTorchOptimizerConfig = None
@@ -147,6 +146,7 @@ def parameters(self) -> Any:
Returns
-------

An array with the model parameters
"""
return self.a2c_net.parameters()

@@ -195,11 +195,8 @@ def actions_after_episode_ends(self, env: Env, episode_idx: int, **options) -> N
"""

episode_info: EpisodeInfo = options["episode_info"]

buffer: ReplayBuffer = episode_info.info["buffer"]

#reward = buffer.get_item_as_torch_tensor("reward"),

self._optimize_model(rewards=buffer.get_item_as_torch_tensor("reward"),
logprobs=buffer.get_torch__tensor_info_item_as_torch_tensor("logprobs"),
values=buffer.get_torch__tensor_info_item_as_torch_tensor("values"),
@@ -361,6 +358,10 @@ def _compute_advantages(self, rewards: np.array, values: np.array) -> np.array:
# create TD errors: R_t + gamma*V_{t+1} - V_t for t=0 to T
advantages = rewards_[:-1] + self.config.gamma * values_[1:] - values_[: -1]

if self.config.normalize_advantages:
# normalize advantages
advantages = (advantages - np.mean(advantages)) / np.std(advantages)

# create the GAES by multiplying the tau discounts times the TD errors
gaes = np.array(
[[np.sum(tau_discounts[: total_time - 1 - t] * advantages[t:, w]) for t in range(total_time - 1 )] for w in
@@ -505,5 +506,8 @@ def play(self, env: Env, criteria: Criteria):
action = None
time_step = env.step(action)

print("{0} At state={1} with distortion={2} select action={3}".format("INFO: ", state, total_dist,
action.column_name + "-" + action.action_type.name))

if time_step.done:
time_step = env.reset()
28 changes: 20 additions & 8 deletions src/examples/a2c_multi_columns.py
@@ -11,9 +11,7 @@
from src.spaces.env_type import DiscreteEnvType
from src.spaces.action_space import ActionSpace
from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
from src.algorithms.q_learning import QLearnConfig, QLearning
from src.policies.epsilon_greedy_policy import EpsilonGreedyPolicy, EpsilonDecayOption
from src.trainers.trainer import Trainer, TrainerConfig
from src.utils.iteration_control import IterationControl
from src.examples.helpers.plot_utils import plot_running_avg
from src.spaces.multiprocess_env import MultiprocessEnv
from src.trainers.pytorch_trainer import PyTorchTrainer, PyTorchTrainerConfig
@@ -22,7 +20,7 @@
from src.utils import INFO

N_STATES = 10
N_ITRS_PER_EPISODE = 30
N_ITRS_PER_EPISODE = 400
ACTION_SPACE_SIZE = 10
N_WORKERS = 3
N_EPISODES = 1001
@@ -32,9 +30,6 @@
MAX_DISTORTION = 0.7
MIN_DISTORTION = 0.4
SAVE_DISTORTED_SETS_DIR = "/home/alex/qi3/drl_anonymity/src/examples/a2c_all_cols_multi_state_results/distorted_set"
EPS = 1.0
EPSILON_DECAY_OPTION = EpsilonDecayOption.CONSTANT_RATE # .INVERSE_STEP
EPSILON_DECAY_FACTOR = 0.01
USE_IDENTIFYING_COLUMNS_DIST = True
IDENTIFY_COLUMN_DIST_FACTOR = 0.1
OUT_OF_MAX_BOUND_REWARD = -1.0
@@ -131,8 +126,17 @@ def action_sampler(logits: torch.Tensor) -> torch.distributions.Distribution:

# agent configuration
a2c_config = A2CConfig(action_sampler=action_sampler, n_iterations_per_episode=N_ITRS_PER_EPISODE,
a2cnet=net, save_model_path=Path("./a2c_three_columns_output/"),
a2cnet=net, save_model_path=Path("./a2c_all_cols_multi_state_results/"),
n_workers=N_WORKERS,
normalize_advantages=True,
gamma=GAMMA,
tau=0.1,
beta=None, # don't use entropy
policy_loss_weight=1.0,
value_loss_weight=1.0,
max_grad_norm=1.0,
batch_size=N_ITRS_PER_EPISODE,
device='cpu',
optimizer_config=PyTorchOptimizerConfig(optimizer_type=OptimizerType.ADAM,
optimizer_learning_rate=ALPHA))

@@ -165,6 +169,14 @@ def action_sampler(logits: torch.Tensor) -> torch.distributions.Distribution:
xlabel="Episodes", ylabel="Distortion",
title="Running distortion average over 100 episodes")

# play the agent on the environment.
# call the environment builder to create
# an instance of the environment
discrete_env = env.env_builder()

stop_criterion = IterationControl(n_itrs=10, min_dist=MIN_DISTORTION, max_dist=MAX_DISTORTION)
agent.play(env=discrete_env, criteria=stop_criterion)

except Exception as e:
print("An excpetion was thrown...{0}".format(str(e)))
finally: