diff --git a/docs/source/Examples/images/qlearn_distortion_multi_cols.png b/docs/source/Examples/images/qlearn_distortion_multi_cols.png
new file mode 100644
index 0000000..a52da78
Binary files /dev/null and b/docs/source/Examples/images/qlearn_distortion_multi_cols.png differ
diff --git a/docs/source/Examples/images/qlearn_distortion_multi_cols_multi_state.png b/docs/source/Examples/images/qlearn_distortion_multi_cols_multi_state.png
new file mode 100644
index 0000000..a52da78
Binary files /dev/null and b/docs/source/Examples/images/qlearn_distortion_multi_cols_multi_state.png differ
diff --git a/docs/source/Examples/images/qlearn_rewards_all_cols.png b/docs/source/Examples/images/qlearn_rewards_all_cols.png
new file mode 100644
index 0000000..d32f012
Binary files /dev/null and b/docs/source/Examples/images/qlearn_rewards_all_cols.png differ
diff --git a/docs/source/Examples/images/qlearn_rewards_all_cols_multi_state.png b/docs/source/Examples/images/qlearn_rewards_all_cols_multi_state.png
new file mode 100644
index 0000000..d32f012
Binary files /dev/null and b/docs/source/Examples/images/qlearn_rewards_all_cols_multi_state.png differ
diff --git a/docs/source/Examples/qlearning_all_columns.rst b/docs/source/Examples/qlearning_all_columns.rst
new file mode 100644
index 0000000..fa2b2c8
--- /dev/null
+++ b/docs/source/Examples/qlearning_all_columns.rst
@@ -0,0 +1,162 @@
+Q-learning with many columns
+=============================
+
+Overview
+--------
+
+In the :doc:`previous <qlearning_three_columns>` example, we applied Q-learning on a dataset consisting
+of three columns. Moreover, we used a one-dimensional state space; we discretized the range :math:`[0,1]` into bins and used the
+resulting bin index as the state index. In this example, we will simply allow for more columns in the data set.
+Other than that, this example is the same as the previous one.
+
+Code
+----
+
+The necessary imports
+
+.. code-block::
+
+    import random
+    import numpy as np
+
+    from src.examples.helpers.load_full_mock_dataset import load_discrete_env, get_ethinicity_hierarchy, \
+        get_gender_hierarchy, get_salary_bins, load_mock_subjects
+    from src.datasets import ColumnType
+    from src.spaces.env_type import DiscreteEnvType
+    from src.spaces.action_space import ActionSpace
+    from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
+    from src.algorithms.q_learning import QLearnConfig, QLearning
+    from src.policies.epsilon_greedy_policy import EpsilonGreedyPolicy, EpsilonDecayOption
+    from src.trainers.trainer import Trainer, TrainerConfig
+    from src.examples.helpers.plot_utils import plot_running_avg
+    from src.utils import INFO
+
+Next, we establish a set of configuration parameters
+
+.. code-block::
+
+    # configuration params
+    N_STATES = 10
+    GAMMA = 0.99
+    ALPHA = 0.1
+    PUNISH_FACTOR = 2.0
+    MAX_DISTORTION = 0.7
+    MIN_DISTORTION = 0.4
+    SAVE_DISTORTED_SETS_DIR = "/home/alex/qi3/drl_anonymity/src/examples/q_learning_all_cols_results/distorted_set"
+    EPS = 1.0
+    EPSILON_DECAY_OPTION = EpsilonDecayOption.CONSTANT_RATE  # .INVERSE_STEP
+    EPSILON_DECAY_FACTOR = 0.01
+    USE_IDENTIFYING_COLUMNS_DIST = True
+    IDENTIFY_COLUMN_DIST_FACTOR = 0.1
+    N_EPISODES = 1001
+    N_ITRS_PER_EPISODE = 30
+    OUT_OF_MAX_BOUND_REWARD = -1.0
+    OUT_OF_MIN_BOUND_REWARD = -1.0
+    IN_BOUNDS_REWARD = 5.0
+    OUTPUT_MSG_FREQUENCY = 100
+    N_ROUNDS_BELOW_MIN_DISTORTION = 10
+
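+The environment maps the total dataset distortion onto a state index by binning.
+To make this concrete, here is a minimal, illustrative sketch using ``numpy``; the
+actual mapping is implemented by the environment in
+``src.spaces.discrete_state_environment`` and may treat the bin edges differently.
+
+.. code-block::
+
+    import numpy as np
+
+    # discretize the assumed distortion range [0, 1] into N_STATES bins
+    n_states = 10
+    bins = np.linspace(0.0, 1.0, n_states)
+
+    # an example total dataset distortion
+    total_distortion = 0.45
+
+    # the index of the bin that the distortion falls into serves as the state index
+    state_idx = int(np.digitize(total_distortion, bins))
+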
+The driver code brings all the elements together
+
+.. code-block::
+
+    if __name__ == '__main__':
+
+        # set the seed for random engine
+        random.seed(42)
+
+        # specify the column types. An identifying column
+        # will be removed from the anonymized data set.
+        # An INSENSITIVE_ATTRIBUTE remains intact.
+        # A QUASI_IDENTIFYING_ATTRIBUTE is used in the anonymization.
+        # A SENSITIVE_ATTRIBUTE currently remains intact.
+        column_types = {"NHSno": ColumnType.IDENTIFYING_ATTRIBUTE,
+                        "given_name": ColumnType.IDENTIFYING_ATTRIBUTE,
+                        "surname": ColumnType.IDENTIFYING_ATTRIBUTE,
+                        "gender": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
+                        "dob": ColumnType.SENSITIVE_ATTRIBUTE,
+                        "ethnicity": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
+                        "education": ColumnType.SENSITIVE_ATTRIBUTE,
+                        "salary": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
+                        "mutation_status": ColumnType.SENSITIVE_ATTRIBUTE,
+                        "preventative_treatment": ColumnType.SENSITIVE_ATTRIBUTE,
+                        "diagnosis": ColumnType.INSENSITIVE_ATTRIBUTE}
+
+        # define the action space
+        action_space = ActionSpace(n=10)
+
+        # all the columns that are SENSITIVE_ATTRIBUTE will be kept as they are
+        # because currently we have no model
+        # also INSENSITIVE_ATTRIBUTE will be kept as is
+        # in order to declare this we use an ActionIdentity
+        action_space.add_many(ActionIdentity(column_name="dob"),
+                              ActionIdentity(column_name="education"),
+                              ActionIdentity(column_name="salary"),
+                              ActionIdentity(column_name="diagnosis"),
+                              ActionIdentity(column_name="mutation_status"),
+                              ActionIdentity(column_name="preventative_treatment"),
+                              ActionIdentity(column_name="ethnicity"),
+                              ActionStringGeneralize(column_name="ethnicity",
+                                                     generalization_table=get_ethinicity_hierarchy()),
+                              ActionStringGeneralize(column_name="gender",
+                                                     generalization_table=get_gender_hierarchy()),
+                              ActionNumericBinGeneralize(column_name="salary",
+                                                         generalization_table=get_salary_bins(ds=load_mock_subjects(),
+                                                                                              n_states=N_STATES))
+                              )
+        action_space.shuffle()
+
+        env = load_discrete_env(env_type=DiscreteEnvType.TOTAL_DISTORTION_STATE,
+                                n_states=N_STATES,
+                                min_distortion=MIN_DISTORTION, max_distortion=MAX_DISTORTION,
+                                total_min_distortion=MIN_DISTORTION, total_max_distortion=MAX_DISTORTION,
+                                out_of_max_bound_reward=OUT_OF_MAX_BOUND_REWARD,
+                                out_of_min_bound_reward=OUT_OF_MIN_BOUND_REWARD,
+                                in_bounds_reward=IN_BOUNDS_REWARD,
+                                punish_factor=PUNISH_FACTOR,
+                                column_types=column_types,
+                                action_space=action_space,
+                                save_distoreted_sets_dir=SAVE_DISTORTED_SETS_DIR,
+                                use_identifying_column_dist_in_total_dist=USE_IDENTIFYING_COLUMNS_DIST,
+                                use_identifying_column_dist_factor=IDENTIFY_COLUMN_DIST_FACTOR,
+                                gamma=GAMMA,
+                                n_rounds_below_min_distortion=N_ROUNDS_BELOW_MIN_DISTORTION)
+
+        agent_config = QLearnConfig(n_itrs_per_episode=N_ITRS_PER_EPISODE, gamma=GAMMA,
+                                    alpha=ALPHA,
+                                    policy=EpsilonGreedyPolicy(eps=EPS, n_actions=env.n_actions,
+                                                               decay_op=EPSILON_DECAY_OPTION,
+                                                               epsilon_decay_factor=EPSILON_DECAY_FACTOR))
+
+        agent = QLearning(algo_config=agent_config)
+
+        trainer_config = TrainerConfig(n_episodes=N_EPISODES, output_msg_frequency=OUTPUT_MSG_FREQUENCY)
+        trainer = Trainer(env=env, agent=agent, configuration=trainer_config)
+        trainer.train()
+
+        avg_rewards = trainer.total_rewards
+        plot_running_avg(avg_rewards, steps=100,
+                         xlabel="Episodes", ylabel="Reward",
+                         title="Running reward average over 100 episodes")
+
+        avg_episode_dist = np.array(trainer.total_distortions)
+        print("{0} Max/Min distortion {1}/{2}".format(INFO, np.max(avg_episode_dist), np.min(avg_episode_dist)))
+
+        plot_running_avg(avg_episode_dist, steps=100,
+                         xlabel="Episodes", ylabel="Distortion",
+                         title="Running distortion average over 100 episodes")
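+
+The agent explores with the ``EpsilonGreedyPolicy`` configured above. Roughly, such
+a policy selects actions along the following lines. This is an illustrative sketch
+only; the actual implementation lives in ``src.policies.epsilon_greedy_policy`` and
+also handles the epsilon decay options.
+
+.. code-block::
+
+    import random
+    import numpy as np
+
+    def epsilon_greedy(q_row, eps: float) -> int:
+        # with probability eps pick a random action (explore),
+        # otherwise pick the action with the highest Q-value (exploit)
+        if random.random() < eps:
+            return random.randint(0, len(q_row) - 1)
+        return int(np.argmax(q_row))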
+
+
+Results
+-------
+
+The following images show the performance of the learning process:
+
+.. figure:: images/qlearn_rewards_all_cols.png
+
+   Running average reward.
+
+
+.. figure:: images/qlearn_distortion_multi_cols.png
+
+   Running average total distortion.
diff --git a/docs/source/Examples/qlearning_three_columns.rst b/docs/source/Examples/qlearning_three_columns.rst
index d7cadc1..c44dd39 100644
--- a/docs/source/Examples/qlearning_three_columns.rst
+++ b/docs/source/Examples/qlearning_three_columns.rst
@@ -48,6 +48,17 @@
 Given that the total dataset distortion is assumed to be in the range :math:`[0,1]` we,
 discretize this range into bins and for each entailed value of the distortion we use the corresponding bin
 as a state index. Alternatively, we could discretize the distortion of each column into bins and create tuples of indeces representing a state.
+We preprocess the data set by normalizing the numeric columns.
+We will use the cosine normalized distance to measure the distortion of columns with string data.
+Similarly, we use the following :math:`L_2`-based norm for calculating the distortion of
+numeric columns
+
+.. math::
+
+   dist(\mathbf{v}_1, \mathbf{v}_2) = \sqrt{\frac{||\mathbf{v}_1 - \mathbf{v}_2||_{L_2}}{N}}
+
+where :math:`N` is the size of the vector. This way, due to the normalization of the numeric columns, the resulting distance will be in the range :math:`[0,1]`.
+
 Code
 ----
 
@@ -98,7 +109,7 @@
    SAVE_DISTORTED_SETS_DIR = "q_learning_three_columns_results/distorted_set"
    PUNISH_FACTOR = 2.0
 
-The dirver code creates brings all the elements together
+The driver code brings all the elements together
 
 .. code-block::
diff --git a/docs/source/examples.rst b/docs/source/examples.rst
index 2eff455..73ca019 100644
--- a/docs/source/examples.rst
+++ b/docs/source/examples.rst
@@ -7,5 +7,6 @@ Some examples can be found below
    :maxdepth: 4
 
    Examples/qlearning_three_columns
+   Examples/qlearning_all_columns
    Examples/semi_gradient_sarsa_three_columns
    Examples/a2c_three_columns
diff --git a/src/datasets/dataset_wrapper.py b/src/datasets/dataset_wrapper.py
index 26b95ad..10fe20e 100644
--- a/src/datasets/dataset_wrapper.py
+++ b/src/datasets/dataset_wrapper.py
@@ -214,6 +214,7 @@ def apply_column_transform(self, column_name: str, transform: Transform) -> None
         """
 
         # get the column
+        column = self.get_column(col_name=column_name)
         column = transform.act(**{"data": column.values})
         self.ds[transform.column_name] = column
diff --git a/src/examples/helpers/load_full_mock_dataset.py b/src/examples/helpers/load_full_mock_dataset.py
index d14d14b..18ec319 100644
--- a/src/examples/helpers/load_full_mock_dataset.py
+++ b/src/examples/helpers/load_full_mock_dataset.py
@@ -9,7 +9,6 @@
 from src.datasets.datasets_loaders import MockSubjectsLoader, MockSubjectsData
 from src.spaces.discrete_state_environment import DiscreteStateEnvironment
 from src.spaces.action_space import ActionSpace
-from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
 from src.maths.distortion_calculator import DistortionCalculationType, DistortionCalculator
 from src.maths.numeric_distance_type import NumericDistanceType
 from src.maths.string_distance_calculator import StringDistanceType
@@ -17,24 +16,6 @@
 from src.spaces.env_type import DiscreteEnvType
 
 
-N_LAYERS = 5
-N_BINS = 10
-N_EPISODES = 1000
-OUTPUT_MSG_FREQUENCY = 100
-GAMMA = 0.99
-ALPHA = 0.1
-
-MAX_DISTORTION = 0.7
-MIN_DISTORTION = 0.3
-OUT_OF_MAX_BOUND_REWARD = -1.0
-OUT_OF_MIN_BOUND_REWARD = -1.0
-IN_BOUNDS_REWARD = 5.0
-N_ROUNDS_BELOW_MIN_DISTORTION = 10
-#SAVE_DISTORTED_SETS_DIR = "/home/alex/qi3/drl_anonymity/src/examples/semi_grad_sarsa/distorted_set"
-REWARD_FACTOR = 0.95
-PUNISH_FACTOR = 2.0
-
-
 def get_gender_hierarchy():
     hierarchy = SerialHierarchy(values={"F": "*", "M": "*", "*": "*"})
     return hierarchy
@@ -94,14 +75,32 @@
     return ds
 
 
+def get_salary_bins(ds: MockSubjectsLoader, n_states: int):
+    # create bins for the salary generalization
+    unique_salary = ds.get_column_unique_values(col_name="salary")
+    unique_salary.sort()
+
+    # slightly increase the max value because otherwise
+    # the maximum salary falls out of the last bin
+    bins = np.linspace(unique_salary[0], unique_salary[-1] + 1, n_states)
+    return bins
+
+
 def load_discrete_env(env_type: DiscreteEnvType, n_states: int,
                       min_distortion: Any, max_distortion: Any,
                       total_min_distortion: float, total_max_distortion: float,
-                      punish_factor: float, column_types: dict,
+                      out_of_max_bound_reward: float,
+                      out_of_min_bound_reward: float,
+                      in_bounds_reward: float,
+                      punish_factor: float,
+                      column_types: dict,
                       action_space: ActionSpace,
                       save_distoreted_sets_dir: str,
                       use_identifying_column_dist_in_total_dist: bool,
-                      use_identifying_column_dist_factor: float) -> DiscreteStateEnvironment:
+                      use_identifying_column_dist_factor: float,
+                      gamma: float,
+                      n_rounds_below_min_distortion: int) -> DiscreteStateEnvironment:
+
     mock_ds = load_mock_subjects()
 
     action_space.shuffle()
@@ -114,18 +113,17 @@
                 dataset_distortion_type=DistortionCalculationType.SUM),
             reward_manager=RewardManager(
                 bounds=(total_min_distortion, total_max_distortion),
-                out_of_max_bound_reward=OUT_OF_MAX_BOUND_REWARD,
-                out_of_min_bound_reward=OUT_OF_MIN_BOUND_REWARD,
-                in_bounds_reward=IN_BOUNDS_REWARD,
+                out_of_max_bound_reward=out_of_max_bound_reward,
+                out_of_min_bound_reward=out_of_min_bound_reward,
+                in_bounds_reward=in_bounds_reward,
                 min_distortions=min_distortion, max_distortions=max_distortion,
                 punish_factor=punish_factor),
-            gamma=GAMMA,
-            reward_factor=REWARD_FACTOR,
+            gamma=gamma,
            min_distortion=min_distortion, min_total_distortion=total_min_distortion,
            max_distortion=max_distortion, max_total_distortion=total_max_distortion,
-           n_rounds_below_min_distortion=N_ROUNDS_BELOW_MIN_DISTORTION,
+           n_rounds_below_min_distortion=n_rounds_below_min_distortion,
            distorted_set_path=Path(save_distoreted_sets_dir),
            n_states=n_states, env_type=env_type, column_types=column_types,
            use_identifying_column_dist_in_total_dist=use_identifying_column_dist_in_total_dist,
diff --git a/src/examples/helpers/load_three_columns_mock_dataset.py b/src/examples/helpers/load_three_columns_mock_dataset.py
index 62d7457..31fbaec 100644
--- a/src/examples/helpers/load_three_columns_mock_dataset.py
+++ b/src/examples/helpers/load_three_columns_mock_dataset.py
@@ -9,7 +9,6 @@
 from src.datasets.datasets_loaders import MockSubjectsLoader, MockSubjectsData
 from src.spaces.discrete_state_environment import DiscreteStateEnvironment
 from src.spaces.action_space import ActionSpace
-from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
 from src.maths.distortion_calculator import DistortionCalculationType, DistortionCalculator
 from src.maths.numeric_distance_type import NumericDistanceType
 from src.maths.string_distance_calculator import StringDistanceType
diff --git a/src/examples/q_learning_all_columns.py b/src/examples/q_learning_all_columns.py
index 0940873..0beba2a 100644
--- a/src/examples/q_learning_all_columns.py
+++ b/src/examples/q_learning_all_columns.py
@@ -1,6 +1,8 @@
 import random
+import numpy as np
 
-from src.examples.helpers.load_full_mock_dataset import load_discrete_env, get_ethinicity_hierarchy, get_gender_hierarchy
+from src.examples.helpers.load_full_mock_dataset import load_discrete_env, get_ethinicity_hierarchy, \
+    get_gender_hierarchy, get_salary_bins, load_mock_subjects
 from src.datasets import ColumnType
 from src.spaces.env_type import DiscreteEnvType
 from src.spaces.action_space import ActionSpace
@@ -8,10 +10,13 @@
 from src.algorithms.q_learning import QLearnConfig, QLearning
 from src.policies.epsilon_greedy_policy import EpsilonGreedyPolicy, EpsilonDecayOption
 from src.trainers.trainer import Trainer, TrainerConfig
+from src.examples.helpers.plot_utils import plot_running_avg
+from src.utils import INFO
 
-N_BINS = 20
+# configuration params
+N_STATES = 10
 GAMMA = 0.99
-ALPHA = 0.01
+ALPHA = 0.1
 PUNISH_FACTOR = 2.0
 MAX_DISTORTION = 0.7
 MIN_DISTORTION = 0.4
@@ -21,12 +26,25 @@
 EPSILON_DECAY_FACTOR = 0.01
 USE_IDENTIFYING_COLUMNS_DIST = True
 IDENTIFY_COLUMN_DIST_FACTOR = 0.1
+N_EPISODES = 1001
+N_ITRS_PER_EPISODE = 30
+OUT_OF_MAX_BOUND_REWARD = -1.0
+OUT_OF_MIN_BOUND_REWARD = -1.0
+IN_BOUNDS_REWARD = 5.0
+OUTPUT_MSG_FREQUENCY = 100
+N_ROUNDS_BELOW_MIN_DISTORTION = 10
+
 
 if __name__ == '__main__':
 
     # set the seed for random engine
     random.seed(42)
 
+    # specify the column types. An identifying column
+    # will be removed from the anonymized data set.
+    # An INSENSITIVE_ATTRIBUTE remains intact.
+    # A QUASI_IDENTIFYING_ATTRIBUTE is used in the anonymization.
+    # A SENSITIVE_ATTRIBUTE currently remains intact.
     column_types = {"NHSno": ColumnType.IDENTIFYING_ATTRIBUTE,
                     "given_name": ColumnType.IDENTIFYING_ATTRIBUTE,
                     "surname": ColumnType.IDENTIFYING_ATTRIBUTE,
@@ -34,38 +52,52 @@
                     "dob": ColumnType.SENSITIVE_ATTRIBUTE,
                     "ethnicity": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
                     "education": ColumnType.SENSITIVE_ATTRIBUTE,
-                    "salary": ColumnType.SENSITIVE_ATTRIBUTE,
+                    "salary": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
                     "mutation_status": ColumnType.SENSITIVE_ATTRIBUTE,
                     "preventative_treatment": ColumnType.SENSITIVE_ATTRIBUTE,
                     "diagnosis": ColumnType.INSENSITIVE_ATTRIBUTE}
 
-    action_space = ActionSpace(n=8)
+    # define the action space
+    action_space = ActionSpace(n=10)
+
     # all the columns that are SENSITIVE_ATTRIBUTE will be kept as they are
     # because currently we have no model
     # also INSENSITIVE_ATTRIBUTE will be kept as is
+    # in order to declare this we use an ActionIdentity
     action_space.add_many(ActionIdentity(column_name="dob"),
                           ActionIdentity(column_name="education"),
                           ActionIdentity(column_name="salary"),
                           ActionIdentity(column_name="diagnosis"),
                           ActionIdentity(column_name="mutation_status"),
                           ActionIdentity(column_name="preventative_treatment"),
+                          ActionIdentity(column_name="ethnicity"),
                           ActionStringGeneralize(column_name="ethnicity",
                                                  generalization_table=get_ethinicity_hierarchy()),
                           ActionStringGeneralize(column_name="gender",
-                                                 generalization_table=get_gender_hierarchy()))
+                                                 generalization_table=get_gender_hierarchy()),
+                          ActionNumericBinGeneralize(column_name="salary",
+                                                     generalization_table=get_salary_bins(ds=load_mock_subjects(),
+                                                                                          n_states=N_STATES))
+                          )
+    action_space.shuffle()
 
     env = load_discrete_env(env_type=DiscreteEnvType.TOTAL_DISTORTION_STATE,
-                            n_states=N_BINS,
+                            n_states=N_STATES,
                             min_distortion=MIN_DISTORTION, max_distortion=MAX_DISTORTION,
                             total_min_distortion=MIN_DISTORTION, total_max_distortion=MAX_DISTORTION,
+                            out_of_max_bound_reward=OUT_OF_MAX_BOUND_REWARD,
+                            out_of_min_bound_reward=OUT_OF_MIN_BOUND_REWARD,
+                            in_bounds_reward=IN_BOUNDS_REWARD,
                             punish_factor=PUNISH_FACTOR,
                             column_types=column_types,
                             action_space=action_space,
                             save_distoreted_sets_dir=SAVE_DISTORTED_SETS_DIR,
                             use_identifying_column_dist_in_total_dist=USE_IDENTIFYING_COLUMNS_DIST,
-                            use_identifying_column_dist_factor=IDENTIFY_COLUMN_DIST_FACTOR)
+                            use_identifying_column_dist_factor=IDENTIFY_COLUMN_DIST_FACTOR,
+                            gamma=GAMMA,
+                            n_rounds_below_min_distortion=N_ROUNDS_BELOW_MIN_DISTORTION)
 
-    agent_config = QLearnConfig(n_itrs_per_episode=100, gamma=GAMMA,
+    agent_config = QLearnConfig(n_itrs_per_episode=N_ITRS_PER_EPISODE, gamma=GAMMA,
                                 alpha=ALPHA,
                                 policy=EpsilonGreedyPolicy(eps=EPS, n_actions=env.n_actions,
                                                            decay_op=EPSILON_DECAY_OPTION,
@@ -73,7 +105,19 @@
 
     agent = QLearning(algo_config=agent_config)
 
-    trainer_config = TrainerConfig(n_episodes=100, output_msg_frequency=10)
+    trainer_config = TrainerConfig(n_episodes=N_EPISODES, output_msg_frequency=OUTPUT_MSG_FREQUENCY)
     trainer = Trainer(env=env, agent=agent, configuration=trainer_config)
     trainer.train()
 
+    avg_rewards = trainer.total_rewards
+    plot_running_avg(avg_rewards, steps=100,
+                     xlabel="Episodes", ylabel="Reward",
+                     title="Running reward average over 100 episodes")
+
+    avg_episode_dist = np.array(trainer.total_distortions)
+    print("{0} Max/Min distortion {1}/{2}".format(INFO, np.max(avg_episode_dist), np.min(avg_episode_dist)))
+
+    plot_running_avg(avg_episode_dist, steps=100,
+                     xlabel="Episodes", ylabel="Distortion",
+                     title="Running distortion average over 100 episodes")
+
diff --git a/src/examples/q_learning_multistate.py b/src/examples/q_learning_multistate.py
index fa03adf..094925f 100644
--- a/src/examples/q_learning_multistate.py
+++ b/src/examples/q_learning_multistate.py
@@ -1,34 +1,38 @@
 import random
-from pathlib import Path
 
 import numpy as np
 
-from src.algorithms.q_learning import QLearning, QLearnConfig
+from src.examples.helpers.load_full_mock_dataset import load_discrete_env, get_ethinicity_hierarchy, \
+    get_gender_hierarchy, get_salary_bins, load_mock_subjects
+from src.datasets import ColumnType
 from src.spaces.env_type import DiscreteEnvType
-
-from src.trainers.trainer import Trainer
+from src.spaces.action_space import ActionSpace
+from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
+from src.algorithms.q_learning import QLearnConfig, QLearning
 from src.policies.epsilon_greedy_policy import EpsilonGreedyPolicy, EpsilonDecayOption
-from src.utils.plot_utils import plot_running_avg
+from src.trainers.trainer import Trainer, TrainerConfig
+from src.examples.helpers.plot_utils import plot_running_avg
 from src.utils import INFO
-from src.examples.helpers.load_three_columns_mock_dataset import load_discrete_env
 
-N_BINS = 10
-N_EPISODES = 1000
-OUTPUT_MSG_FREQUENCY = 100
+# configuration params
+N_STATES = 10
 GAMMA = 0.99
 ALPHA = 0.1
-N_ITRS_PER_EPISODE = 30
+PUNISH_FACTOR = 2.0
+MAX_DISTORTION = 0.7
+MIN_DISTORTION = 0.4
+SAVE_DISTORTED_SETS_DIR = "/home/alex/qi3/drl_anonymity/src/examples/q_learning_all_cols_multi_state_results/distorted_set"
 EPS = 1.0
-EPSILON_DECAY_OPTION = EpsilonDecayOption.CONSTANT_RATE #.INVERSE_STEP
+EPSILON_DECAY_OPTION = EpsilonDecayOption.CONSTANT_RATE  # .INVERSE_STEP
 EPSILON_DECAY_FACTOR = 0.01
-MAX_DISTORTION = 0.7
-MIN_DISTORTION = 0.3
-#OUT_OF_MAX_BOUND_REWARD = -1.0
-#OUT_OF_MIN_BOUND_REWARD = -1.0
-#IN_BOUNDS_REWARD = 5.0
+USE_IDENTIFYING_COLUMNS_DIST = True
+IDENTIFY_COLUMN_DIST_FACTOR = 0.1
+N_EPISODES = 1001
+N_ITRS_PER_EPISODE = 30
+OUT_OF_MAX_BOUND_REWARD = -1.0
+OUT_OF_MIN_BOUND_REWARD = -1.0
+IN_BOUNDS_REWARD = 5.0
+OUTPUT_MSG_FREQUENCY = 100
 N_ROUNDS_BELOW_MIN_DISTORTION = 10
-SAVE_DISTORTED_SETS_DIR = "/home/alex/qi3/drl_anonymity/src/examples/q_learning_multistate_results/distorted_set"
-#REWARD_FACTOR = 0.95
-PUNISH_FACTOR = 2.0
 
 
 if __name__ == '__main__':
 
     # set the seed for random engine
     random.seed(42)
 
+    # specify the column types. An identifying column
+    # will be removed from the anonymized data set.
+    # An INSENSITIVE_ATTRIBUTE remains intact.
+    # A QUASI_IDENTIFYING_ATTRIBUTE is used in the anonymization.
+    # A SENSITIVE_ATTRIBUTE currently remains intact.
+    column_types = {"NHSno": ColumnType.IDENTIFYING_ATTRIBUTE,
+                    "given_name": ColumnType.IDENTIFYING_ATTRIBUTE,
+                    "surname": ColumnType.IDENTIFYING_ATTRIBUTE,
+                    "gender": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
+                    "dob": ColumnType.SENSITIVE_ATTRIBUTE,
+                    "ethnicity": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
+                    "education": ColumnType.SENSITIVE_ATTRIBUTE,
+                    "salary": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
+                    "mutation_status": ColumnType.SENSITIVE_ATTRIBUTE,
+                    "preventative_treatment": ColumnType.SENSITIVE_ATTRIBUTE,
+                    "diagnosis": ColumnType.INSENSITIVE_ATTRIBUTE}
+
+    # define the action space
+    action_space = ActionSpace(n=10)
+
+    # all the columns that are SENSITIVE_ATTRIBUTE will be kept as they are
+    # because currently we have no model
+    # also INSENSITIVE_ATTRIBUTE will be kept as is
+    # in order to declare this we use an ActionIdentity
+    action_space.add_many(ActionIdentity(column_name="dob"),
+                          ActionIdentity(column_name="education"),
+                          ActionIdentity(column_name="salary"),
+                          ActionIdentity(column_name="diagnosis"),
+                          ActionIdentity(column_name="mutation_status"),
+                          ActionIdentity(column_name="preventative_treatment"),
+                          ActionIdentity(column_name="ethnicity"),
+                          ActionStringGeneralize(column_name="ethnicity",
+                                                 generalization_table=get_ethinicity_hierarchy()),
+                          ActionStringGeneralize(column_name="gender",
+                                                 generalization_table=get_gender_hierarchy()),
+                          ActionNumericBinGeneralize(column_name="salary",
+                                                     generalization_table=get_salary_bins(ds=load_mock_subjects(),
+                                                                                          n_states=N_STATES))
+                          )
+    action_space.shuffle()
+
     # load the discrete environment
-    env = load_discrete_env(env_type=DiscreteEnvType.MULTI_COLUMN_STATE, n_states=N_BINS,
-                            min_distortion={"ethnicity": 0.15, "salary": 0.15,
-                                            "diagnosis": 0.0},
-                            max_distortion={"ethnicity": 0.35, "salary": 0.35,
-                                            "diagnosis": 0.0},
+    env = load_discrete_env(env_type=DiscreteEnvType.MULTI_COLUMN_STATE, n_states=N_STATES,
+                            min_distortion={"ethnicity": 0.133, "salary": 0.133, "gender": 0.133,
+                                            "dob": 0.0, "education": 0.0, "diagnosis": 0.0,
+                                            "mutation_status": 0.0, "preventative_treatment": 0.0,
+                                            "NHSno": 0.0, "given_name": 0.0, "surname": 0.0},
+                            max_distortion={"ethnicity": 0.133, "salary": 0.133, "gender": 0.133,
+                                            "dob": 0.0, "education": 0.0, "diagnosis": 0.0,
+                                            "mutation_status": 0.0, "preventative_treatment": 0.0,
+                                            "NHSno": 0.1, "given_name": 0.1, "surname": 0.1},
                             total_min_distortion=MIN_DISTORTION, total_max_distortion=MAX_DISTORTION,
-                            punish_factor=PUNISH_FACTOR)
-    env.config.state_type = DiscreteEnvType.MULTI_COLUMN_STATE
-
-    algo_config = QLearnConfig()
-    algo_config.n_itrs_per_episode = N_ITRS_PER_EPISODE
-    algo_config.gamma = GAMMA
-    algo_config.alpha = ALPHA
-    # algo_config.policy = SoftMaxPolicy(n_actions=len(action_space), tau=1.2)
-    algo_config.policy = EpsilonGreedyPolicy(eps=EPS, n_actions=env.n_actions,
-                                             decay_op=EPSILON_DECAY_OPTION,
-                                             epsilon_decay_factor=EPSILON_DECAY_FACTOR)
-
-    # the learner we want to train
-    agent = QLearning(algo_config=algo_config)
-
-    # create a trainer to train the Qlearning agent
-    configuration = {"n_episodes": N_EPISODES, "output_msg_frequency": OUTPUT_MSG_FREQUENCY}
-    trainer = Trainer(env=env, agent=agent, configuration=configuration)
-
-    # train the agent
+                            out_of_max_bound_reward=OUT_OF_MAX_BOUND_REWARD,
+                            out_of_min_bound_reward=OUT_OF_MIN_BOUND_REWARD,
+                            in_bounds_reward=IN_BOUNDS_REWARD,
+                            punish_factor=PUNISH_FACTOR,
+                            column_types=column_types,
+                            action_space=action_space,
+                            save_distoreted_sets_dir=SAVE_DISTORTED_SETS_DIR,
+                            use_identifying_column_dist_in_total_dist=USE_IDENTIFYING_COLUMNS_DIST,
+                            use_identifying_column_dist_factor=IDENTIFY_COLUMN_DIST_FACTOR,
+                            gamma=GAMMA,
+                            n_rounds_below_min_distortion=N_ROUNDS_BELOW_MIN_DISTORTION)
+
+    agent_config = QLearnConfig(n_itrs_per_episode=N_ITRS_PER_EPISODE, gamma=GAMMA,
+                                alpha=ALPHA,
+                                policy=EpsilonGreedyPolicy(eps=EPS, n_actions=env.n_actions,
+                                                           decay_op=EPSILON_DECAY_OPTION,
+                                                           epsilon_decay_factor=EPSILON_DECAY_FACTOR))
+
+    agent = QLearning(algo_config=agent_config)
+
+    trainer_config = TrainerConfig(n_episodes=N_EPISODES, output_msg_frequency=OUTPUT_MSG_FREQUENCY)
+    trainer = Trainer(env=env, agent=agent, configuration=trainer_config)
     trainer.train()
 
-    # avg_rewards = trainer.avg_rewards()
     avg_rewards = trainer.total_rewards
     plot_running_avg(avg_rewards, steps=100,
                      xlabel="Episodes", ylabel="Reward",
@@ -77,17 +127,3 @@
     plot_running_avg(avg_episode_dist, steps=100,
                      xlabel="Episodes", ylabel="Distortion",
                      title="Running distortion average over 100 episodes")
-
-    print("=============================================")
-    print("{0} Generating distorted dataset".format(INFO))
-
-    """
-    # Let's play
-    env.reset()
-
-    stop_criterion = IterationControl(n_itrs=10, min_dist=MIN_DISTORTION, max_dist=MAX_DISTORTION)
-    agent.play(env=env, stop_criterion=stop_criterion)
-    env.save_current_dataset(episode_index=-2, save_index=False)
-    """
-    print("{0} Done....".format(INFO))
-    print("=============================================")
diff --git a/src/examples/qlearning_three_columns.py b/src/examples/qlearning_three_columns.py
index 3a0dc91..68ad9de 100644
--- a/src/examples/qlearning_three_columns.py
+++ b/src/examples/qlearning_three_columns.py
@@ -40,6 +40,7 @@
 SAVE_DISTORTED_SETS_DIR = "q_learning_three_columns_results/distorted_set"
 PUNISH_FACTOR = 2.0
 
+
 if __name__ == '__main__':
 
     # set the seed for random engine
diff --git a/src/spaces/discrete_state_environment.py b/src/spaces/discrete_state_environment.py
index 1a64ea0..3ed6c19 100644
--- a/src/spaces/discrete_state_environment.py
+++ b/src/spaces/discrete_state_environment.py
@@ -249,10 +249,10 @@ def get_aggregated_state(self, state_val: Any, column_name: str = None) -> Any:
             raise ValueError("Name {0} not in column bins names {1} ".format(column_name, list(self.column_bins.keys())))
 
         if column_name is None:
-            column_dists = [(0.0, name) for name in self.column_bins]
+            column_dists = [(0.0, name) for name in self.column_bins if self.config.column_types[name] == ColumnType.QUASI_IDENTIFYING_ATTRIBUTE]
 
         else:
-            column_dists = [(self.column_distances[name], name) for name in self.column_bins]
+            column_dists = [(self.column_distances[name], name) for name in self.column_bins if self.config.column_types[name] == ColumnType.QUASI_IDENTIFYING_ATTRIBUTE]
 
         state = []
         for distortion, name in column_dists:
@@ -274,8 +274,13 @@ def initialize_column_counts(self) -> None:
         """
         col_names = self.config.data_set.get_columns_names()
 
-        for col in col_names:
-            self.column_visits[col] = 0
+        for name in col_names:
+            self.column_visits[name] = 0
+
+            # an identifying attribute is removed from the
+            # data set, so mark it as already visited
+            if self.config.column_types[name] == ColumnType.IDENTIFYING_ATTRIBUTE:
+                self.column_visits[name] = 1
 
     def all_columns_visited(self) -> bool:
         """Returns true is all columns have been visited
@@ -524,21 +529,83 @@ def _create_multi_column_state_bins(self) -> None:
 
         # create the column bins
         for name in self.column_names:
-            self.column_bins[name] = np.linspace(0.0, 1.0, self.config.n_states)
+
+            # we create bins only for the QUASI_IDENTIFYING_ATTRIBUTE
+            # attributes
+            if self.config.column_types[name] == ColumnType.QUASI_IDENTIFYING_ATTRIBUTE:
+                self.column_bins[name] = np.linspace(0.0, 1.0, self.config.n_states)
+            #else:
+            #    self.column_bins["all_the_rest"] = np.linspace(0.0, 1.0, self.config.n_states)
 
         if len(self.column_bins) == 3:
             self._build_three_columns()
+        elif len(self.column_bins) == 4:
+            self._build_4_columns()
+        elif len(self.column_bins) == 5:
+            self._build_5_columns()
         else:
             raise ValueError("Invalid number of columns. Cannot build the multi-column state bins")
 
+        # add bins for the remaining columns, i.e. those that
+        # are not QUASI_IDENTIFYING_ATTRIBUTE
+        for name in self.column_names:
+            if self.config.column_types[name] != ColumnType.QUASI_IDENTIFYING_ATTRIBUTE:
+                self.column_bins[name] = np.linspace(0.0, 1.0, self.config.n_states)
+
     def _build_three_columns(self):
 
-        name = self.column_names[0]
+        name = ""
+        for n in self.config.column_types:
+            if self.config.column_types[n] == ColumnType.QUASI_IDENTIFYING_ATTRIBUTE:
+                name = n
+                break
+
+        if name == "":
+            raise ValueError("No QUASI_IDENTIFYING_ATTRIBUTE has been specified")
+
         for i in range(len(self.column_bins[name])):
             for j in range(len(self.column_bins[name])):
                 for k in range(len(self.column_bins[name])):
                     self.state_space.append((i, j, k))
 
+    def _build_4_columns(self):
+
+        name = ""
+        for n in self.config.column_types:
+            if self.config.column_types[n] == ColumnType.QUASI_IDENTIFYING_ATTRIBUTE:
+                name = n
+                break
+
+        if name == "":
+            raise ValueError("No QUASI_IDENTIFYING_ATTRIBUTE has been specified")
+
+        for i1 in range(len(self.column_bins[name])):
+            for i2 in range(len(self.column_bins[name])):
+                for i3 in range(len(self.column_bins[name])):
+                    for i4 in range(len(self.column_bins[name])):
+                        self.state_space.append((i1, i2, i3, i4))
+
+    def _build_5_columns(self):
+
+        name = ""
+        for n in self.config.column_types:
+            if self.config.column_types[n] == ColumnType.QUASI_IDENTIFYING_ATTRIBUTE:
+                name = n
+                break
+
+        if name == "":
+            raise ValueError("No QUASI_IDENTIFYING_ATTRIBUTE has been specified")
+
+        for i1 in range(len(self.column_bins[name])):
+            for i2 in range(len(self.column_bins[name])):
+                for i3 in range(len(self.column_bins[name])):
+                    for i4 in range(len(self.column_bins[name])):
+                        for i5 in range(len(self.column_bins[name])):
+                            self.state_space.append((i1, i2, i3, i4, i5))
+
     def _distort_identifying_attributes(self):
 
         for name in self.config.column_types: