diff --git a/README.md b/README.md
index 4e98cfc..ba04386 100644
--- a/README.md
+++ b/README.md
@@ -54,9 +54,32 @@ The images below show the overall running distortion average and running reward
 
 ## Dependencies
 
+The following packages are required.
+
 - NumPy
+- Sphinx
+- Pandas
+
+You can install them with
+
+```
+pip install -r requirements.txt
+```
+
+## Examples
+
+- Q-learning agent on a three-column dataset
+- n-step semi-gradient SARSA on a three-column dataset
 
 ## Documentation
 
+You will need Sphinx in order to generate the API documentation. Assuming that Sphinx is already installed
+on your machine, execute the following commands (see also the Sphinx tutorial).
+
+```
+sphinx-quickstart docs
+sphinx-build -b html docs/source/ docs/build/html
+```
+
 ## References
 
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..30a9347
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,67 @@
+# RL anonymity
+
+An experimental effort to use reinforcement learning techniques for data anonymization.
+
+## Conceptual overview
+
+The term data anonymization refers to techniques that can be applied on a given dataset, D, so that after
+the dataset has been submitted to such techniques, it becomes difficult for a third party to identify or infer the existence
+of specific individuals in D. Anonymization techniques typically result in some sort of distortion
+of the original dataset. This means that in order to maintain some utility of the transformed dataset, the transformations
+applied should be constrained in some sense. In the end, it can be argued that data anonymization is an optimization problem,
+namely striking the right balance between data utility and privacy.
+
+Reinforcement learning is a learning framework based on accumulated experience. In this paradigm, an agent learns by interacting with an environment,
+largely without any supervision. The following image describes, schematically, the reinforcement learning framework.
+
+![RL paradigm](images/agent_environment_interface.png "Reinforcement learning paradigm")
+
+The agent chooses an action, ```a_t```, to perform out of a predefined set of actions ```A```. The chosen action is executed by the environment
+instance, which returns to the agent a reward signal, ```r_t```, as well as the new state, ```s_t```, that the environment is in.
+The framework has been used successfully in many recent advances in control, robotics, games and elsewhere.
+
+
+Let's assume that we have at our disposal two numbers: a minimum distortion, ```MIN_DIST```, that should be applied to the dataset
+in order to achieve privacy, and a maximum distortion, ```MAX_DIST```, that should not be exceeded in order to maintain some utility.
+Let's also assume that any overall dataset distortion in ```[MIN_DIST, MAX_DIST]``` allows us to regard the dataset as
+both privacy preserving and utility preserving. We can then train a reinforcement learning agent to distort the dataset
+such that the aforementioned objective is achieved.
+
+Overall, this is shown in the image below.
+
+![RL anonymity paradigm](images/general_concept.png "Reinforcement learning anonymity schematics")
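To make the objective above concrete, here is a toy sketch of such a reward signal. It is only an illustration, not the repository's implementation (the examples later in this diff use the project's own `RewardManager`), and the bound and reward values are placeholders.

```python
# Toy illustration: reward the agent only while the overall dataset
# distortion stays inside [MIN_DIST, MAX_DIST]. All numbers are placeholders.
MIN_DIST = 0.3
MAX_DIST = 0.7

IN_BOUNDS_REWARD = 5.0
OUT_OF_BOUNDS_REWARD = -1.0


def distortion_reward(total_distortion: float) -> float:
    """Return a positive reward inside the acceptable distortion band."""
    if MIN_DIST <= total_distortion <= MAX_DIST:
        return IN_BOUNDS_REWARD
    return OUT_OF_BOUNDS_REWARD


if __name__ == '__main__':
    for d in (0.1, 0.5, 0.9):
        print(d, distortion_reward(d))
```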
+
+The images below show the overall running distortion average and running reward average achieved by using the
+Q-learning algorithm and various policies.
+
+**Q-learning with epsilon-greedy policy and constant epsilon**
+![RL anonymity paradigm](images/q_learn_epsilon_greedy_avg_run_distortion.png "Epsilon-greedy constant epsilon")
+![RL anonymity paradigm](images/q_learn_epsilon_greedy_avg_run_reward.png "Reinforcement learning anonymity schematics")
+
+**Q-learning with epsilon-greedy policy and decaying epsilon per episode**
+![RL anonymity paradigm](images/q_learn_epsilon_greedy_decay_avg_run_distortion.png "Reinforcement learning anonymity schematics")
+![RL anonymity paradigm](images/q_learn_epsilon_greedy_decay_avg_run_reward.png "Reinforcement learning anonymity schematics")
+
+
+**Q-learning with epsilon-greedy policy and epsilon decaying at a constant rate**
+![RL anonymity paradigm](images/q_learn_epsilon_greedy_decay_rate_avg_run_distortion.png "Reinforcement learning anonymity schematics")
+![RL anonymity paradigm](images/q_learn_epsilon_greedy_decay_rate_avg_run_reward.png "Reinforcement learning anonymity schematics")
+
+**Q-learning with softmax policy**
+![RL anonymity paradigm](images/q_learn_softmax_avg_run_distortion.png "Reinforcement learning anonymity schematics")
+![RL anonymity paradigm](images/q_learn_softmax_avg_run_reward.png "Reinforcement learning anonymity schematics")
+
+
+## Dependencies
+
+- NumPy
+
+## Examples
+
+- Q-learning agent on a three-column dataset
+- n-step semi-gradient SARSA on a three-column dataset
+
+## Documentation
+
+## References
+
diff --git a/src/algorithms/sarsa_semi_gradient.py b/src/algorithms/sarsa_semi_gradient.py
index e1ddea8..768efa2 100644
--- a/src/algorithms/sarsa_semi_gradient.py
+++ b/src/algorithms/sarsa_semi_gradient.py
@@ -7,6 +7,7 @@ from typing import TypeVar
 
 from src.utils.mixins import WithMaxActionMixin, WithQTableMixinBase
+from src.utils.episode_info import EpisodeInfo
 from src.algorithms.q_estimator import QEstimator
 from src.exceptions.exceptions import InvalidParamValue
 
@@ -75,7 +76,7 @@ def actions_before_episode_begins(self, **options) -> None:
         # reset the estimator
         self.config.estimator.reset(self.config.reset_estimator_z_traces)
 
-    def on_episode(self, env: Env) -> tuple:
+    def on_episode(self, env: Env) -> EpisodeInfo:
         """
         Train the agent on the given algorithm
         :param env:
@@ -86,28 +87,27 @@ def on_episode(self, env: Env) -> tuple:
         time_step = env.reset()
         state = time_step.observation
 
+        action_idx = self.config.policy(self.q_table, state)
+        action = env.get_action(action_idx)
+
         # vars to measure performance
         episode_score = 0
         counter = 0
         total_distortion = 0
         T = float('inf')
 
-        actions = []
-        rewards = []
+        actions = [action_idx]
+        rewards = [0.0]
+        states = [state]
 
         for itr in range(self.config.n_itrs_per_episode):
 
             if itr < T:
-                # select an action using the current
-                # policy
-                action_idx = self.config.policy(self.q_table, state)
-
-                action = env.get_action(action_idx)
-                actions.append(action)
 
                 # take action A, observe R, S'
                 next_time_step = env.step(action)
                 next_state = next_time_step.observation
                 reward = next_time_step.reward
+                total_distortion += next_time_step.info["total_distortion"]
 
                 episode_score += reward
                 rewards.append(reward)
@@ -115,9 +115,10 @@
                     T = itr + 1
             else:
-                # take the next step
-                pass
-                """
+                next_action_idx = self.config.policy(self.q_table, next_state)
+                next_action = env.get_action(next_action_idx)
+                actions.append(next_action)
+
             # should we update
             update_time = itr + 1 - self.config.n
             if update_time >= 0:
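The unchanged lines between the two hunks above (the body of the n-step update) are not shown in this diff. For reference, a minimal sketch of the textbook n-step Sarsa return that this kind of update computes; the function and variable names below are hypothetical and are not taken from `sarsa_semi_gradient.py`.

```python
def n_step_return(rewards, states, actions, tau, T, n, gamma, q):
    """Textbook n-step Sarsa target (Sutton & Barto):
    G = sum_{i=tau+1}^{min(tau+n, T)} gamma^(i - tau - 1) * R_i,
    plus gamma^n * q(S_{tau+n}, A_{tau+n}) when tau + n < T.
    """
    last = min(tau + n, T)
    G = sum(gamma ** (i - tau - 1) * rewards[i] for i in range(tau + 1, last + 1))
    if tau + n < T:
        G += gamma ** n * q(states[tau + n], actions[tau + n])
    return G
```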
@@ -137,6 +138,14 @@
             if update_time == T - 1:
                 break
 
+            counter += 1
             state = next_state
             action = next_action
-            """
+
+
+        episode_info = EpisodeInfo()
+        episode_info.episode_score = episode_score
+        episode_info.total_distortion = total_distortion
+        episode_info.info["m_iterations"] = counter
+        return episode_info
+
diff --git a/src/examples/nstep_semi_grad_sarsa_three_columns.py b/src/examples/nstep_semi_grad_sarsa_three_columns.py
new file mode 100644
index 0000000..57fab54
--- /dev/null
+++ b/src/examples/nstep_semi_grad_sarsa_three_columns.py
@@ -0,0 +1,207 @@
+import random
+import numpy as np
+from pathlib import Path
+
+from src.algorithms.sarsa_semi_gradient import SARSAnConfig, SARSAn
+from src.algorithms.trainer import Trainer
+from src.datasets.datasets_loaders import MockSubjectsLoader
+from src.spaces.action_space import ActionSpace
+from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
+from src.utils.reward_manager import RewardManager
+from src.utils.serial_hierarchy import SerialHierarchy
+from src.policies.epsilon_greedy_policy import EpsilonGreedyPolicy, EpsilonDecayOption
+from src.policies.softmax_policy import SoftMaxPolicy
+from src.utils.numeric_distance_type import NumericDistanceType
+from src.utils.string_distance_calculator import StringDistanceType
+from src.utils.distortion_calculator import DistortionCalculationType, DistortionCalculator
+from src.spaces.discrete_state_environment import DiscreteStateEnvironment, DiscreteEnvConfig
+from src.spaces.tiled_environment import TiledEnv
+from src.utils.iteration_control import IterationControl
+from src.utils.plot_utils import plot_running_avg
+from src.utils import INFO
+
+# configuration params
+EPS = 1.0
+EPSILON_DECAY_OPTION = EpsilonDecayOption.CONSTANT_RATE  # .INVERSE_STEP
+EPSILON_DECAY_FACTOR = 0.01
+GAMMA = 0.99
+ALPHA = 0.1
+N_EPISODES = 1001
+N_ITRS_PER_EPISODE = 30
+N_STATES = 10
+REWARD_FACTOR = 0.95
+PUNISH_FACTOR = 2.0
+
+
+# fix the rewards. Assume that any average distortion in
+# (0.3, 0.7) suits us
+MAX_DISTORTION = 0.7
+MIN_DISTORTION = 0.3
+OUT_OF_MAX_BOUND_REWARD = -1.0
+OUT_OF_MIN_BOUND_REWARD = -1.0
+IN_BOUNDS_REWARD = 5.0
+OUTPUT_MSG_FREQUENCY = 100
+N_ROUNDS_BELOW_MIN_DISTORTION = 10
+SAVE_DISTORTED_SETS_DIR = "/home/alex/qi3/drl_anonymity/src/examples/nstep_semi_grad_sarsa_learn_distorted_sets/distorted_set"
+
+
+def get_ethinicity_hierarchy():
+    ethnicity_hierarchy = SerialHierarchy(values={})
+
+    ethnicity_hierarchy["Mixed White/Asian"] = "White/Asian"
+    ethnicity_hierarchy["White/Asian"] = "Mixed"
+
+    ethnicity_hierarchy["Chinese"] = "Asian"
+    ethnicity_hierarchy["Indian"] = "Asian"
+    ethnicity_hierarchy["Mixed White/Black African"] = "White/Black"
+    ethnicity_hierarchy["White/Black"] = "Mixed"
+
+    ethnicity_hierarchy["Black African"] = "African"
+    ethnicity_hierarchy["African"] = "Black"
+    ethnicity_hierarchy["Asian other"] = "Asian"
+    ethnicity_hierarchy["Black other"] = "Black"
+    ethnicity_hierarchy["Mixed White/Black Caribbean"] = "White/Black"
+    ethnicity_hierarchy["White/Black"] = "Mixed"
+
+    ethnicity_hierarchy["Mixed other"] = "Mixed"
+    ethnicity_hierarchy["Arab"] = "Asian"
+    ethnicity_hierarchy["White Irish"] = "Irish"
+    ethnicity_hierarchy["Irish"] = "European"
+    ethnicity_hierarchy["Not stated"] = "Not stated"
+    ethnicity_hierarchy["White Gypsy/Traveller"] = "White"
+    ethnicity_hierarchy["White British"] = "British"
+    ethnicity_hierarchy["British"] = "European"
+    ethnicity_hierarchy["Bangladeshi"] = "Asian"
+    ethnicity_hierarchy["White other"] = "White"
+    ethnicity_hierarchy["Black Caribbean"] = "Caribbean"
+    ethnicity_hierarchy["Caribbean"] = "Black"
+    ethnicity_hierarchy["Pakistani"] = "Asian"
+
+    ethnicity_hierarchy["European"] = "European"
+    ethnicity_hierarchy["Mixed"] = "Mixed"
+    ethnicity_hierarchy["Asian"] = "Asian"
+    ethnicity_hierarchy["Black"] = "Black"
+    ethnicity_hierarchy["White"] = "White"
+    return ethnicity_hierarchy
+
+
+def load_dataset() -> MockSubjectsLoader:
+    # specify the columns to drop
+    drop_columns = MockSubjectsLoader.FEATURES_DROP_NAMES + ["preventative_treatment", "gender",
+                                                             "education", "mutation_status"]
+    MockSubjectsLoader.FEATURES_DROP_NAMES = drop_columns
+
+    # do a salary normalization so that we work with
+    # salaries in [0, 1]; this is needed as we will
+    # be using normalized distances
+    MockSubjectsLoader.NORMALIZED_COLUMNS = ["salary"]
+
+    # specify the columns to use
+    MockSubjectsLoader.COLUMNS_TYPES = {"ethnicity": str, "salary": float, "diagnosis": int}
+    ds = MockSubjectsLoader()
+
+    assert ds.n_columns == 3, "Invalid number of columns {0} not equal to 3".format(ds.n_columns)
+
+    return ds
+
+
+if __name__ == '__main__':
+
+    # set the seed for the random engine
+    random.seed(42)
+
+    ds = load_dataset()
+
+    # create bins for the salary generalization
+    unique_salary = ds.get_column_unique_values(col_name="salary")
+    unique_salary.sort()
+
+    # modify slightly the max value because
+    # we get out of bounds for the maximum salary
+    bins = np.linspace(unique_salary[0], unique_salary[-1] + 1, N_STATES)
+
+    # establish the action space. For every column
+    # we register a couple of actions, except for the ```diagnosis```
+    # column which we do not alter
+    action_space = ActionSpace(n=5)
+    action_space.add_many(ActionIdentity(column_name="ethnicity"),
+                          ActionStringGeneralize(column_name="ethnicity",
+                                                 generalization_table=get_ethinicity_hierarchy()),
+                          ActionIdentity(column_name="salary"),
+                          ActionNumericBinGeneralize(column_name="salary", generalization_table=bins),
+                          ActionIdentity(column_name="diagnosis"))
+
+    action_space.shuffle()
+
+    env_config = DiscreteEnvConfig()
+
+    env_config.action_space = action_space
+    env_config.reward_manager = RewardManager(bounds=(MIN_DISTORTION, MAX_DISTORTION),
+                                              out_of_max_bound_reward=OUT_OF_MAX_BOUND_REWARD,
+                                              out_of_min_bound_reward=OUT_OF_MIN_BOUND_REWARD,
+                                              in_bounds_reward=IN_BOUNDS_REWARD)
+    env_config.data_set = ds
+    env_config.gamma = GAMMA
+    env_config.max_distortion = MAX_DISTORTION
+    env_config.min_distortion = MIN_DISTORTION
+    env_config.n_states = N_STATES
+    env_config.n_rounds_below_min_distortion = N_ROUNDS_BELOW_MIN_DISTORTION
+    env_config.distorted_set_path = Path(SAVE_DISTORTED_SETS_DIR)
+    env_config.distortion_calculator = DistortionCalculator(
+        numeric_column_distortion_metric_type=NumericDistanceType.L2_AVG,
+        string_column_distortion_metric_type=StringDistanceType.COSINE_NORMALIZE,
+        dataset_distortion_type=DistortionCalculationType.SUM)
+    env_config.reward_factor = REWARD_FACTOR
+    env_config.punish_factor = PUNISH_FACTOR
+
+    # create the environment
+    env = DiscreteStateEnvironment(env_config=env_config)
+
+    # we will use a tiled environment in this example
+    tiled_env = TiledEnv()
+    env.reset()
+
+    # save the data before distortion so that we can
+    # later load it on ARX
+    env.save_current_dataset(episode_index=-1, save_index=False)
+
+    # configuration for the n-step semi-gradient SARSA learner
+    algo_config = SARSAnConfig()
+    algo_config.n_itrs_per_episode = N_ITRS_PER_EPISODE
+    algo_config.gamma = GAMMA
+    algo_config.alpha = ALPHA
+    # algo_config.policy = SoftMaxPolicy(n_actions=len(action_space), tau=1.2)
+    algo_config.policy = EpsilonGreedyPolicy(eps=EPS, env=env, decay_op=EPSILON_DECAY_OPTION,
+                                             epsilon_decay_factor=EPSILON_DECAY_FACTOR)
+
+    # the learner we want to train
+    agent = SARSAn(algo_config=algo_config)
+
+    configuration = {"n_episodes": N_EPISODES, "output_msg_frequency": OUTPUT_MSG_FREQUENCY}
+
+    # create a trainer to train the agent
+    trainer = Trainer(env=env, agent=agent, configuration=configuration)
+    trainer.train()
+
+    # avg_rewards = trainer.avg_rewards()
+    avg_rewards = trainer.total_rewards
+    plot_running_avg(avg_rewards, steps=100,
+                     xlabel="Episodes", ylabel="Reward",
+                     title="Running reward average over 100 episodes")
+
+    avg_episode_dist = np.array(trainer.total_distortions)
+    print("{0} Max/Min distortion {1}/{2}".format(INFO, np.max(avg_episode_dist), np.min(avg_episode_dist)))
+
+    plot_running_avg(avg_episode_dist, steps=100,
+                     xlabel="Episodes", ylabel="Distortion",
+                     title="Running distortion average over 100 episodes")
+
+    print("=============================================")
+    print("{0} Generating distorted dataset".format(INFO))
+    # Let's play
+    env.reset()
+
+    stop_criterion = IterationControl(n_itrs=10, min_dist=MIN_DISTORTION, max_dist=MAX_DISTORTION)
+    agent.play(env=env, stop_criterion=stop_criterion)
+    env.save_current_dataset(episode_index=-2, save_index=False)
+    print("{0} Done....".format(INFO))
+    print("=============================================")
\ No newline at end of file
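The example above builds the salary bins with `np.linspace` and pushes the upper edge past the maximum salary so that the largest value does not fall out of range. Below is a minimal NumPy sketch of that kind of binning; the salary values are made up, and the project's `ActionNumericBinGeneralize` may map values to bins differently.

```python
import numpy as np

# Hypothetical, already-normalized salaries in [0, 1]; not data from the repository.
salaries = np.array([0.05, 0.32, 0.58, 0.77, 1.0])

n_states = 10
# As in the example above, push the upper bin edge past the maximum value so
# that the largest salary still falls inside the last bin.
bins = np.linspace(salaries.min(), salaries.max() + 1, n_states)

# np.digitize returns, for every salary, the index of the bin it belongs to.
print(np.digitize(salaries, bins))
```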
diff --git a/src/examples/qlearning_three_columns.py b/src/examples/qlearning_three_columns.py
index a4253f0..29e13a7 100644
--- a/src/examples/qlearning_three_columns.py
+++ b/src/examples/qlearning_three_columns.py
@@ -27,50 +27,6 @@ def get_ethinicity_hierarchy():
     ethnicity_hierarchy = SerialHierarchy(values={})
 
-    """
-    ethnicity_hierarchy.add("Mixed White/Asian", "White/Asian")
-    ethnicity_hierarchy.add("White/Asian", "White")
-
-    ethnicity_hierarchy.add("Chinese", "Asian")
-    ethnicity_hierarchy.add("Indian", "Asian")
-
-    ethnicity_hierarchy.add("Mixed White/Black African", "African-Mixed")
-    ethnicity_hierarchy.add("African-Mixed", "Mixed")
-
-    ethnicity_hierarchy.add("Black African", "African")
-    ethnicity_hierarchy.add("African", "African")
-
-    ethnicity_hierarchy.add("Asian other", "Asian")
-    ethnicity_hierarchy.add("Black other", "Black")
-
-    ethnicity_hierarchy.add("Mixed White/Black Caribbean", "Caribbean-Mixed")
-    ethnicity_hierarchy.add("Caribbean-Mixed", "Mixed")
-
-    ethnicity_hierarchy.add("Mixed other", "Mixed")
-    ethnicity_hierarchy.add("Arab", "Asian")
-
-    ethnicity_hierarchy.add("White Irish", "European-White")
-    ethnicity_hierarchy.add("European-White", "European")
-
-    ethnicity_hierarchy.add("Not stated", "Not stated")
-    ethnicity_hierarchy.add("White Gypsy/Traveller", "White")
-
-    ethnicity_hierarchy.add("White British", "British")
-    ethnicity_hierarchy.add("British", "European")
-
-    ethnicity_hierarchy.add("Bangladeshi", "Asian")
-    ethnicity_hierarchy.add("White other", "White")
-    ethnicity_hierarchy.add("Black Caribbean", "Black")
-    ethnicity_hierarchy.add("Pakistani", "Asian")
-
-    ethnicity_hierarchy.add("White", "White")
-    ethnicity_hierarchy.add("Mixed", "Mixed")
-    ethnicity_hierarchy.add("European", "European")
-    ethnicity_hierarchy.add("Asian", "Asian")
-    ethnicity_hierarchy.add("Black", "Black")
-    ethnicity_hierarchy.add("Not stated", "Not stated")
-    """
-
     ethnicity_hierarchy["Mixed White/Asian"] = "White/Asian"
     ethnicity_hierarchy["White/Asian"] = "Mixed"
 
@@ -109,6 +65,8 @@ def get_ethinicity_hierarchy():
 
 if __name__ == '__main__':
+
+    # set the seed for the random engine
     random.seed(42)
 
     # configuration params
diff --git a/src/utils/episode_info.py b/src/utils/episode_info.py
new file mode 100644
index 0000000..ef20be7
--- /dev/null
+++ b/src/utils/episode_info.py
@@ -0,0 +1,12 @@
+"""
+EpisodeInfo class. This is a helper class
+to wrap the output after an episode has finished.
+"""
+
+
+class EpisodeInfo(object):
+
+    def __init__(self):
+        self.episode_score = None
+        self.total_distortion = None
+        self.info = {}
\ No newline at end of file
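A minimal sketch of how the `EpisodeInfo` object introduced above can be consumed. Only the `EpisodeInfo` fields and the `on_episode` return type come from this diff; the loop itself is hypothetical, and the project's `Trainer` may aggregate the results differently.

```python
from src.utils.episode_info import EpisodeInfo


def run_episodes(agent, env, n_episodes: int):
    """Hypothetical loop collecting the EpisodeInfo returned by on_episode()."""
    scores, distortions = [], []
    for _ in range(n_episodes):
        info: EpisodeInfo = agent.on_episode(env)
        scores.append(info.episode_score)
        distortions.append(info.total_distortion)
    return scores, distortions
```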