diff --git a/README.md b/README.md
index 4e98cfc..ba04386 100644
--- a/README.md
+++ b/README.md
@@ -54,9 +54,32 @@ The images below show the overall running distortion average and running reward
## Dependencies
+The following packages are required:
+
- NumPy
+- Sphinx
+- Pandas
+
+You can install them with:
+
+```
+pip install -r requirements.txt
+```
+
+## Examples
+
+- Q-learning agent on a three-column dataset
+- n-step semi-gradient SARSA on a three-column dataset
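+
+For instance, assuming the repository root is on your PYTHONPATH (or you run from the repository root), the Q-learning example
+can be launched with
+
+```
+python src/examples/qlearning_three_columns.py
+```
+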
## Documentation
+You will need Sphinx in order to generate the API documentation. Assuming that Sphinx is already installed
+on your machine, execute the following commands (see also the Sphinx tutorial):
+
+```
+sphinx-quickstart docs
+sphinx-build -b html docs/source/ docs/build/html
+```
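+
+The generated HTML pages can then be viewed by opening docs/build/html/index.html in a browser.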
+
## References
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..30a9347
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,67 @@
+# RL anonymity
+
+An experimental effort to use reinforcement learning techniques for data anonymization.
+
+## Conceptual overview
+
+The term data anonymization refers to techniques that can be applied to a given dataset, D, such that after
+these techniques have been applied, it is difficult for a third party to identify or infer the existence
+of specific individuals in D. Anonymization techniques typically result in some sort of distortion
+of the original dataset. This means that, in order to maintain some utility of the transformed dataset, the transformations
+applied should be constrained in some sense. In the end, it can be argued that data anonymization is an optimization problem,
+namely striking the right balance between data utility and privacy.
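+
+As a concrete illustration (a hypothetical snippet, in the spirit of the string-generalization actions used in this repository),
+a string attribute can be generalized through a hierarchy; each step distorts the data a little more but makes individuals
+harder to single out:
+
+```
+# hypothetical generalization hierarchy for an ethnicity column
+hierarchy = {"White British": "British", "British": "European"}
+
+value = "White British"
+value = hierarchy[value]  # first generalization step  -> "British"
+value = hierarchy[value]  # second generalization step -> "European"
+```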
+
+Reinforcement learning is a learning framework based on accumulated experience. In this paradigm, an agent learns by interacting with an environment,
+largely without any supervision. The following image describes, schematically, the reinforcement learning framework.
+
+
+
+The agent chooses an action, ```a_t```, to perform out of a predefined set of actions ```A```. The chosen action is executed by the environment
+instance, which returns to the agent a reward signal, ```r_t```, as well as the new state, ```s_t```, that the environment is in.
+The framework has been used successfully in many recent advances in control, robotics, games and elsewhere.
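+
+In code, one iteration of this interaction loop looks roughly like the sketch below. The names follow the agents in this
+repository (e.g. ```env.reset```, ```env.step```, ```env.get_action```), but the snippet is illustrative rather than the exact API:
+
+```
+time_step = env.reset()
+state = time_step.observation
+
+for itr in range(n_itrs_per_episode):
+    # the policy picks an action index a_t out of the action set A
+    action_idx = policy(q_table, state)
+    action = env.get_action(action_idx)
+
+    # the environment executes the action and returns the reward
+    # signal r_t together with the new state s_t it has moved to
+    time_step = env.step(action)
+    reward = time_step.reward
+    state = time_step.observation
+```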
+
+
+Let's assume that we have at our disposal two numbers: a minimum distortion, ```MIN_DIST```, that should be applied to the dataset
+in order to achieve privacy, and a maximum distortion, ```MAX_DIST```, that should not be exceeded in order to maintain some utility.
+Let's also assume that any overall dataset distortion in ```[MIN_DIST, MAX_DIST]``` is acceptable for casting the dataset as
+both privacy preserving and utility preserving. We can then train a reinforcement learning agent to distort the dataset
+such that the aforementioned objective is achieved.
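+
+One simple way to encode this objective as a reward signal is sketched below. The numbers are illustrative; the examples in
+this repository use a ```RewardManager``` configured with in-bounds and out-of-bounds rewards:
+
+```
+def distortion_reward(total_distortion: float, min_dist: float, max_dist: float) -> float:
+    # reward the agent while the overall dataset distortion stays
+    # inside the acceptable band [min_dist, max_dist]
+    if min_dist <= total_distortion <= max_dist:
+        return 5.0   # in-bounds reward
+    return -1.0      # out-of-bounds reward
+```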
+
+Overall, this is shown in the image below.
+
+
+
+The images below show the overall running distortion average and running reward average achieved by using the
+Q-learning algorithm and various policies.
+
+**Q-learning with epsilon-greedy policy and constant epsilon**
+
+
+
+**Q-learning with epsilon-greedy policy and decaying epsilon per episode**
+
+
+
+
+**Q-learning with epsilon-greedy policy and epsilon decaying at a constant rate**
+
+
+
+**Q-learning with softmax policy: running average distortion**
+
+
+
+
+## Dependencies
+
+- NumPy
+
+## Examples
+
+- Q-learning agent on a three-column dataset
+- n-step semi-gradient SARSA on a three-column dataset
+
+## Documentation
+
+## References
+
diff --git a/src/algorithms/sarsa_semi_gradient.py b/src/algorithms/sarsa_semi_gradient.py
index e1ddea8..768efa2 100644
--- a/src/algorithms/sarsa_semi_gradient.py
+++ b/src/algorithms/sarsa_semi_gradient.py
@@ -7,6 +7,7 @@
from typing import TypeVar
from src.utils.mixins import WithMaxActionMixin, WithQTableMixinBase
+from src.utils.episode_info import EpisodeInfo
from src.algorithms.q_estimator import QEstimator
from src.exceptions.exceptions import InvalidParamValue
@@ -75,7 +76,7 @@ def actions_before_episode_begins(self, **options) -> None:
# reset the estimator
self.config.estimator.reset(self.config.reset_estimator_z_traces)
- def on_episode(self, env: Env) -> tuple:
+ def on_episode(self, env: Env) -> EpisodeInfo:
"""
Train the agent on the given algorithm
:param env:
@@ -86,28 +87,27 @@ def on_episode(self, env: Env) -> tuple:
time_step = env.reset()
state = time_step.observation
+ action_idx = self.config.policy(self.q_table, state)
+ action = env.get_action(action_idx)
+
# vars to measure performance
episode_score = 0
counter = 0
total_distortion = 0
T = float('inf')
- actions = []
- rewards = []
+ actions = [action_idx]
+ rewards = [0.0]
+ states = [state]
for itr in range(self.config.n_itrs_per_episode):
if itr < T:
- # select an action using the current
- # policy
- action_idx = self.config.policy(self.q_table, state)
-
- action = env.get_action(action_idx)
- actions.append(action)
# take action A, observe R, S'
next_time_step = env.step(action)
next_state = next_time_step.observation
reward = next_time_step.reward
+ total_distortion += next_time_step.info["total_distortion"]
episode_score += reward
rewards.append(reward)
@@ -115,9 +115,10 @@ def on_episode(self, env: Env) -> tuple:
T = itr + 1
else:
- # take the next step
- pass
- """
+ next_action_idx = self.config.policy(self.q_table, next_state)
+ next_action = env.get_action(next_action_idx)
+ actions.append(next_action)
+
# should we update
update_time = itr + 1 - self.config.n
if update_time >= 0:
@@ -137,6 +138,14 @@ def on_episode(self, env: Env) -> tuple:
if update_time == T - 1:
break
+ counter += 1
state = next_state
action = next_action
- """
+
+
+ episode_info = EpisodeInfo()
+ episode_info.episode_score = episode_score
+ episode_info.total_distortion = total_distortion
+ episode_info.info["m_iterations"] = counter
+ return episode_info
+
diff --git a/src/examples/nstep_semi_grad_sarsa_three_columns.py b/src/examples/nstep_semi_grad_sarsa_three_columns.py
new file mode 100644
index 0000000..57fab54
--- /dev/null
+++ b/src/examples/nstep_semi_grad_sarsa_three_columns.py
@@ -0,0 +1,207 @@
+import random
+import numpy as np
+from pathlib import Path
+
+from src.algorithms.sarsa_semi_gradient import SARSAnConfig, SARSAn
+from src.algorithms.trainer import Trainer
+from src.datasets.datasets_loaders import MockSubjectsLoader
+from src.spaces.action_space import ActionSpace
+from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
+from src.utils.reward_manager import RewardManager
+from src.utils.serial_hierarchy import SerialHierarchy
+from src.policies.epsilon_greedy_policy import EpsilonGreedyPolicy, EpsilonDecayOption
+from src.policies.softmax_policy import SoftMaxPolicy
+from src.utils.numeric_distance_type import NumericDistanceType
+from src.utils.string_distance_calculator import StringDistanceType
+from src.utils.distortion_calculator import DistortionCalculationType, DistortionCalculator
+from src.spaces.discrete_state_environment import DiscreteStateEnvironment, DiscreteEnvConfig
+from src.spaces.tiled_environment import TiledEnv
+from src.utils.iteration_control import IterationControl
+from src.utils.plot_utils import plot_running_avg
+from src.utils import INFO
+
+# configuration params
+EPS = 1.0
+EPSILON_DECAY_OPTION = EpsilonDecayOption.CONSTANT_RATE #.INVERSE_STEP
+EPSILON_DECAY_FACTOR = 0.01
+GAMMA = 0.99
+ALPHA = 0.1
+N_EPISODES = 1001
+N_ITRS_PER_EPISODE = 30
+N_STATES = 10
+REWARD_FACTOR = 0.95
+PUNISH_FACTOR = 2.0
+
+
+# fix the rewards. Assume that any average distortion in
+# (0.3, 0.7), i.e. in [MIN_DISTORTION, MAX_DISTORTION], suits us
+MAX_DISTORTION = 0.7
+MIN_DISTORTION = 0.3
+OUT_OF_MAX_BOUND_REWARD = -1.0
+OUT_OF_MIN_BOUND_REWARD = -1.0
+IN_BOUNDS_REWARD = 5.0
+OUTPUT_MSG_FREQUENCY = 100
+N_ROUNDS_BELOW_MIN_DISTORTION = 10
+SAVE_DISTORTED_SETS_DIR = "/home/alex/qi3/drl_anonymity/src/examples/nstep_semi_grad_sarsa_learn_distorted_sets/distorted_set"
+
+
+def get_ethinicity_hierarchy():
+ ethnicity_hierarchy = SerialHierarchy(values={})
+
+ ethnicity_hierarchy["Mixed White/Asian"] = "White/Asian"
+ ethnicity_hierarchy["White/Asian"] = "Mixed"
+
+ ethnicity_hierarchy["Chinese"] = "Asian"
+ ethnicity_hierarchy["Indian"] = "Asian"
+ ethnicity_hierarchy["Mixed White/Black African"] = "White/Black"
+ ethnicity_hierarchy["White/Black"] = "Mixed"
+
+ ethnicity_hierarchy["Black African"] = "African"
+ ethnicity_hierarchy["African"] = "Black"
+ ethnicity_hierarchy["Asian other"] = "Asian"
+ ethnicity_hierarchy["Black other"] = "Black"
+ ethnicity_hierarchy["Mixed White/Black Caribbean"] = "White/Black"
+ ethnicity_hierarchy["White/Black"] = "Mixed"
+
+ ethnicity_hierarchy["Mixed other"] = "Mixed"
+ ethnicity_hierarchy["Arab"] = "Asian"
+ ethnicity_hierarchy["White Irish"] = "Irish"
+ ethnicity_hierarchy["Irish"] = "European"
+ ethnicity_hierarchy["Not stated"] = "Not stated"
+ ethnicity_hierarchy["White Gypsy/Traveller"] = "White"
+ ethnicity_hierarchy["White British"] = "British"
+ ethnicity_hierarchy["British"] = "European"
+ ethnicity_hierarchy["Bangladeshi"] = "Asian"
+ ethnicity_hierarchy["White other"] = "White"
+ ethnicity_hierarchy["Black Caribbean"] = "Caribbean"
+ ethnicity_hierarchy["Caribbean"] = "Black"
+ ethnicity_hierarchy["Pakistani"] = "Asian"
+
+ ethnicity_hierarchy["European"] = "European"
+ ethnicity_hierarchy["Mixed"] = "Mixed"
+ ethnicity_hierarchy["Asian"] = "Asian"
+ ethnicity_hierarchy["Black"] = "Black"
+ ethnicity_hierarchy["White"] = "White"
+ return ethnicity_hierarchy
+
+
+def load_dataset() -> MockSubjectsLoader:
+ # specify the columns to drop
+ drop_columns = MockSubjectsLoader.FEATURES_DROP_NAMES + ["preventative_treatment", "gender",
+ "education", "mutation_status"]
+ MockSubjectsLoader.FEATURES_DROP_NAMES = drop_columns
+
+ # do a salary normalization so that we work with
+ # salaries in [0, 1] this is needed as we will
+ # be using normalized distances
+ MockSubjectsLoader.NORMALIZED_COLUMNS = ["salary"]
+
+ # specify the columns to use
+ MockSubjectsLoader.COLUMNS_TYPES = {"ethnicity": str, "salary": float, "diagnosis": int}
+ ds = MockSubjectsLoader()
+
+ assert ds.n_columns == 3, "Invalid number of columns {0} not equal to 3".format(ds.n_columns)
+
+ return ds
+
+if __name__ == '__main__':
+
+ # set the seed for random engine
+ random.seed(42)
+
+ ds = load_dataset()
+
+ # create bins for the salary generalization
+ unique_salary = ds.get_column_unique_values(col_name="salary")
+ unique_salary.sort()
+
+ # modify slightly the max value because
+ # we get out of bounds for the maximum salary
+ bins = np.linspace(unique_salary[0], unique_salary[-1] + 1, N_STATES)
+
+ # establish the action space. For every column
+ # we assume three actions except for the ```diagnosis```
+ # which we do not alter
+ action_space = ActionSpace(n=5)
+ action_space.add_many(ActionIdentity(column_name="ethnicity"),
+ ActionStringGeneralize(column_name="ethnicity",
+ generalization_table=get_ethinicity_hierarchy()),
+ ActionIdentity(column_name="salary"),
+ ActionNumericBinGeneralize(column_name="salary", generalization_table=bins),
+ ActionIdentity(column_name="diagnosis"))
+
+ action_space.shuffle()
+
+ env_config = DiscreteEnvConfig()
+
+ env_config.action_space = action_space
+ env_config.reward_manager = RewardManager(bounds=(MIN_DISTORTION, MAX_DISTORTION),
+ out_of_max_bound_reward=OUT_OF_MAX_BOUND_REWARD,
+ out_of_min_bound_reward=OUT_OF_MIN_BOUND_REWARD,
+ in_bounds_reward=IN_BOUNDS_REWARD)
+ env_config.data_set = ds
+ env_config.gamma = GAMMA
+ env_config.max_distortion = MAX_DISTORTION
+ env_config.min_distortion = MIN_DISTORTION
+ env_config.n_states = N_STATES
+ env_config.n_rounds_below_min_distortion = N_ROUNDS_BELOW_MIN_DISTORTION
+ env_config.distorted_set_path = Path(SAVE_DISTORTED_SETS_DIR)
+ env_config.distortion_calculator = DistortionCalculator(
+ numeric_column_distortion_metric_type=NumericDistanceType.L2_AVG,
+ string_column_distortion_metric_type=StringDistanceType.COSINE_NORMALIZE,
+ dataset_distortion_type=DistortionCalculationType.SUM)
+ env_config.reward_factor = REWARD_FACTOR
+ env_config.punish_factor = PUNISH_FACTOR
+
+ # create the environment
+ env = DiscreteStateEnvironment(env_config=env_config)
+
+    # a tiled environment instance; note that the trainer below is
+    # handed the discrete environment directly
+ tiled_env = TiledEnv()
+ env.reset()
+
+ # save the data before distortion so that we can
+ # later load it on ARX
+ env.save_current_dataset(episode_index=-1, save_index=False)
+
+    # configuration for the n-step semi-gradient SARSA agent
+    # (note: the SARSAn algorithm above also reads config.n and config.estimator)
+    algo_config = SARSAnConfig()
+ algo_config.n_itrs_per_episode = N_ITRS_PER_EPISODE
+ algo_config.gamma = GAMMA
+ algo_config.alpha = ALPHA
+ #algo_config.policy = SoftMaxPolicy(n_actions=len(action_space), tau=1.2)
+    algo_config.policy = EpsilonGreedyPolicy(eps=EPS, env=env, decay_op=EPSILON_DECAY_OPTION,
+ epsilon_decay_factor=EPSILON_DECAY_FACTOR)
+
+ # the learner we want to train
+    agent = SARSAn(algo_config=algo_config)
+
+ configuration = {"n_episodes": N_EPISODES, "output_msg_frequency": OUTPUT_MSG_FREQUENCY}
+
+    # create a trainer to train the n-step semi-gradient SARSA agent
+ trainer = Trainer(env=env, agent=agent, configuration=configuration)
+ trainer.train()
+
+ # avg_rewards = trainer.avg_rewards()
+ avg_rewards = trainer.total_rewards
+ plot_running_avg(avg_rewards, steps=100,
+ xlabel="Episodes", ylabel="Reward",
+ title="Running reward average over 100 episodes")
+
+ avg_episode_dist = np.array(trainer.total_distortions)
+ print("{0} Max/Min distortion {1}/{2}".format(INFO, np.max(avg_episode_dist), np.min(avg_episode_dist)))
+
+ plot_running_avg(avg_episode_dist, steps=100,
+ xlabel="Episodes", ylabel="Distortion",
+ title="Running distortion average over 100 episodes")
+
+ print("=============================================")
+ print("{0} Generating distorted dataset".format(INFO))
+ # Let's play
+ env.reset()
+
+ stop_criterion = IterationControl(n_itrs=10, min_dist=MIN_DISTORTION, max_dist=MAX_DISTORTION)
+ agent.play(env=env, stop_criterion=stop_criterion)
+ env.save_current_dataset(episode_index=-2, save_index=False)
+ print("{0} Done....".format(INFO))
+ print("=============================================")
\ No newline at end of file
diff --git a/src/examples/qlearning_three_columns.py b/src/examples/qlearning_three_columns.py
index a4253f0..29e13a7 100644
--- a/src/examples/qlearning_three_columns.py
+++ b/src/examples/qlearning_three_columns.py
@@ -27,50 +27,6 @@
def get_ethinicity_hierarchy():
ethnicity_hierarchy = SerialHierarchy(values={})
- """
- ethnicity_hierarchy.add("Mixed White/Asian", "White/Asian")
- ethnicity_hierarchy.add("White/Asian", "White")
-
- ethnicity_hierarchy.add("Chinese", "Asian")
- ethnicity_hierarchy.add("Indian", "Asian")
-
- ethnicity_hierarchy.add("Mixed White/Black African", "African-Mixed")
- ethnicity_hierarchy.add("African-Mixed", "Mixed")
-
- ethnicity_hierarchy.add("Black African", "African")
- ethnicity_hierarchy.add("African", "African")
-
- ethnicity_hierarchy.add("Asian other", "Asian")
- ethnicity_hierarchy.add("Black other", "Black")
-
- ethnicity_hierarchy.add("Mixed White/Black Caribbean", "Caribbean-Mixed")
- ethnicity_hierarchy.add("Caribbean-Mixed", "Mixed")
-
- ethnicity_hierarchy.add("Mixed other", "Mixed")
- ethnicity_hierarchy.add("Arab", "Asian")
-
- ethnicity_hierarchy.add("White Irish", "European-White")
- ethnicity_hierarchy.add("European-White", "European")
-
- ethnicity_hierarchy.add("Not stated", "Not stated")
- ethnicity_hierarchy.add("White Gypsy/Traveller", "White")
-
- ethnicity_hierarchy.add("White British", "British")
- ethnicity_hierarchy.add("British", "European")
-
- ethnicity_hierarchy.add("Bangladeshi", "Asian")
- ethnicity_hierarchy.add("White other", "White")
- ethnicity_hierarchy.add("Black Caribbean", "Black")
- ethnicity_hierarchy.add("Pakistani", "Asian")
-
- ethnicity_hierarchy.add("White", "White")
- ethnicity_hierarchy.add("Mixed", "Mixed")
- ethnicity_hierarchy.add("European", "European")
- ethnicity_hierarchy.add("Asian", "Asian")
- ethnicity_hierarchy.add("Black", "Black")
- ethnicity_hierarchy.add("Not stated", "Not stated")
- """
-
ethnicity_hierarchy["Mixed White/Asian"] = "White/Asian"
ethnicity_hierarchy["White/Asian"] = "Mixed"
@@ -109,6 +65,8 @@ def get_ethinicity_hierarchy():
if __name__ == '__main__':
+
+ # set the seed for random engine
random.seed(42)
# configuration params
diff --git a/src/utils/episode_info.py b/src/utils/episode_info.py
new file mode 100644
index 0000000..ef20be7
--- /dev/null
+++ b/src/utils/episode_info.py
@@ -0,0 +1,12 @@
+"""
+EpisodeInfo class. This is a helper class
+to wrap the output after an episode has finished
+"""
+
+
+class EpisodeInfo(object):
+
+ def __init__(self):
+ self.episode_score = None
+ self.total_distortion = None
+ self.info = {}
\ No newline at end of file