162 changes: 162 additions & 0 deletions docs/source/Examples/qlearning_all_columns.rst
@@ -0,0 +1,162 @@
Q-learning with many columns
=============================

Overview
--------

In the `previous <qlearning_three_columns.html>`_ example, we applied Q-learning to a dataset consisting
of three columns. Moreover, we used a one-dimensional state space; we discretized the range :math:`[0,1]` into bins and used the
resulting bin index as the state index. In this example, we simply allow for more columns in the dataset.
Other than that, this example is the same as the previous one.
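
The state of the environment is thus the bin index of the total dataset distortion. A minimal sketch of this binning (assuming ``numpy``; the bin edges and the ``state_index`` helper are illustrative and not part of the library) is

.. code-block::

    import numpy as np

    N_STATES = 10
    # bin edges partitioning the distortion range [0, 1]
    bins = np.linspace(0.0, 1.0, N_STATES + 1)

    def state_index(total_distortion: float) -> int:
        # np.digitize returns a 1-based bin index; the clip keeps
        # the boundary value 1.0 inside the last bin
        return int(np.clip(np.digitize(total_distortion, bins) - 1, 0, N_STATES - 1))

    assert state_index(0.0) == 0
    assert state_index(0.55) == 5
    assert state_index(1.0) == N_STATES - 1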

Code
----

The necessary imports

.. code-block::

import random
import numpy as np

from src.examples.helpers.load_full_mock_dataset import load_discrete_env, get_ethinicity_hierarchy, \
    get_gender_hierarchy, get_salary_bins, load_mock_subjects
from src.datasets import ColumnType
from src.spaces.env_type import DiscreteEnvType
from src.spaces.action_space import ActionSpace
from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
from src.algorithms.q_learning import QLearnConfig, QLearning
from src.policies.epsilon_greedy_policy import EpsilonGreedyPolicy, EpsilonDecayOption
from src.trainers.trainer import Trainer, TrainerConfig
from src.examples.helpers.plot_utils import plot_running_avg
from src.utils import INFO

Next, establish a set of configuration parameters

.. code-block::

# configuration params
N_STATES = 10
GAMMA = 0.99
ALPHA = 0.1
PUNISH_FACTOR = 2.0
MAX_DISTORTION = 0.7
MIN_DISTORTION = 0.4
SAVE_DISTORTED_SETS_DIR = "/home/alex/qi3/drl_anonymity/src/examples/q_learning_all_cols_results/distorted_set"
EPS = 1.0
EPSILON_DECAY_OPTION = EpsilonDecayOption.CONSTANT_RATE # .INVERSE_STEP
EPSILON_DECAY_FACTOR = 0.01
USE_IDENTIFYING_COLUMNS_DIST = True
IDENTIFY_COLUMN_DIST_FACTOR = 0.1
N_EPISODES = 1001
N_ITRS_PER_EPISODE = 30
OUT_OF_MAX_BOUND_REWARD = -1.0
OUT_OF_MIN_BOUND_REWARD = -1.0
IN_BOUNDS_REWARD = 5.0
OUTPUT_MSG_FREQUENCY = 100
N_ROUNDS_BELOW_MIN_DISTORTION = 10

The driver code brings all the elements together

.. code-block::

if __name__ == '__main__':

    # set the seed for the random engine
    random.seed(42)

    # specify the column types. An IDENTIFYING_ATTRIBUTE column
    # will be removed from the anonymized dataset.
    # An INSENSITIVE_ATTRIBUTE remains intact.
    # A QUASI_IDENTIFYING_ATTRIBUTE is used in the anonymization.
    # A SENSITIVE_ATTRIBUTE currently remains intact.
    column_types = {"NHSno": ColumnType.IDENTIFYING_ATTRIBUTE,
                    "given_name": ColumnType.IDENTIFYING_ATTRIBUTE,
                    "surname": ColumnType.IDENTIFYING_ATTRIBUTE,
                    "gender": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
                    "dob": ColumnType.SENSITIVE_ATTRIBUTE,
                    "ethnicity": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
                    "education": ColumnType.SENSITIVE_ATTRIBUTE,
                    "salary": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
                    "mutation_status": ColumnType.SENSITIVE_ATTRIBUTE,
                    "preventative_treatment": ColumnType.SENSITIVE_ATTRIBUTE,
                    "diagnosis": ColumnType.INSENSITIVE_ATTRIBUTE}

    # define the action space
    action_space = ActionSpace(n=10)

    # all the columns that are SENSITIVE_ATTRIBUTE are kept as they are,
    # because currently we have no model for them.
    # INSENSITIVE_ATTRIBUTE columns are also kept as is;
    # to declare this we use an ActionIdentity
    action_space.add_many(ActionIdentity(column_name="dob"),
                          ActionIdentity(column_name="education"),
                          ActionIdentity(column_name="salary"),
                          ActionIdentity(column_name="diagnosis"),
                          ActionIdentity(column_name="mutation_status"),
                          ActionIdentity(column_name="preventative_treatment"),
                          ActionIdentity(column_name="ethnicity"),
                          ActionStringGeneralize(column_name="ethnicity",
                                                 generalization_table=get_ethinicity_hierarchy()),
                          ActionStringGeneralize(column_name="gender",
                                                 generalization_table=get_gender_hierarchy()),
                          ActionNumericBinGeneralize(column_name="salary",
                                                     generalization_table=get_salary_bins(ds=load_mock_subjects(),
                                                                                          n_states=N_STATES)))
    action_space.shuffle()

    env = load_discrete_env(env_type=DiscreteEnvType.TOTAL_DISTORTION_STATE,
                            n_states=N_STATES,
                            min_distortion=MIN_DISTORTION, max_distortion=MAX_DISTORTION,
                            total_min_distortion=MIN_DISTORTION, total_max_distortion=MAX_DISTORTION,
                            out_of_max_bound_reward=OUT_OF_MAX_BOUND_REWARD,
                            out_of_min_bound_reward=OUT_OF_MIN_BOUND_REWARD,
                            in_bounds_reward=IN_BOUNDS_REWARD,
                            punish_factor=PUNISH_FACTOR,
                            column_types=column_types,
                            action_space=action_space,
                            save_distoreted_sets_dir=SAVE_DISTORTED_SETS_DIR,
                            use_identifying_column_dist_in_total_dist=USE_IDENTIFYING_COLUMNS_DIST,
                            use_identifying_column_dist_factor=IDENTIFY_COLUMN_DIST_FACTOR,
                            gamma=GAMMA,
                            n_rounds_below_min_distortion=N_ROUNDS_BELOW_MIN_DISTORTION)

    agent_config = QLearnConfig(n_itrs_per_episode=N_ITRS_PER_EPISODE, gamma=GAMMA,
                                alpha=ALPHA,
                                policy=EpsilonGreedyPolicy(eps=EPS, n_actions=env.n_actions,
                                                           decay_op=EPSILON_DECAY_OPTION,
                                                           epsilon_decay_factor=EPSILON_DECAY_FACTOR))

    agent = QLearning(algo_config=agent_config)

    trainer_config = TrainerConfig(n_episodes=N_EPISODES, output_msg_frequency=OUTPUT_MSG_FREQUENCY)
    trainer = Trainer(env=env, agent=agent, configuration=trainer_config)
    trainer.train()

    avg_rewards = trainer.total_rewards
    plot_running_avg(avg_rewards, steps=100,
                     xlabel="Episodes", ylabel="Reward",
                     title="Running reward average over 100 episodes")

    avg_episode_dist = np.array(trainer.total_distortions)
    print("{0} Max/Min distortion {1}/{2}".format(INFO, np.max(avg_episode_dist), np.min(avg_episode_dist)))

    plot_running_avg(avg_episode_dist, steps=100,
                     xlabel="Episodes", ylabel="Distortion",
                     title="Running distortion average over 100 episodes")


Results
-------

The following images show the performance of the learning process.

.. figure:: images/qlearn_rewards_all_cols.png

Running average reward.


.. figure:: images/qlearn_distortion_multi_cols.png

Running average total distortion.
13 changes: 12 additions & 1 deletion docs/source/Examples/qlearning_three_columns.rst
@@ -48,6 +48,17 @@ Given that the total dataset distortion is assumed to be in the range :math:`[0,
discretize this range into bins and for each entailed value of the distortion we use the corresponding bin as a state index.
Alternatively, we could discretize the distortion of each column into bins and create tuples of indices representing a state.

We preprocess the dataset by normalizing its numeric columns.
We use the normalized cosine distance to measure the distortion of columns holding string data.
Similarly, we use the following :math:`L_2`-based norm to calculate the distortion of
numeric columns

.. math::

dist(\mathbf{v}_1, \mathbf{v}_2) = \sqrt{\frac{||\mathbf{v}_1 - \mathbf{v}_2||_{L_2}}{N}}

where :math:`N` is the size of the vectors. Due to the normalization of the numeric columns, the resulting distance lies in the range :math:`[0,1]`.
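
A minimal sketch of this numeric distortion (assuming ``numpy``; the ``numeric_distortion`` name is illustrative rather than the library's actual API) is

.. code-block::

    import numpy as np

    def numeric_distortion(v1: np.ndarray, v2: np.ndarray) -> float:
        # L2 norm of the difference, scaled by the vector size N
        n = v1.shape[0]
        return float(np.sqrt(np.linalg.norm(v1 - v2) / n))

    # with normalized columns the result stays in [0, 1]
    v1 = np.array([0.10, 0.50, 0.90])
    v2 = np.array([0.20, 0.40, 0.80])
    print(numeric_distortion(v1, v2))  # ~0.24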


Code
----
@@ -98,7 +109,7 @@ Next establish a set of configuration parameters
SAVE_DISTORTED_SETS_DIR = "q_learning_three_columns_results/distorted_set"
PUNISH_FACTOR = 2.0

The dirver code creates brings all the elements together
The driver code brings all the elements together

.. code-block::

Expand Down
1 change: 1 addition & 0 deletions docs/source/examples.rst
@@ -7,5 +7,6 @@ Some examples can be found below
:maxdepth: 4

Examples/qlearning_three_columns
Examples/qlearning_all_columns
Examples/semi_gradient_sarsa_three_columns
Examples/a2c_three_columns
1 change: 1 addition & 0 deletions src/datasets/dataset_wrapper.py
@@ -214,6 +214,7 @@ def apply_column_transform(self, column_name: str, transform: Transform) -> None
"""

# get the column

column = self.get_column(col_name=column_name)
column = transform.act(**{"data": column.values})
self.ds[transform.column_name] = column
52 changes: 25 additions & 27 deletions src/examples/helpers/load_full_mock_dataset.py
@@ -9,32 +9,13 @@
from src.datasets.datasets_loaders import MockSubjectsLoader, MockSubjectsData
from src.spaces.discrete_state_environment import DiscreteStateEnvironment
from src.spaces.action_space import ActionSpace
from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
from src.maths.distortion_calculator import DistortionCalculationType, DistortionCalculator
from src.maths.numeric_distance_type import NumericDistanceType
from src.maths.string_distance_calculator import StringDistanceType
from src.utils.reward_manager import RewardManager
from src.spaces.env_type import DiscreteEnvType


N_LAYERS = 5
N_BINS = 10
N_EPISODES = 1000
OUTPUT_MSG_FREQUENCY = 100
GAMMA = 0.99
ALPHA = 0.1

MAX_DISTORTION = 0.7
MIN_DISTORTION = 0.3
OUT_OF_MAX_BOUND_REWARD = -1.0
OUT_OF_MIN_BOUND_REWARD = -1.0
IN_BOUNDS_REWARD = 5.0
N_ROUNDS_BELOW_MIN_DISTORTION = 10
#SAVE_DISTORTED_SETS_DIR = "/home/alex/qi3/drl_anonymity/src/examples/semi_grad_sarsa/distorted_set"
REWARD_FACTOR = 0.95
PUNISH_FACTOR = 2.0


def get_gender_hierarchy():
    hierarchy = SerialHierarchy(values={"F": "*", "M": "*", "*": "*"})
    return hierarchy
@@ -94,14 +75,32 @@ def load_mock_subjects() -> MockSubjectsLoader:
return ds


def get_salary_bins(ds: MockSubjectsLoader, n_states: int):
    # create bins for the salary generalization
    unique_salary = ds.get_column_unique_values(col_name="salary")
    unique_salary.sort()

    # slightly increase the maximum value, because otherwise
    # the maximum salary falls out of the bin bounds
    bins = np.linspace(unique_salary[0], unique_salary[-1] + 1, n_states)
    return bins


def load_discrete_env(env_type: DiscreteEnvType, n_states: int,
min_distortion: Any, max_distortion: Any,
total_min_distortion: float, total_max_distortion: float,
punish_factor: float, column_types: dict,
out_of_max_bound_reward: float,
out_of_min_bound_reward: float,
in_bounds_reward: float,
punish_factor: float,
column_types: dict,
action_space: ActionSpace,
save_distoreted_sets_dir: str,
use_identifying_column_dist_in_total_dist: bool,
use_identifying_column_dist_factor: float) -> DiscreteStateEnvironment:
use_identifying_column_dist_factor: float,
gamma: float,
n_rounds_below_min_distortion: int) -> DiscreteStateEnvironment:

mock_ds = load_mock_subjects()

action_space.shuffle()
@@ -114,18 +113,17 @@ def load_discrete_env(env_type: DiscreteEnvType, n_states: int,
dataset_distortion_type=DistortionCalculationType.SUM),
reward_manager=RewardManager(
bounds=(total_min_distortion, total_max_distortion),
out_of_max_bound_reward=OUT_OF_MAX_BOUND_REWARD,
out_of_min_bound_reward=OUT_OF_MIN_BOUND_REWARD,
in_bounds_reward=IN_BOUNDS_REWARD,
out_of_max_bound_reward=out_of_max_bound_reward,
out_of_min_bound_reward=out_of_min_bound_reward,
in_bounds_reward=in_bounds_reward,
min_distortions=min_distortion, max_distortions=max_distortion,
punish_factor=punish_factor),
gamma=GAMMA,
reward_factor=REWARD_FACTOR,
gamma=gamma,
min_distortion=min_distortion,
min_total_distortion=total_min_distortion,
max_distortion=max_distortion,
max_total_distortion=total_max_distortion,
n_rounds_below_min_distortion=N_ROUNDS_BELOW_MIN_DISTORTION,
n_rounds_below_min_distortion=n_rounds_below_min_distortion,
distorted_set_path=Path(save_distoreted_sets_dir),
n_states=n_states, env_type=env_type, column_types=column_types,
use_identifying_column_dist_in_total_dist=use_identifying_column_dist_in_total_dist,
1 change: 0 additions & 1 deletion src/examples/helpers/load_three_columns_mock_dataset.py
@@ -9,7 +9,6 @@
from src.datasets.datasets_loaders import MockSubjectsLoader, MockSubjectsData
from src.spaces.discrete_state_environment import DiscreteStateEnvironment
from src.spaces.action_space import ActionSpace
from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
from src.maths.distortion_calculator import DistortionCalculationType, DistortionCalculator
from src.maths.numeric_distance_type import NumericDistanceType
from src.maths.string_distance_calculator import StringDistanceType