diff --git a/docs/source/Examples/images/qlearn_distortion_multi_cols.png b/docs/source/Examples/images/qlearn_distortion_multi_cols.png
new file mode 100644
index 0000000..a52da78
Binary files /dev/null and b/docs/source/Examples/images/qlearn_distortion_multi_cols.png differ
diff --git a/docs/source/Examples/images/qlearn_distortion_multi_cols_multi_state.png b/docs/source/Examples/images/qlearn_distortion_multi_cols_multi_state.png
new file mode 100644
index 0000000..a52da78
Binary files /dev/null and b/docs/source/Examples/images/qlearn_distortion_multi_cols_multi_state.png differ
diff --git a/docs/source/Examples/images/qlearn_rewards_all_cols.png b/docs/source/Examples/images/qlearn_rewards_all_cols.png
new file mode 100644
index 0000000..d32f012
Binary files /dev/null and b/docs/source/Examples/images/qlearn_rewards_all_cols.png differ
diff --git a/docs/source/Examples/images/qlearn_rewards_all_cols_multi_state.png b/docs/source/Examples/images/qlearn_rewards_all_cols_multi_state.png
new file mode 100644
index 0000000..d32f012
Binary files /dev/null and b/docs/source/Examples/images/qlearn_rewards_all_cols_multi_state.png differ
diff --git a/docs/source/Examples/qlearning_all_columns.rst b/docs/source/Examples/qlearning_all_columns.rst
new file mode 100644
index 0000000..fa2b2c8
--- /dev/null
+++ b/docs/source/Examples/qlearning_all_columns.rst
@@ -0,0 +1,162 @@
+Q-learning with many columns
+=============================
+
+Overview
+--------
+
+In the :doc:`previous <qlearning_three_columns>` example, we applied Q-learning on a dataset consisting
+of three columns. Moreover, we used a one-dimensional state space; we discretized the range :math:`[0,1]` into bins and used the
+resulting bin index as the state index. In this example, we will simply allow for more columns in the data set.
+Other than that, this example is the same as the previous one.
+
+Code
+----
+
+The necessary imports
+
+.. code-block::
+
+    import random
+    import numpy as np
+
+    from src.examples.helpers.load_full_mock_dataset import load_discrete_env, get_ethinicity_hierarchy, \
+        get_gender_hierarchy, get_salary_bins, load_mock_subjects
+    from src.datasets import ColumnType
+    from src.spaces.env_type import DiscreteEnvType
+    from src.spaces.action_space import ActionSpace
+    from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
+    from src.algorithms.q_learning import QLearnConfig, QLearning
+    from src.policies.epsilon_greedy_policy import EpsilonGreedyPolicy, EpsilonDecayOption
+    from src.trainers.trainer import Trainer, TrainerConfig
+    from src.examples.helpers.plot_utils import plot_running_avg
+    from src.utils import INFO
+
+Next, we establish a set of configuration parameters
+
+.. code-block::
+
+    # configuration params
+    N_STATES = 10
+    GAMMA = 0.99
+    ALPHA = 0.1
+    PUNISH_FACTOR = 2.0
+    MAX_DISTORTION = 0.7
+    MIN_DISTORTION = 0.4
+    SAVE_DISTORTED_SETS_DIR = "/home/alex/qi3/drl_anonymity/src/examples/q_learning_all_cols_results/distorted_set"
+    EPS = 1.0
+    EPSILON_DECAY_OPTION = EpsilonDecayOption.CONSTANT_RATE  # .INVERSE_STEP
+    EPSILON_DECAY_FACTOR = 0.01
+    USE_IDENTIFYING_COLUMNS_DIST = True
+    IDENTIFY_COLUMN_DIST_FACTOR = 0.1
+    N_EPISODES = 1001
+    N_ITRS_PER_EPISODE = 30
+    OUT_OF_MAX_BOUND_REWARD = -1.0
+    OUT_OF_MIN_BOUND_REWARD = -1.0
+    IN_BOUNDS_REWARD = 5.0
+    OUTPUT_MSG_FREQUENCY = 100
+    N_ROUNDS_BELOW_MIN_DISTORTION = 10
+
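+The environment maps the total dataset distortion onto a state index by binning.
+To make this concrete, here is a minimal, illustrative sketch using ``numpy``; the
+actual mapping is implemented by the environment in
+``src.spaces.discrete_state_environment`` and may treat the bin edges differently.
+
+.. code-block::
+
+    import numpy as np
+
+    # discretize the assumed distortion range [0, 1] into N_STATES bins
+    n_states = 10
+    bins = np.linspace(0.0, 1.0, n_states)
+
+    # an example total dataset distortion
+    total_distortion = 0.45
+
+    # the index of the bin that the distortion falls into serves as the state index
+    state_idx = int(np.digitize(total_distortion, bins))
+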
+The driver code brings all the elements together
+
+.. code-block::
+
+    if __name__ == '__main__':
+
+        # set the seed for random engine
+        random.seed(42)
+
+        # specify the column types. An identifying column
+        # will be removed from the anonymized data set.
+        # An INSENSITIVE_ATTRIBUTE remains intact.
+        # A QUASI_IDENTIFYING_ATTRIBUTE is used in the anonymization.
+        # A SENSITIVE_ATTRIBUTE currently remains intact.
+        column_types = {"NHSno": ColumnType.IDENTIFYING_ATTRIBUTE,
+                        "given_name": ColumnType.IDENTIFYING_ATTRIBUTE,
+                        "surname": ColumnType.IDENTIFYING_ATTRIBUTE,
+                        "gender": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
+                        "dob": ColumnType.SENSITIVE_ATTRIBUTE,
+                        "ethnicity": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
+                        "education": ColumnType.SENSITIVE_ATTRIBUTE,
+                        "salary": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
+                        "mutation_status": ColumnType.SENSITIVE_ATTRIBUTE,
+                        "preventative_treatment": ColumnType.SENSITIVE_ATTRIBUTE,
+                        "diagnosis": ColumnType.INSENSITIVE_ATTRIBUTE}
+
+        # define the action space
+        action_space = ActionSpace(n=10)
+
+        # all the columns that are SENSITIVE_ATTRIBUTE will be kept as they are
+        # because currently we have no model
+        # also INSENSITIVE_ATTRIBUTE will be kept as is
+        # in order to declare this we use an ActionIdentity
+        action_space.add_many(ActionIdentity(column_name="dob"),
+                              ActionIdentity(column_name="education"),
+                              ActionIdentity(column_name="salary"),
+                              ActionIdentity(column_name="diagnosis"),
+                              ActionIdentity(column_name="mutation_status"),
+                              ActionIdentity(column_name="preventative_treatment"),
+                              ActionIdentity(column_name="ethnicity"),
+                              ActionStringGeneralize(column_name="ethnicity",
+                                                     generalization_table=get_ethinicity_hierarchy()),
+                              ActionStringGeneralize(column_name="gender",
+                                                     generalization_table=get_gender_hierarchy()),
+                              ActionNumericBinGeneralize(column_name="salary",
+                                                         generalization_table=get_salary_bins(ds=load_mock_subjects(),
+                                                                                              n_states=N_STATES))
+                              )
+        action_space.shuffle()
+
+        env = load_discrete_env(env_type=DiscreteEnvType.TOTAL_DISTORTION_STATE,
+                                n_states=N_STATES,
+                                min_distortion=MIN_DISTORTION, max_distortion=MAX_DISTORTION,
+                                total_min_distortion=MIN_DISTORTION, total_max_distortion=MAX_DISTORTION,
+                                out_of_max_bound_reward=OUT_OF_MAX_BOUND_REWARD,
+                                out_of_min_bound_reward=OUT_OF_MIN_BOUND_REWARD,
+                                in_bounds_reward=IN_BOUNDS_REWARD,
+                                punish_factor=PUNISH_FACTOR,
+                                column_types=column_types,
+                                action_space=action_space,
+                                save_distoreted_sets_dir=SAVE_DISTORTED_SETS_DIR,
+                                use_identifying_column_dist_in_total_dist=USE_IDENTIFYING_COLUMNS_DIST,
+                                use_identifying_column_dist_factor=IDENTIFY_COLUMN_DIST_FACTOR,
+                                gamma=GAMMA,
+                                n_rounds_below_min_distortion=N_ROUNDS_BELOW_MIN_DISTORTION)
+
+        agent_config = QLearnConfig(n_itrs_per_episode=N_ITRS_PER_EPISODE, gamma=GAMMA,
+                                    alpha=ALPHA,
+                                    policy=EpsilonGreedyPolicy(eps=EPS, n_actions=env.n_actions,
+                                                               decay_op=EPSILON_DECAY_OPTION,
+                                                               epsilon_decay_factor=EPSILON_DECAY_FACTOR))
+
+        agent = QLearning(algo_config=agent_config)
+
+        trainer_config = TrainerConfig(n_episodes=N_EPISODES, output_msg_frequency=OUTPUT_MSG_FREQUENCY)
+        trainer = Trainer(env=env, agent=agent, configuration=trainer_config)
+        trainer.train()
+
+        avg_rewards = trainer.total_rewards
+        plot_running_avg(avg_rewards, steps=100,
+                         xlabel="Episodes", ylabel="Reward",
+                         title="Running reward average over 100 episodes")
+
+        avg_episode_dist = np.array(trainer.total_distortions)
+        print("{0} Max/Min distortion {1}/{2}".format(INFO, np.max(avg_episode_dist), np.min(avg_episode_dist)))
+
+        plot_running_avg(avg_episode_dist, steps=100,
+                         xlabel="Episodes", ylabel="Distortion",
+                         title="Running distortion average over 100 episodes")
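+
+The agent explores with the ``EpsilonGreedyPolicy`` configured above. Roughly, such
+a policy selects actions along the following lines. This is an illustrative sketch
+only; the actual implementation lives in ``src.policies.epsilon_greedy_policy`` and
+also handles the epsilon decay options.
+
+.. code-block::
+
+    import random
+    import numpy as np
+
+    def epsilon_greedy(q_row, eps: float) -> int:
+        # with probability eps pick a random action (explore),
+        # otherwise pick the action with the highest Q-value (exploit)
+        if random.random() < eps:
+            return random.randint(0, len(q_row) - 1)
+        return int(np.argmax(q_row))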
+
+
+Results
+-------
+
+The following images show the performance of the learning process:
+
+.. figure:: images/qlearn_rewards_all_cols.png
+
+   Running average reward.
+
+
+.. figure:: images/qlearn_distortion_multi_cols.png
+
+   Running average total distortion.
diff --git a/docs/source/Examples/qlearning_three_columns.rst b/docs/source/Examples/qlearning_three_columns.rst
index d7cadc1..c44dd39 100644
--- a/docs/source/Examples/qlearning_three_columns.rst
+++ b/docs/source/Examples/qlearning_three_columns.rst
@@ -48,6 +48,17 @@
 Given that the total dataset distortion is assumed to be in the range :math:`[0,1]` we,
 discretize this range into bins and for each entailed value of the distortion we use the corresponding bin
 as a state index. Alternatively, we could discretize the distortion of each column into bins and create tuples of indeces representing a state.
+We preprocess the data set by normalizing the numeric columns.
+We will use the cosine normalized distance to measure the distortion of columns with string data.
+Similarly, we use the following :math:`L_2`-based norm for calculating the distortion of
+numeric columns
+
+.. math::
+
+   dist(\mathbf{v}_1, \mathbf{v}_2) = \sqrt{\frac{||\mathbf{v}_1 - \mathbf{v}_2||_{L_2}}{N}}
+
+where :math:`N` is the size of the vector. This way, due to the normalization of the numeric columns, the resulting distance will be in the range :math:`[0,1]`.
+
 Code
 ----
 
@@ -98,7 +109,7 @@
    SAVE_DISTORTED_SETS_DIR = "q_learning_three_columns_results/distorted_set"
    PUNISH_FACTOR = 2.0
 
-The dirver code creates brings all the elements together
+The driver code brings all the elements together
 
 .. code-block::
diff --git a/docs/source/examples.rst b/docs/source/examples.rst
index 2eff455..73ca019 100644
--- a/docs/source/examples.rst
+++ b/docs/source/examples.rst
@@ -7,5 +7,6 @@ Some examples can be found below
    :maxdepth: 4
 
    Examples/qlearning_three_columns
+   Examples/qlearning_all_columns
    Examples/semi_gradient_sarsa_three_columns
    Examples/a2c_three_columns
diff --git a/src/datasets/dataset_wrapper.py b/src/datasets/dataset_wrapper.py
index 26b95ad..10fe20e 100644
--- a/src/datasets/dataset_wrapper.py
+++ b/src/datasets/dataset_wrapper.py
@@ -214,6 +214,7 @@ def apply_column_transform(self, column_name: str, transform: Transform) -> None
         """
 
         # get the column
+        column = self.get_column(col_name=column_name)
         column = transform.act(**{"data": column.values})
         self.ds[transform.column_name] = column
diff --git a/src/examples/helpers/load_full_mock_dataset.py b/src/examples/helpers/load_full_mock_dataset.py
index d14d14b..18ec319 100644
--- a/src/examples/helpers/load_full_mock_dataset.py
+++ b/src/examples/helpers/load_full_mock_dataset.py
@@ -9,7 +9,6 @@
 from src.datasets.datasets_loaders import MockSubjectsLoader, MockSubjectsData
 from src.spaces.discrete_state_environment import DiscreteStateEnvironment
 from src.spaces.action_space import ActionSpace
-from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
 from src.maths.distortion_calculator import DistortionCalculationType, DistortionCalculator
 from src.maths.numeric_distance_type import NumericDistanceType
 from src.maths.string_distance_calculator import StringDistanceType
@@ -17,24 +16,6 @@
 from src.spaces.env_type import DiscreteEnvType
 
 
-N_LAYERS = 5
-N_BINS = 10
-N_EPISODES = 1000
-OUTPUT_MSG_FREQUENCY = 100
-GAMMA = 0.99
-ALPHA = 0.1
-
-MAX_DISTORTION = 0.7
-MIN_DISTORTION = 0.3
-OUT_OF_MAX_BOUND_REWARD = -1.0
-OUT_OF_MIN_BOUND_REWARD = -1.0
-IN_BOUNDS_REWARD = 5.0
-N_ROUNDS_BELOW_MIN_DISTORTION = 10
-#SAVE_DISTORTED_SETS_DIR = "/home/alex/qi3/drl_anonymity/src/examples/semi_grad_sarsa/distorted_set"
-REWARD_FACTOR = 0.95
-PUNISH_FACTOR = 2.0
-
-
 def get_gender_hierarchy():
     hierarchy = SerialHierarchy(values={"F": "*", "M": "*", "*": "*"})
     return hierarchy
@@ -94,14 +75,32 @@
     return ds
 
 
+def get_salary_bins(ds: MockSubjectsLoader, n_states: int):
+    # create bins for the salary generalization
+    unique_salary = ds.get_column_unique_values(col_name="salary")
+    unique_salary.sort()
+
+    # slightly increase the max value because otherwise
+    # the maximum salary falls out of the last bin
+    bins = np.linspace(unique_salary[0], unique_salary[-1] + 1, n_states)
+    return bins
+
+
 def load_discrete_env(env_type: DiscreteEnvType, n_states: int,
                       min_distortion: Any, max_distortion: Any,
                       total_min_distortion: float, total_max_distortion: float,
-                      punish_factor: float, column_types: dict,
+                      out_of_max_bound_reward: float,
+                      out_of_min_bound_reward: float,
+                      in_bounds_reward: float,
+                      punish_factor: float,
+                      column_types: dict,
                       action_space: ActionSpace,
                       save_distoreted_sets_dir: str,
                       use_identifying_column_dist_in_total_dist: bool,
-                      use_identifying_column_dist_factor: float) -> DiscreteStateEnvironment:
+                      use_identifying_column_dist_factor: float,
+                      gamma: float,
+                      n_rounds_below_min_distortion: int) -> DiscreteStateEnvironment:
+
     mock_ds = load_mock_subjects()
 
     action_space.shuffle()
@@ -114,18 +113,17 @@
                 dataset_distortion_type=DistortionCalculationType.SUM),
             reward_manager=RewardManager(
                 bounds=(total_min_distortion, total_max_distortion),
-                out_of_max_bound_reward=OUT_OF_MAX_BOUND_REWARD,
-                out_of_min_bound_reward=OUT_OF_MIN_BOUND_REWARD,
-                in_bounds_reward=IN_BOUNDS_REWARD,
+                out_of_max_bound_reward=out_of_max_bound_reward,
+                out_of_min_bound_reward=out_of_min_bound_reward,
+                in_bounds_reward=in_bounds_reward,
                 min_distortions=min_distortion, max_distortions=max_distortion,
                 punish_factor=punish_factor),
-            gamma=GAMMA,
-            reward_factor=REWARD_FACTOR,
+            gamma=gamma,
            min_distortion=min_distortion, min_total_distortion=total_min_distortion,
            max_distortion=max_distortion, max_total_distortion=total_max_distortion,
-           n_rounds_below_min_distortion=N_ROUNDS_BELOW_MIN_DISTORTION,
+           n_rounds_below_min_distortion=n_rounds_below_min_distortion,
            distorted_set_path=Path(save_distoreted_sets_dir),
            n_states=n_states, env_type=env_type, column_types=column_types,
            use_identifying_column_dist_in_total_dist=use_identifying_column_dist_in_total_dist,
diff --git a/src/examples/helpers/load_three_columns_mock_dataset.py b/src/examples/helpers/load_three_columns_mock_dataset.py
index 62d7457..31fbaec 100644
--- a/src/examples/helpers/load_three_columns_mock_dataset.py
+++ b/src/examples/helpers/load_three_columns_mock_dataset.py
@@ -9,7 +9,6 @@
 from src.datasets.datasets_loaders import MockSubjectsLoader, MockSubjectsData
 from src.spaces.discrete_state_environment import DiscreteStateEnvironment
 from src.spaces.action_space import ActionSpace
-from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
 from src.maths.distortion_calculator import DistortionCalculationType, DistortionCalculator
 from src.maths.numeric_distance_type import NumericDistanceType
 from src.maths.string_distance_calculator import StringDistanceType
diff --git a/src/examples/q_learning_all_columns.py b/src/examples/q_learning_all_columns.py
index 0940873..0beba2a 100644
--- a/src/examples/q_learning_all_columns.py
+++ b/src/examples/q_learning_all_columns.py
@@ -1,6 +1,8 @@
 import random
+import numpy as np
 
-from src.examples.helpers.load_full_mock_dataset import load_discrete_env, get_ethinicity_hierarchy, get_gender_hierarchy
+from src.examples.helpers.load_full_mock_dataset import load_discrete_env, get_ethinicity_hierarchy, \
+    get_gender_hierarchy, get_salary_bins, load_mock_subjects
 from src.datasets import ColumnType
 from src.spaces.env_type import DiscreteEnvType
 from src.spaces.action_space import ActionSpace
@@ -8,10 +10,13 @@
 from src.algorithms.q_learning import QLearnConfig, QLearning
 from src.policies.epsilon_greedy_policy import EpsilonGreedyPolicy, EpsilonDecayOption
 from src.trainers.trainer import Trainer, TrainerConfig
+from src.examples.helpers.plot_utils import plot_running_avg
+from src.utils import INFO
 
-N_BINS = 20
+# configuration params
+N_STATES = 10
 GAMMA = 0.99
-ALPHA = 0.01
+ALPHA = 0.1
 PUNISH_FACTOR = 2.0
 MAX_DISTORTION = 0.7
 MIN_DISTORTION = 0.4
@@ -21,12 +26,25 @@
 EPSILON_DECAY_FACTOR = 0.01
 USE_IDENTIFYING_COLUMNS_DIST = True
 IDENTIFY_COLUMN_DIST_FACTOR = 0.1
+N_EPISODES = 1001
+N_ITRS_PER_EPISODE = 30
+OUT_OF_MAX_BOUND_REWARD = -1.0
+OUT_OF_MIN_BOUND_REWARD = -1.0
+IN_BOUNDS_REWARD = 5.0
+OUTPUT_MSG_FREQUENCY = 100
+N_ROUNDS_BELOW_MIN_DISTORTION = 10
+
 
 if __name__ == '__main__':
 
     # set the seed for random engine
     random.seed(42)
 
+    # specify the column types. An identifying column
+    # will be removed from the anonymized data set.
+    # An INSENSITIVE_ATTRIBUTE remains intact.
+    # A QUASI_IDENTIFYING_ATTRIBUTE is used in the anonymization.
+    # A SENSITIVE_ATTRIBUTE currently remains intact.
     column_types = {"NHSno": ColumnType.IDENTIFYING_ATTRIBUTE,
                     "given_name": ColumnType.IDENTIFYING_ATTRIBUTE,
                     "surname": ColumnType.IDENTIFYING_ATTRIBUTE,
@@ -34,38 +52,52 @@
                     "dob": ColumnType.SENSITIVE_ATTRIBUTE,
                     "ethnicity": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
                     "education": ColumnType.SENSITIVE_ATTRIBUTE,
-                    "salary": ColumnType.SENSITIVE_ATTRIBUTE,
+                    "salary": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
                     "mutation_status": ColumnType.SENSITIVE_ATTRIBUTE,
                     "preventative_treatment": ColumnType.SENSITIVE_ATTRIBUTE,
                     "diagnosis": ColumnType.INSENSITIVE_ATTRIBUTE}
 
-    action_space = ActionSpace(n=8)
+    # define the action space
+    action_space = ActionSpace(n=10)
+
     # all the columns that are SENSITIVE_ATTRIBUTE will be kept as they are
     # because currently we have no model
     # also INSENSITIVE_ATTRIBUTE will be kept as is
+    # in order to declare this we use an ActionIdentity
     action_space.add_many(ActionIdentity(column_name="dob"),
                           ActionIdentity(column_name="education"),
                           ActionIdentity(column_name="salary"),
                           ActionIdentity(column_name="diagnosis"),
                           ActionIdentity(column_name="mutation_status"),
                           ActionIdentity(column_name="preventative_treatment"),
+                          ActionIdentity(column_name="ethnicity"),
                           ActionStringGeneralize(column_name="ethnicity",
                                                  generalization_table=get_ethinicity_hierarchy()),
                           ActionStringGeneralize(column_name="gender",
-                                                 generalization_table=get_gender_hierarchy()))
+                                                 generalization_table=get_gender_hierarchy()),
+                          ActionNumericBinGeneralize(column_name="salary",
+                                                     generalization_table=get_salary_bins(ds=load_mock_subjects(),
+                                                                                          n_states=N_STATES))
+                          )
+    action_space.shuffle()
 
     env = load_discrete_env(env_type=DiscreteEnvType.TOTAL_DISTORTION_STATE,
-                            n_states=N_BINS,
+                            n_states=N_STATES,
                             min_distortion=MIN_DISTORTION, max_distortion=MAX_DISTORTION,
                             total_min_distortion=MIN_DISTORTION, total_max_distortion=MAX_DISTORTION,
+                            out_of_max_bound_reward=OUT_OF_MAX_BOUND_REWARD,
+                            out_of_min_bound_reward=OUT_OF_MIN_BOUND_REWARD,
+                            in_bounds_reward=IN_BOUNDS_REWARD,
                             punish_factor=PUNISH_FACTOR,
                             column_types=column_types,
                             action_space=action_space,
                             save_distoreted_sets_dir=SAVE_DISTORTED_SETS_DIR,
                             use_identifying_column_dist_in_total_dist=USE_IDENTIFYING_COLUMNS_DIST,
-                            use_identifying_column_dist_factor=IDENTIFY_COLUMN_DIST_FACTOR)
+                            use_identifying_column_dist_factor=IDENTIFY_COLUMN_DIST_FACTOR,
+                            gamma=GAMMA,
+                            n_rounds_below_min_distortion=N_ROUNDS_BELOW_MIN_DISTORTION)
 
-    agent_config = QLearnConfig(n_itrs_per_episode=100, gamma=GAMMA,
+    agent_config = QLearnConfig(n_itrs_per_episode=N_ITRS_PER_EPISODE, gamma=GAMMA,
                                 alpha=ALPHA,
                                 policy=EpsilonGreedyPolicy(eps=EPS, n_actions=env.n_actions,
                                                            decay_op=EPSILON_DECAY_OPTION,
@@ -73,7 +105,19 @@
 
     agent = QLearning(algo_config=agent_config)
 
-    trainer_config = TrainerConfig(n_episodes=100, output_msg_frequency=10)
+    trainer_config = TrainerConfig(n_episodes=N_EPISODES, output_msg_frequency=OUTPUT_MSG_FREQUENCY)
     trainer = Trainer(env=env, agent=agent, configuration=trainer_config)
     trainer.train()
 
+    avg_rewards = trainer.total_rewards
+    plot_running_avg(avg_rewards, steps=100,
+                     xlabel="Episodes", ylabel="Reward",
+                     title="Running reward average over 100 episodes")
+
+    avg_episode_dist = np.array(trainer.total_distortions)
+    print("{0} Max/Min distortion {1}/{2}".format(INFO, np.max(avg_episode_dist), np.min(avg_episode_dist)))
+
+    plot_running_avg(avg_episode_dist, steps=100,
+                     xlabel="Episodes", ylabel="Distortion",
+                     title="Running distortion average over 100 episodes")
+
diff --git a/src/examples/q_learning_multistate.py b/src/examples/q_learning_multistate.py
index fa03adf..094925f 100644
--- a/src/examples/q_learning_multistate.py
+++ b/src/examples/q_learning_multistate.py
@@ -1,34 +1,38 @@
 import random
-from pathlib import Path
 
 import numpy as np
 
-from src.algorithms.q_learning import QLearning, QLearnConfig
+from src.examples.helpers.load_full_mock_dataset import load_discrete_env, get_ethinicity_hierarchy, \
+    get_gender_hierarchy, get_salary_bins, load_mock_subjects
+from src.datasets import ColumnType
 from src.spaces.env_type import DiscreteEnvType
-
-from src.trainers.trainer import Trainer
+from src.spaces.action_space import ActionSpace
+from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
+from src.algorithms.q_learning import QLearnConfig, QLearning
 from src.policies.epsilon_greedy_policy import EpsilonGreedyPolicy, EpsilonDecayOption
-from src.utils.plot_utils import plot_running_avg
+from src.trainers.trainer import Trainer, TrainerConfig
+from src.examples.helpers.plot_utils import plot_running_avg
 from src.utils import INFO
-from src.examples.helpers.load_three_columns_mock_dataset import load_discrete_env
 
-N_BINS = 10
-N_EPISODES = 1000
-OUTPUT_MSG_FREQUENCY = 100
+# configuration params
+N_STATES = 10
 GAMMA = 0.99
 ALPHA = 0.1
-N_ITRS_PER_EPISODE = 30
+PUNISH_FACTOR = 2.0
+MAX_DISTORTION = 0.7
+MIN_DISTORTION = 0.4
+SAVE_DISTORTED_SETS_DIR = "/home/alex/qi3/drl_anonymity/src/examples/q_learning_all_cols_multi_state_results/distorted_set"
 EPS = 1.0
-EPSILON_DECAY_OPTION = EpsilonDecayOption.CONSTANT_RATE #.INVERSE_STEP
+EPSILON_DECAY_OPTION = EpsilonDecayOption.CONSTANT_RATE  # .INVERSE_STEP
 EPSILON_DECAY_FACTOR = 0.01
-MAX_DISTORTION = 0.7
-MIN_DISTORTION = 0.3
-#OUT_OF_MAX_BOUND_REWARD = -1.0
-#OUT_OF_MIN_BOUND_REWARD = -1.0
-#IN_BOUNDS_REWARD = 5.0
+USE_IDENTIFYING_COLUMNS_DIST = True
+IDENTIFY_COLUMN_DIST_FACTOR = 0.1
+N_EPISODES = 1001
+N_ITRS_PER_EPISODE = 30
+OUT_OF_MAX_BOUND_REWARD = -1.0
+OUT_OF_MIN_BOUND_REWARD = -1.0
+IN_BOUNDS_REWARD = 5.0
+OUTPUT_MSG_FREQUENCY = 100
 N_ROUNDS_BELOW_MIN_DISTORTION = 10
-SAVE_DISTORTED_SETS_DIR = "/home/alex/qi3/drl_anonymity/src/examples/q_learning_multistate_results/distorted_set"
-#REWARD_FACTOR = 0.95
-PUNISH_FACTOR = 2.0
 
 
 if __name__ == '__main__':
 
     # set the seed for random engine
     random.seed(42)
 
+    # specify the column types. An identifying column
+    # will be removed from the anonymized data set.
+    # An INSENSITIVE_ATTRIBUTE remains intact.
+    # A QUASI_IDENTIFYING_ATTRIBUTE is used in the anonymization.
+    # A SENSITIVE_ATTRIBUTE currently remains intact.
+    column_types = {"NHSno": ColumnType.IDENTIFYING_ATTRIBUTE,
+                    "given_name": ColumnType.IDENTIFYING_ATTRIBUTE,
+                    "surname": ColumnType.IDENTIFYING_ATTRIBUTE,
+                    "gender": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
+                    "dob": ColumnType.SENSITIVE_ATTRIBUTE,
+                    "ethnicity": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
+                    "education": ColumnType.SENSITIVE_ATTRIBUTE,
+                    "salary": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
+                    "mutation_status": ColumnType.SENSITIVE_ATTRIBUTE,
+                    "preventative_treatment": ColumnType.SENSITIVE_ATTRIBUTE,
+                    "diagnosis": ColumnType.INSENSITIVE_ATTRIBUTE}
+
+    # define the action space
+    action_space = ActionSpace(n=10)
+
+    # all the columns that are SENSITIVE_ATTRIBUTE will be kept as they are
+    # because currently we have no model
+    # also INSENSITIVE_ATTRIBUTE will be kept as is
+    # in order to declare this we use an ActionIdentity
+    action_space.add_many(ActionIdentity(column_name="dob"),
+                          ActionIdentity(column_name="education"),
+                          ActionIdentity(column_name="salary"),
+                          ActionIdentity(column_name="diagnosis"),
+                          ActionIdentity(column_name="mutation_status"),
+                          ActionIdentity(column_name="preventative_treatment"),
+                          ActionIdentity(column_name="ethnicity"),
+                          ActionStringGeneralize(column_name="ethnicity",
+                                                 generalization_table=get_ethinicity_hierarchy()),
+                          ActionStringGeneralize(column_name="gender",
+                                                 generalization_table=get_gender_hierarchy()),
+                          ActionNumericBinGeneralize(column_name="salary",
+                                                     generalization_table=get_salary_bins(ds=load_mock_subjects(),
+                                                                                          n_states=N_STATES))
+                          )
+    action_space.shuffle()
+
     # load the discrete environment
-    env = load_discrete_env(env_type=DiscreteEnvType.MULTI_COLUMN_STATE, n_states=N_BINS,
-                            min_distortion={"ethnicity": 0.15, "salary": 0.15,
-                                            "diagnosis": 0.0},
-                            max_distortion={"ethnicity": 0.35, "salary": 0.35,
-                                            "diagnosis": 0.0},
+    env = load_discrete_env(env_type=DiscreteEnvType.MULTI_COLUMN_STATE, n_states=N_STATES,
+                            min_distortion={"ethnicity": 0.133, "salary": 0.133, "gender": 0.133,
+                                            "dob": 0.0, "education": 0.0, "diagnosis": 0.0,
+                                            "mutation_status": 0.0, "preventative_treatment": 0.0,
+                                            "NHSno": 0.0, "given_name": 0.0, "surname": 0.0},
+                            max_distortion={"ethnicity": 0.133, "salary": 0.133, "gender": 0.133,
+                                            "dob": 0.0, "education": 0.0, "diagnosis": 0.0,
+                                            "mutation_status": 0.0, "preventative_treatment": 0.0,
+                                            "NHSno": 0.1, "given_name": 0.1, "surname": 0.1},
                             total_min_distortion=MIN_DISTORTION, total_max_distortion=MAX_DISTORTION,
-                            punish_factor=PUNISH_FACTOR)
-    env.config.state_type = DiscreteEnvType.MULTI_COLUMN_STATE
-
-    algo_config = QLearnConfig()
-    algo_config.n_itrs_per_episode = N_ITRS_PER_EPISODE
-    algo_config.gamma = GAMMA
-    algo_config.alpha = ALPHA
-    # algo_config.policy = SoftMaxPolicy(n_actions=len(action_space), tau=1.2)
-    algo_config.policy = EpsilonGreedyPolicy(eps=EPS, n_actions=env.n_actions,
-                                             decay_op=EPSILON_DECAY_OPTION,
-                                             epsilon_decay_factor=EPSILON_DECAY_FACTOR)
-
-    # the learner we want to train
-    agent = QLearning(algo_config=algo_config)
-
-    # create a trainer to train the Qlearning agent
-    configuration = {"n_episodes": N_EPISODES, "output_msg_frequency": OUTPUT_MSG_FREQUENCY}
-    trainer = Trainer(env=env, agent=agent, configuration=configuration)
-
-    # train the agent
+                            out_of_max_bound_reward=OUT_OF_MAX_BOUND_REWARD,
+                            out_of_min_bound_reward=OUT_OF_MIN_BOUND_REWARD,
+                            in_bounds_reward=IN_BOUNDS_REWARD,
+                            punish_factor=PUNISH_FACTOR,
+                            column_types=column_types,
+                            action_space=action_space,
+                            save_distoreted_sets_dir=SAVE_DISTORTED_SETS_DIR,
+                            use_identifying_column_dist_in_total_dist=USE_IDENTIFYING_COLUMNS_DIST,
+                            use_identifying_column_dist_factor=IDENTIFY_COLUMN_DIST_FACTOR,
+                            gamma=GAMMA,
+                            n_rounds_below_min_distortion=N_ROUNDS_BELOW_MIN_DISTORTION)
+
+    agent_config = QLearnConfig(n_itrs_per_episode=N_ITRS_PER_EPISODE, gamma=GAMMA,
+                                alpha=ALPHA,
+                                policy=EpsilonGreedyPolicy(eps=EPS, n_actions=env.n_actions,
+                                                           decay_op=EPSILON_DECAY_OPTION,
+                                                           epsilon_decay_factor=EPSILON_DECAY_FACTOR))
+
+    agent = QLearning(algo_config=agent_config)
+
+    trainer_config = TrainerConfig(n_episodes=N_EPISODES, output_msg_frequency=OUTPUT_MSG_FREQUENCY)
+    trainer = Trainer(env=env, agent=agent, configuration=trainer_config)
     trainer.train()
 
-    # avg_rewards = trainer.avg_rewards()
     avg_rewards = trainer.total_rewards
     plot_running_avg(avg_rewards, steps=100,
                      xlabel="Episodes", ylabel="Reward",
@@ -77,17 +127,3 @@
     plot_running_avg(avg_episode_dist, steps=100,
                      xlabel="Episodes", ylabel="Distortion",
                      title="Running distortion average over 100 episodes")
-
-    print("=============================================")
-    print("{0} Generating distorted dataset".format(INFO))
-
-    """
-    # Let's play
-    env.reset()
-
-    stop_criterion = IterationControl(n_itrs=10, min_dist=MIN_DISTORTION, max_dist=MAX_DISTORTION)
-    agent.play(env=env, stop_criterion=stop_criterion)
-    env.save_current_dataset(episode_index=-2, save_index=False)
-    """
-    print("{0} Done....".format(INFO))
-    print("=============================================")
diff --git a/src/examples/qlearning_three_columns.py b/src/examples/qlearning_three_columns.py
index 3a0dc91..68ad9de 100644
--- a/src/examples/qlearning_three_columns.py
+++ b/src/examples/qlearning_three_columns.py
@@ -40,6 +40,7 @@
 SAVE_DISTORTED_SETS_DIR = "q_learning_three_columns_results/distorted_set"
 PUNISH_FACTOR = 2.0
 
+
 if __name__ == '__main__':
 
     # set the seed for random engine
diff --git a/src/spaces/discrete_state_environment.py b/src/spaces/discrete_state_environment.py
index 1a64ea0..3ed6c19 100644
--- a/src/spaces/discrete_state_environment.py
+++ b/src/spaces/discrete_state_environment.py
@@ -249,10 +249,10 @@ def get_aggregated_state(self, state_val: Any, column_name: str = None) -> Any:
             raise ValueError("Name {0} not in column bins names {1} ".format(column_name, list(self.column_bins.keys())))
 
         if column_name is None:
-            column_dists = [(0.0, name) for name in self.column_bins]
+            column_dists = [(0.0, name) for name in self.column_bins if self.config.column_types[name] == ColumnType.QUASI_IDENTIFYING_ATTRIBUTE]
 
         else:
-            column_dists = [(self.column_distances[name], name) for name in self.column_bins]
+            column_dists = [(self.column_distances[name], name) for name in self.column_bins if self.config.column_types[name] == ColumnType.QUASI_IDENTIFYING_ATTRIBUTE]
 
         state = []
         for distortion, name in column_dists:
@@ -274,8 +274,13 @@ def initialize_column_counts(self) -> None:
         """
         col_names = self.config.data_set.get_columns_names()
 
-        for col in col_names:
-            self.column_visits[col] = 0
+        for name in col_names:
+            self.column_visits[name] = 0
+
+            # an identifying attribute is removed from the
+            # data set, so mark it as already visited
+            if self.config.column_types[name] == ColumnType.IDENTIFYING_ATTRIBUTE:
+                self.column_visits[name] = 1
 
     def all_columns_visited(self) -> bool:
         """Returns true is all columns have been visited
@@ -524,21 +529,83 @@ def _create_multi_column_state_bins(self) -> None:
 
         # create the column bins
         for name in self.column_names:
-            self.column_bins[name] = np.linspace(0.0, 1.0, self.config.n_states)
+
+            # we create bins only for the QUASI_IDENTIFYING_ATTRIBUTE
+            # attributes
+            if self.config.column_types[name] == ColumnType.QUASI_IDENTIFYING_ATTRIBUTE:
+                self.column_bins[name] = np.linspace(0.0, 1.0, self.config.n_states)
+            #else:
+            #    self.column_bins["all_the_rest"] = np.linspace(0.0, 1.0, self.config.n_states)
 
         if len(self.column_bins) == 3:
             self._build_three_columns()
+        elif len(self.column_bins) == 4:
+            self._build_4_columns()
+        elif len(self.column_bins) == 5:
+            self._build_5_columns()
         else:
             raise ValueError("Invalid number of columns. Cannot build the multi-column state bins")
 
+        # add bins for the remaining columns, i.e. those that
+        # are not QUASI_IDENTIFYING_ATTRIBUTE
+        for name in self.column_names:
+            if self.config.column_types[name] != ColumnType.QUASI_IDENTIFYING_ATTRIBUTE:
+                self.column_bins[name] = np.linspace(0.0, 1.0, self.config.n_states)
+
     def _build_three_columns(self):
 
-        name = self.column_names[0]
+        name = ""
+        for n in self.config.column_types:
+            if self.config.column_types[n] == ColumnType.QUASI_IDENTIFYING_ATTRIBUTE:
+                name = n
+                break
+
+        if name == "":
+            raise ValueError("No QUASI_IDENTIFYING_ATTRIBUTE has been specified")
+
         for i in range(len(self.column_bins[name])):
             for j in range(len(self.column_bins[name])):
                 for k in range(len(self.column_bins[name])):
                     self.state_space.append((i, j, k))
 
+    def _build_4_columns(self):
+
+        name = ""
+        for n in self.config.column_types:
+            if self.config.column_types[n] == ColumnType.QUASI_IDENTIFYING_ATTRIBUTE:
+                name = n
+                break
+
+        if name == "":
+            raise ValueError("No QUASI_IDENTIFYING_ATTRIBUTE has been specified")
+
+        for i1 in range(len(self.column_bins[name])):
+            for i2 in range(len(self.column_bins[name])):
+                for i3 in range(len(self.column_bins[name])):
+                    for i4 in range(len(self.column_bins[name])):
+                        self.state_space.append((i1, i2, i3, i4))
+
+    def _build_5_columns(self):
+
+        name = ""
+        for n in self.config.column_types:
+            if self.config.column_types[n] == ColumnType.QUASI_IDENTIFYING_ATTRIBUTE:
+                name = n
+                break
+
+        if name == "":
+            raise ValueError("No QUASI_IDENTIFYING_ATTRIBUTE has been specified")
+
+        for i1 in range(len(self.column_bins[name])):
+            for i2 in range(len(self.column_bins[name])):
+                for i3 in range(len(self.column_bins[name])):
+                    for i4 in range(len(self.column_bins[name])):
+                        for i5 in range(len(self.column_bins[name])):
+                            self.state_space.append((i1, i2, i3, i4, i5))
+
     def _distort_identifying_attributes(self):
 
         for name in self.config.column_types: