162 changes: 162 additions & 0 deletions docs/source/Examples/qlearning_all_columns.rst
@@ -0,0 +1,162 @@
Q-learning with many columns
=============================

Overview
--------

In the `previous <qlearning_three_columns.html>`_ example, we applied Q-learning to a dataset consisting
of three columns. Moreover, we used a one-dimensional state space; we discretized the range :math:`[0,1]` into bins and used the
resulting bin index as the state index. In this example, we simply allow for more columns in the dataset.
Other than that, this example is the same as the previous one.
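
The state of the environment is thus the bin index of the total dataset distortion. A minimal sketch of this binning (assuming ``numpy``; the bin edges and the ``state_index`` helper are illustrative and not part of the library) is

.. code-block::

    import numpy as np

    N_STATES = 10
    # bin edges partitioning the distortion range [0, 1]
    bins = np.linspace(0.0, 1.0, N_STATES + 1)

    def state_index(total_distortion: float) -> int:
        # np.digitize returns a 1-based bin index; the clip keeps
        # the boundary value 1.0 inside the last bin
        return int(np.clip(np.digitize(total_distortion, bins) - 1, 0, N_STATES - 1))

    assert state_index(0.0) == 0
    assert state_index(0.55) == 5
    assert state_index(1.0) == N_STATES - 1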

Code
----

The necessary imports

.. code-block::

import random
import numpy as np

from src.examples.helpers.load_full_mock_dataset import load_discrete_env, get_ethinicity_hierarchy, \
    get_gender_hierarchy, get_salary_bins, load_mock_subjects
from src.datasets import ColumnType
from src.spaces.env_type import DiscreteEnvType
from src.spaces.action_space import ActionSpace
from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
from src.algorithms.q_learning import QLearnConfig, QLearning
from src.policies.epsilon_greedy_policy import EpsilonGreedyPolicy, EpsilonDecayOption
from src.trainers.trainer import Trainer, TrainerConfig
from src.examples.helpers.plot_utils import plot_running_avg
from src.utils import INFO

Next, establish a set of configuration parameters

.. code-block::

# configuration params
N_STATES = 10
GAMMA = 0.99
ALPHA = 0.1
PUNISH_FACTOR = 2.0
MAX_DISTORTION = 0.7
MIN_DISTORTION = 0.4
SAVE_DISTORTED_SETS_DIR = "/home/alex/qi3/drl_anonymity/src/examples/q_learning_all_cols_results/distorted_set"
EPS = 1.0
EPSILON_DECAY_OPTION = EpsilonDecayOption.CONSTANT_RATE # .INVERSE_STEP
EPSILON_DECAY_FACTOR = 0.01
USE_IDENTIFYING_COLUMNS_DIST = True
IDENTIFY_COLUMN_DIST_FACTOR = 0.1
N_EPISODES = 1001
N_ITRS_PER_EPISODE = 30
OUT_OF_MAX_BOUND_REWARD = -1.0
OUT_OF_MIN_BOUND_REWARD = -1.0
IN_BOUNDS_REWARD = 5.0
OUTPUT_MSG_FREQUENCY = 100
N_ROUNDS_BELOW_MIN_DISTORTION = 10

The driver code brings all the elements together

.. code-block::

if __name__ == '__main__':

    # set the seed for the random engine
    random.seed(42)

    # specify the column types. An IDENTIFYING_ATTRIBUTE column
    # will be removed from the anonymized dataset.
    # An INSENSITIVE_ATTRIBUTE remains intact.
    # A QUASI_IDENTIFYING_ATTRIBUTE is used in the anonymization.
    # A SENSITIVE_ATTRIBUTE currently remains intact.
    column_types = {"NHSno": ColumnType.IDENTIFYING_ATTRIBUTE,
                    "given_name": ColumnType.IDENTIFYING_ATTRIBUTE,
                    "surname": ColumnType.IDENTIFYING_ATTRIBUTE,
                    "gender": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
                    "dob": ColumnType.SENSITIVE_ATTRIBUTE,
                    "ethnicity": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
                    "education": ColumnType.SENSITIVE_ATTRIBUTE,
                    "salary": ColumnType.QUASI_IDENTIFYING_ATTRIBUTE,
                    "mutation_status": ColumnType.SENSITIVE_ATTRIBUTE,
                    "preventative_treatment": ColumnType.SENSITIVE_ATTRIBUTE,
                    "diagnosis": ColumnType.INSENSITIVE_ATTRIBUTE}

    # define the action space
    action_space = ActionSpace(n=10)

    # all the columns that are SENSITIVE_ATTRIBUTE are kept as they are,
    # because currently we have no model for them.
    # INSENSITIVE_ATTRIBUTE columns are also kept as is;
    # to declare this we use an ActionIdentity
    action_space.add_many(ActionIdentity(column_name="dob"),
                          ActionIdentity(column_name="education"),
                          ActionIdentity(column_name="salary"),
                          ActionIdentity(column_name="diagnosis"),
                          ActionIdentity(column_name="mutation_status"),
                          ActionIdentity(column_name="preventative_treatment"),
                          ActionIdentity(column_name="ethnicity"),
                          ActionStringGeneralize(column_name="ethnicity",
                                                 generalization_table=get_ethinicity_hierarchy()),
                          ActionStringGeneralize(column_name="gender",
                                                 generalization_table=get_gender_hierarchy()),
                          ActionNumericBinGeneralize(column_name="salary",
                                                     generalization_table=get_salary_bins(ds=load_mock_subjects(),
                                                                                          n_states=N_STATES)))
    action_space.shuffle()

    env = load_discrete_env(env_type=DiscreteEnvType.TOTAL_DISTORTION_STATE,
                            n_states=N_STATES,
                            min_distortion=MIN_DISTORTION, max_distortion=MAX_DISTORTION,
                            total_min_distortion=MIN_DISTORTION, total_max_distortion=MAX_DISTORTION,
                            out_of_max_bound_reward=OUT_OF_MAX_BOUND_REWARD,
                            out_of_min_bound_reward=OUT_OF_MIN_BOUND_REWARD,
                            in_bounds_reward=IN_BOUNDS_REWARD,
                            punish_factor=PUNISH_FACTOR,
                            column_types=column_types,
                            action_space=action_space,
                            save_distoreted_sets_dir=SAVE_DISTORTED_SETS_DIR,
                            use_identifying_column_dist_in_total_dist=USE_IDENTIFYING_COLUMNS_DIST,
                            use_identifying_column_dist_factor=IDENTIFY_COLUMN_DIST_FACTOR,
                            gamma=GAMMA,
                            n_rounds_below_min_distortion=N_ROUNDS_BELOW_MIN_DISTORTION)

    agent_config = QLearnConfig(n_itrs_per_episode=N_ITRS_PER_EPISODE, gamma=GAMMA,
                                alpha=ALPHA,
                                policy=EpsilonGreedyPolicy(eps=EPS, n_actions=env.n_actions,
                                                           decay_op=EPSILON_DECAY_OPTION,
                                                           epsilon_decay_factor=EPSILON_DECAY_FACTOR))

    agent = QLearning(algo_config=agent_config)

    trainer_config = TrainerConfig(n_episodes=N_EPISODES, output_msg_frequency=OUTPUT_MSG_FREQUENCY)
    trainer = Trainer(env=env, agent=agent, configuration=trainer_config)
    trainer.train()

    avg_rewards = trainer.total_rewards
    plot_running_avg(avg_rewards, steps=100,
                     xlabel="Episodes", ylabel="Reward",
                     title="Running reward average over 100 episodes")

    avg_episode_dist = np.array(trainer.total_distortions)
    print("{0} Max/Min distortion {1}/{2}".format(INFO, np.max(avg_episode_dist), np.min(avg_episode_dist)))

    plot_running_avg(avg_episode_dist, steps=100,
                     xlabel="Episodes", ylabel="Distortion",
                     title="Running distortion average over 100 episodes")


Results
-------

The following images show the performance of the learning process.

.. figure:: images/qlearn_rewards_all_cols.png

Running average reward.


.. figure:: images/qlearn_distortion_multi_cols.png

Running average total distortion.
13 changes: 12 additions & 1 deletion docs/source/Examples/qlearning_three_columns.rst
@@ -48,6 +48,17 @@ Given that the total dataset distortion is assumed to be in the range :math:`[0,
discretize this range into bins and for each entailed value of the distortion we use the corresponding bin as a state index.
Alternatively, we could discretize the distortion of each column into bins and create tuples of indices representing a state.

We preprocess the dataset by normalizing its numeric columns.
We use the normalized cosine distance to measure the distortion of columns holding string data.
Similarly, we use the following :math:`L_2`-based norm to calculate the distortion of
numeric columns

.. math::

dist(\mathbf{v}_1, \mathbf{v}_2) = \sqrt{\frac{||\mathbf{v}_1 - \mathbf{v}_2||_{L_2}}{N}}

where :math:`N` is the size of the vectors. Due to the normalization of the numeric columns, the resulting distance lies in the range :math:`[0,1]`.
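
A minimal sketch of this numeric distortion (assuming ``numpy``; the ``numeric_distortion`` name is illustrative rather than the library's actual API) is

.. code-block::

    import numpy as np

    def numeric_distortion(v1: np.ndarray, v2: np.ndarray) -> float:
        # L2 norm of the difference, scaled by the vector size N
        n = v1.shape[0]
        return float(np.sqrt(np.linalg.norm(v1 - v2) / n))

    # with normalized columns the result stays in [0, 1]
    v1 = np.array([0.10, 0.50, 0.90])
    v2 = np.array([0.20, 0.40, 0.80])
    print(numeric_distortion(v1, v2))  # ~0.24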


Code
----
@@ -98,7 +109,7 @@ Next establish a set of configuration parameters
SAVE_DISTORTED_SETS_DIR = "q_learning_three_columns_results/distorted_set"
PUNISH_FACTOR = 2.0

The dirver code creates brings all the elements together
The driver code brings all the elements together

.. code-block::

Expand Down
1 change: 1 addition & 0 deletions docs/source/examples.rst
@@ -7,5 +7,6 @@ Some examples can be found below
:maxdepth: 4

Examples/qlearning_three_columns
Examples/qlearning_all_columns
Examples/semi_gradient_sarsa_three_columns
Examples/a2c_three_columns
1 change: 1 addition & 0 deletions src/datasets/dataset_wrapper.py
@@ -214,6 +214,7 @@ def apply_column_transform(self, column_name: str, transform: Transform) -> None
"""

# get the column

column = self.get_column(col_name=column_name)
column = transform.act(**{"data": column.values})
self.ds[transform.column_name] = column
52 changes: 25 additions & 27 deletions src/examples/helpers/load_full_mock_dataset.py
@@ -9,32 +9,13 @@
from src.datasets.datasets_loaders import MockSubjectsLoader, MockSubjectsData
from src.spaces.discrete_state_environment import DiscreteStateEnvironment
from src.spaces.action_space import ActionSpace
from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
from src.maths.distortion_calculator import DistortionCalculationType, DistortionCalculator
from src.maths.numeric_distance_type import NumericDistanceType
from src.maths.string_distance_calculator import StringDistanceType
from src.utils.reward_manager import RewardManager
from src.spaces.env_type import DiscreteEnvType


N_LAYERS = 5
N_BINS = 10
N_EPISODES = 1000
OUTPUT_MSG_FREQUENCY = 100
GAMMA = 0.99
ALPHA = 0.1

MAX_DISTORTION = 0.7
MIN_DISTORTION = 0.3
OUT_OF_MAX_BOUND_REWARD = -1.0
OUT_OF_MIN_BOUND_REWARD = -1.0
IN_BOUNDS_REWARD = 5.0
N_ROUNDS_BELOW_MIN_DISTORTION = 10
#SAVE_DISTORTED_SETS_DIR = "/home/alex/qi3/drl_anonymity/src/examples/semi_grad_sarsa/distorted_set"
REWARD_FACTOR = 0.95
PUNISH_FACTOR = 2.0


def get_gender_hierarchy():
    hierarchy = SerialHierarchy(values={"F": "*", "M": "*", "*": "*"})
    return hierarchy
@@ -94,14 +75,32 @@ def load_mock_subjects() -> MockSubjectsLoader:
return ds


def get_salary_bins(ds: MockSubjectsLoader, n_states: int):
    # create bins for the salary generalization
    unique_salary = ds.get_column_unique_values(col_name="salary")
    unique_salary.sort()

    # slightly increase the maximum value, because otherwise
    # the maximum salary falls out of the bin bounds
    bins = np.linspace(unique_salary[0], unique_salary[-1] + 1, n_states)
    return bins


def load_discrete_env(env_type: DiscreteEnvType, n_states: int,
min_distortion: Any, max_distortion: Any,
total_min_distortion: float, total_max_distortion: float,
punish_factor: float, column_types: dict,
out_of_max_bound_reward: float,
out_of_min_bound_reward: float,
in_bounds_reward: float,
punish_factor: float,
column_types: dict,
action_space: ActionSpace,
save_distoreted_sets_dir: str,
use_identifying_column_dist_in_total_dist: bool,
use_identifying_column_dist_factor: float) -> DiscreteStateEnvironment:
use_identifying_column_dist_factor: float,
gamma: float,
n_rounds_below_min_distortion: int) -> DiscreteStateEnvironment:

mock_ds = load_mock_subjects()

action_space.shuffle()
@@ -114,18 +113,17 @@ def load_discrete_env(env_type: DiscreteEnvType, n_states: int,
dataset_distortion_type=DistortionCalculationType.SUM),
reward_manager=RewardManager(
bounds=(total_min_distortion, total_max_distortion),
out_of_max_bound_reward=OUT_OF_MAX_BOUND_REWARD,
out_of_min_bound_reward=OUT_OF_MIN_BOUND_REWARD,
in_bounds_reward=IN_BOUNDS_REWARD,
out_of_max_bound_reward=out_of_max_bound_reward,
out_of_min_bound_reward=out_of_min_bound_reward,
in_bounds_reward=in_bounds_reward,
min_distortions=min_distortion, max_distortions=max_distortion,
punish_factor=punish_factor),
gamma=GAMMA,
reward_factor=REWARD_FACTOR,
gamma=gamma,
min_distortion=min_distortion,
min_total_distortion=total_min_distortion,
max_distortion=max_distortion,
max_total_distortion=total_max_distortion,
n_rounds_below_min_distortion=N_ROUNDS_BELOW_MIN_DISTORTION,
n_rounds_below_min_distortion=n_rounds_below_min_distortion,
distorted_set_path=Path(save_distoreted_sets_dir),
n_states=n_states, env_type=env_type, column_types=column_types,
use_identifying_column_dist_in_total_dist=use_identifying_column_dist_in_total_dist,
1 change: 0 additions & 1 deletion src/examples/helpers/load_three_columns_mock_dataset.py
@@ -9,7 +9,6 @@
from src.datasets.datasets_loaders import MockSubjectsLoader, MockSubjectsData
from src.spaces.discrete_state_environment import DiscreteStateEnvironment
from src.spaces.action_space import ActionSpace
from src.spaces.actions import ActionIdentity, ActionStringGeneralize, ActionNumericBinGeneralize
from src.maths.distortion_calculator import DistortionCalculationType, DistortionCalculator
from src.maths.numeric_distance_type import NumericDistanceType
from src.maths.string_distance_calculator import StringDistanceType