From 1819412f7156a023f54c775f788281889e356c5d Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 26 Jan 2022 13:10:52 +0000 Subject: [PATCH 1/2] #23 Add distortion calculator and finish Qlearning example --- src/algorithms/q_learning.py | 23 ++++++-- src/datasets/dataset_wrapper.py | 7 +-- src/exceptions/exceptions.py | 8 +-- src/spaces/action_space.py | 9 ++-- src/spaces/discrete_state_environment.py | 69 ++++++++++++++++++------ src/utils/mixins.py | 3 +- src/utils/numeric_distance_calculator.py | 19 ++++++- src/utils/numeric_distance_type.py | 1 + 8 files changed, 107 insertions(+), 32 deletions(-) diff --git a/src/algorithms/q_learning.py b/src/algorithms/q_learning.py index 5c5fde7..8baa9cf 100644 --- a/src/algorithms/q_learning.py +++ b/src/algorithms/q_learning.py @@ -56,6 +56,21 @@ def actions_after_episode_ends(self, **options): self.config.policy.actions_after_episode(options['episode_idx']) + def play(self, env: Env) -> None: + """ + Play the game on the environment. This should produce + a distorted dataset + :param env: + :return: + """ + + # loop over the columns and for the + # column get the action that corresponds to + # the max payout. + # TODO: This will no work as the distortion is calculated + # by summing over the columns. + raise NotImplementedError("Function not implemented") + def train(self, env: Env, **options) -> tuple: # episode score @@ -73,10 +88,10 @@ def train(self, env: Env, **options) -> tuple: action = env.get_action(action_idx) - if action.action_type.name == "GENERALIZE" and action.column_name == "salary": - print("Attempt to generalize salary") - else: - print(action.action_type.name, " on ", action.column_name) + #if action.action_type.name == "GENERALIZE" and action.column_name == "salary": + # print("Attempt to generalize salary") + #else: + # print(action.action_type.name, " on ", action.column_name) # take action A, observe R, S' next_time_step = env.step(action) diff --git a/src/datasets/dataset_wrapper.py b/src/datasets/dataset_wrapper.py index 758f477..55f6cd5 100644 --- a/src/datasets/dataset_wrapper.py +++ b/src/datasets/dataset_wrapper.py @@ -106,8 +106,9 @@ def normalize_column(self, column_name) -> None: """ data_type = self.columns[column_name] - if data_type is not int or data_type is not float: - raise InvalidDataTypeException(param_name=column_name, param_types="[int, float]") + + if data_type is not type(1) and data_type is not type(1.0): + raise InvalidDataTypeException(param_name=column_name, param_type=data_type, param_types="[int, float]") col_vals = self.get_column(col_name=column_name).values @@ -115,7 +116,7 @@ def normalize_column(self, column_name) -> None: max_val = np.max(col_vals) for i in range(len(col_vals)): - col_vals[i] = (col_vals[i] - min_val) / (max_val - min_val) + col_vals[i] = float((col_vals[i] - min_val)) / float((max_val - min_val)) self.ds[column_name] = col_vals diff --git a/src/exceptions/exceptions.py b/src/exceptions/exceptions.py index ef34b5b..cc52cec 100755 --- a/src/exceptions/exceptions.py +++ b/src/exceptions/exceptions.py @@ -1,3 +1,5 @@ +from typing import Any + class Error(Exception): """ @@ -24,8 +26,8 @@ def __str__(self): class InvalidDataTypeException(Exception): - def __init__(self, param_name: str, param_types: str): - self.message = "Parameter {0} has invalid type. Type not in {1}".format(param_name, param_types) + def __init__(self, param_name: str, param_type: Any, param_types: str): + self.message = "Parameter {0} has invalid type. Type {1} not in {2}".format(param_name, str(Any), param_types) def __str__(self): return self.message @@ -48,7 +50,7 @@ def __str__(self): class IncompatibleVectorSizesException(Exception): - def __iter__(self, size1: int, size2: int) -> None: + def __init__(self, size1: int, size2: int) -> None: self.message = "Size {0} does not match size {1} ".format(size1, size2) def __str__(self): diff --git a/src/spaces/action_space.py b/src/spaces/action_space.py index f495e2c..ecacf4d 100644 --- a/src/spaces/action_space.py +++ b/src/spaces/action_space.py @@ -6,7 +6,7 @@ import numpy as np import random from gym.spaces.discrete import Discrete -from src.spaces.actions import ActionBase +from src.spaces.actions import ActionBase, ActionType class ActionSpace(Discrete): @@ -48,7 +48,7 @@ def shuffle(self) -> None: """ random.shuffle(self.actions) - def get_action_by_column_name(self, column_name: str) -> ActionBase: + def get_action_by_name_and_type(self, column_name: str, action_type: ActionType) -> ActionBase: """ Get the action that corresponds to the column with the given name. Raises ValueError if such an action does not @@ -58,10 +58,11 @@ def get_action_by_column_name(self, column_name: str) -> ActionBase: """ for action in self.actions: - if action.column_name == column_name: + if action.column_name == column_name and \ + action.action_type == action_type: return action - raise ValueError("No action exists for column={0}".format(column_name)) + raise ValueError("No action exists for column={0} with type {1}".format(column_name, action_type.name)) def add(self, action: ActionBase) -> None: """ diff --git a/src/spaces/discrete_state_environment.py b/src/spaces/discrete_state_environment.py index 16f7d03..f2b7915 100644 --- a/src/spaces/discrete_state_environment.py +++ b/src/spaces/discrete_state_environment.py @@ -17,10 +17,10 @@ from src.utils.numeric_distance_type import NumericDistanceType from src.utils.numeric_distance_calculator import NumericDistanceCalculator - DataSet = TypeVar("DataSet") RewardManager = TypeVar("RewardManager") ActionSpace = TypeVar("ActionSpace") +DistortionCalculator = TypeVar('DistortionCalculator') _Reward = TypeVar('_Reward') _Discount = TypeVar('_Discount') @@ -72,19 +72,21 @@ class DiscreteEnvConfig(object): """ Configuration for discrete environment """ + def __init__(self) -> None: self.data_set: DataSet = None self.action_space: ActionSpace = None self.reward_manager: RewardManager = None self.average_distortion_constraint: float = 0.0 self.gamma: float = 0.99 - self.string_column_distortion_type: StringDistanceType = StringDistanceType.INVALID - self.numeric_column_distortion_metric_type: NumericDistanceType = NumericDistanceType.INVALID + # self.string_column_distortion_type: StringDistanceType = StringDistanceType.INVALID + # self.numeric_column_distortion_metric_type: NumericDistanceType = NumericDistanceType.INVALID self.n_states: int = 10 self.min_distortion: float = 0.4 self.max_distortion: float = 0.7 self.n_rounds_below_min_distortion: int = 10 self.distorted_set_path: Path = None + self.distortion_calculator: DistortionCalculator = None class DiscreteStateEnvironment(object): @@ -92,13 +94,14 @@ class DiscreteStateEnvironment(object): The DiscreteStateEnvironment class. Uses state aggregation in order to create bins where the average total distortion of the dataset falls in """ + def __init__(self, env_config: DiscreteEnvConfig) -> None: self.config = env_config self.n_rounds_below_min_distortion = 0 self.state_bins: List[float] = [] self.distorted_data_set = copy.deepcopy(self.config.data_set) self.current_time_step: TimeStep = None - self.string_distance_calculator: TextDistanceCalculator = None + # self.string_distance_calculator: TextDistanceCalculator = None # dictionary that holds the distortion for every column # in the dataset @@ -126,7 +129,8 @@ def get_action(self, aidx: int) -> ActionBase: return self.config.action_space[aidx] def save_current_dataset(self, episode_index: int) -> None: - self.distorted_data_set.save_to_csv(filename=Path(str(self.config.distorted_set_path) + "_" + str(episode_index))) + self.distorted_data_set.save_to_csv( + filename=Path(str(self.config.distorted_set_path) + "_" + str(episode_index))) def create_bins(self) -> None: """ @@ -167,7 +171,7 @@ def initialize_distances(self) -> None: normalized distance to 0.0 meaning that no distortion is assumed initially :return: None """ - self.string_distance_calculator = TextDistanceCalculator(dist_type=self.config.string_column_distortion_type) + # self.string_distance_calculator = TextDistanceCalculator(dist_type=self.config.string_column_distortion_type) col_names = self.config.data_set.get_columns_names() for col in col_names: self.column_distances[col] = 0.0 @@ -194,14 +198,21 @@ def apply_action(self, action: ActionBase): current_column = self.distorted_data_set.get_column(col_name=action.column_name) start_column = self.config.data_set.get_column(col_name=action.column_name) + datatype = 'float' # calculate column distortion if self.distorted_data_set.columns[action.column_name] == str: + current_column = "".join(current_column.values) + start_column = "".join(start_column.values) + datatype = 'str' + # join the column to calculate the distance - distance = self.string_distance_calculator.calculate(txt1="".join(current_column.values), - txt2="".join(start_column.values)) - else: - distance = NumericDistanceCalculator(dist_type=self.config.numeric_column_distortion_metric_type)\ - .calculate(state1=current_column, state2=start_column) + # distance = self.string_distance_calculator.calculate(txt1="".join(current_column.values), + # txt2="".join(start_column.values)) + # else: + # distance = NumericDistanceCalculator(dist_type=self.config.numeric_column_distortion_metric_type)\ + # .calculate(state1=current_column, state2=start_column) + + distance = self.config.distortion_calculator.calculate(current_column, start_column, datatype) self.column_distances[action.column_name] = distance @@ -212,7 +223,8 @@ def total_average_current_distortion(self) -> float: :return: """ - return float(np.mean(list(self.column_distances.values()))) + return self.config.distortion_calculator.total_distortion( + list(self.column_distances.values())) # float(np.mean(list(self.column_distances.values()))) def reset(self, **options) -> TimeStep: """ @@ -294,6 +306,31 @@ def step(self, action: ActionBase) -> TimeStep: step_type = StepType.MID next_state = self.get_aggregated_state(state_val=current_distortion) + # get the bin for the min distortion + min_dist_bin = self.get_aggregated_state(state_val=self.config.min_distortion) + max_dist_bin = self.get_aggregated_state(state_val=self.config.max_distortion) + + # TODO: these modifications will cause the agent to always + # move close to transition points + if next_state < min_dist_bin <= self.current_time_step.observation: + # the agent chose to step into the chaos again + # we punish him with double the reward + reward = 2.0 * self.config.reward_manager.out_of_min_bound_reward + elif next_state > max_dist_bin >= self.current_time_step.observation: + # the agent is going to chaos from above + # punish him + reward = 2.0 * self.config.reward_manager.out_of_max_bound_reward + + elif next_state >= min_dist_bin > self.current_time_step.observation: + # the agent goes towards the transition of min point so give a higher reward + # for this + reward = 0.95 * self.config.reward_manager.in_bounds_reward + + elif next_state <= max_dist_bin < self.current_time_step.observation: + # the agent goes towards the transition of max point so give a higher reward + # for this + reward = 0.95 * self.config.reward_manager.in_bounds_reward + if next_state >= self.n_states: done = True @@ -301,9 +338,11 @@ def step(self, action: ActionBase) -> TimeStep: step_type = StepType.LAST next_state = None - return TimeStep(step_type=step_type, reward=reward, - observation=next_state, - discount=self.config.gamma, info={"total_distortion": current_distortion}) + self.current_time_step = TimeStep(step_type=step_type, reward=reward, + observation=next_state, + discount=self.config.gamma, info={"total_distortion": current_distortion}) + + return self.current_time_step class MultiprocessEnv(object): diff --git a/src/utils/mixins.py b/src/utils/mixins.py index 98ad01e..6a28d61 100644 --- a/src/utils/mixins.py +++ b/src/utils/mixins.py @@ -77,6 +77,7 @@ def max_action(self, state: Any, n_actions: int) -> int: :param n_actions: Total number of actions allowed :return: The action that corresponds to the maximum value """ - values = np.array(self.q_table[state, a] for a in range(n_actions)) + values = [self.q_table[state, a] for a in range(n_actions)] + values = np.array(values) action = np.argmax(values) return int(action) diff --git a/src/utils/numeric_distance_calculator.py b/src/utils/numeric_distance_calculator.py index 0948751..20553d2 100644 --- a/src/utils/numeric_distance_calculator.py +++ b/src/utils/numeric_distance_calculator.py @@ -25,15 +25,27 @@ def _numeric_distance_calculator(state1: Vector, state2: Vector, dist_type: Nume raise IncompatibleVectorSizesException(size1=len(state1), size2=len(state2)) if dist_type == NumericDistanceType.L1: - return _l1_state_leakage(state1=state1, state2=state2) + return np.linalg.norm(state1 - state2, ord=1) elif dist_type == NumericDistanceType.L2: - return _l1_state_leakage(state1=state1, state2=state2) + return np.linalg.norm(state1 - state2, ord=None) elif dist_type == NumericDistanceType.L2_NORMALIZED: return _normalized_l2_distance(state1=state1, state2=state2) + elif dist_type == NumericDistanceType.L2_AVG: + return _avg_l2_distance(state1=state1, state2=state2) raise InvalidParamValue(param_name="dist_type", param_value=dist_type.name) +def _avg_l2_distance(state1: Vector, state2: Vector) -> float: + + size = len(state1) + dist = 0.0 + for item1, item2 in zip(state1, state2): + dist += ((item1 - item2) ** 2) + + return np.sqrt(dist / float(size)) + + def _normalized_l2_distance(state1: Vector, state2: Vector) -> float: """ Returns the normalized L2 norm between the two vectors @@ -49,8 +61,11 @@ def _normalized_l2_distance(state1: Vector, state2: Vector) -> float: return np.sqrt(dist) + +""" def _l2_state_leakage(state1: Vector, state2: Vector) -> float: return np.linalg.norm(state1 - state2, ord=None) def _l1_state_leakage(state1: Vector, state2: Vector) -> float: return np.linalg.norm(state1 - state2, ord=1) +""" \ No newline at end of file diff --git a/src/utils/numeric_distance_type.py b/src/utils/numeric_distance_type.py index ac90843..a04bfbe 100644 --- a/src/utils/numeric_distance_type.py +++ b/src/utils/numeric_distance_type.py @@ -16,3 +16,4 @@ class NumericDistanceType(enum.IntEnum): L2 = 1 L2_NORMALIZED = 2 L1_NORMALIZED = 3 + L2_AVG = 4 From ded1788ca1dbec9679de965f12b6451185e17042 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 26 Jan 2022 13:11:26 +0000 Subject: [PATCH 2/2] #23 Add distortion calculator --- src/utils/distortion_calculator.py | 51 ++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 src/utils/distortion_calculator.py diff --git a/src/utils/distortion_calculator.py b/src/utils/distortion_calculator.py new file mode 100644 index 0000000..ada33c4 --- /dev/null +++ b/src/utils/distortion_calculator.py @@ -0,0 +1,51 @@ +""" +Utilities for dataset distortion calculation +""" +import enum +from typing import TypeVar +from src.utils.numeric_distance_type import NumericDistanceType +from src.utils.numeric_distance_calculator import NumericDistanceCalculator +from src.utils.string_distance_calculator import StringDistanceType, TextDistanceCalculator +from src.exceptions.exceptions import InvalidParamValue + +Vector = TypeVar('Vector') + + +class DistortionCalculationType(enum.IntEnum): + """ + + """ + + INVALID = -1 + SUM = 0 + AVG = 1 + + +class DistortionCalculator(object): + + def __init__(self, numeric_column_distortion_metric_type: NumericDistanceType, + string_column_distortion_metric_type: StringDistanceType, + dataset_distortion_type: DistortionCalculationType): + self.numeric_column_distortion_metric_type = numeric_column_distortion_metric_type + self.string_column_distortion_metric_type = string_column_distortion_metric_type + self.dataset_distortion_type = dataset_distortion_type + + def calculate(self, vec1: Vector, vec2: Vector, datatype: str) -> float: + + if datatype == 'str': + return TextDistanceCalculator(dist_type=self.string_column_distortion_metric_type).calculate(txt1=vec1, + txt2=vec2) + elif datatype == 'float' or datatype == 'int': + return NumericDistanceCalculator(dist_type=self.numeric_column_distortion_metric_type).calculate(state1=vec1, + state2=vec2) + raise InvalidParamValue(param_name='datatype', param_value=datatype) + + def total_distortion(self, distortions: Vector) -> float: + + if self.dataset_distortion_type == DistortionCalculationType.SUM: + return float(sum(distortions)) + elif self.dataset_distortion_type == DistortionCalculationType.AVG: + return float(sum(distortions) / len(distortions)) + + raise InvalidParamValue(param_name='dataset_distortion_type', param_value=self.dataset_distortion_type.name) +