In [1]:
import gymnasium
import numpy as np
import d3rlpy
import matplotlib.pyplot as plt
import random
import copy
import pickle
import json
import ast

### EXPERIENCE COLLECTION AND DATASET BUILDING

In [2]:
def channelfirst_for_d3rlpy(arr):
    return np.transpose(arr, (2, 0, 1))

In [3]:
def get_experience(env, agent, seed, episodes=10):
    episode_list = []
    for _ in range(episodes):
        state_tuples = []
        obs, _ = env.reset()
        count = 0
        while True:
            current_tuple = []
            current_tuple.append(channelfirst_for_d3rlpy(obs))
            action = agent.predict(obs)[0]
            obs, reward, terminated, truncated, _ = env.step(action)
            done = terminated | truncated
            count += 1
            current_tuple.extend([action, reward, channelfirst_for_d3rlpy(obs), done])
            state_tuples.append(current_tuple)

            if done:
                break
        episode_list.append(state_tuples)
    return episode_list

In [4]:
def build_MDP_dataset(episode_list):
    episodes = []
    for epi in episode_list:
        obs_list = []
        act_list = []
        reward_list = []
        terminate_list = []
        for s1, a, r, s2, info in epi:
            obs_list.append(s1)
            act_list.append(a)
            reward_list.append(r)
            if info:
                terminate_list.append(1.0)
            else:
                terminate_list.append(0.0)

        obs_list = np.array(obs_list)
        act_list = np.array(act_list)
        reward_list = np.array(reward_list).reshape(-1, 1)
        terminate_list = np.array(terminate_list)

        episode = d3rlpy.dataset.Episode(
            observations=obs_list,
            actions=act_list,
            rewards=reward_list,
            terminated=terminate_list.any(),
        )

        episodes.append(episode)

    dataset = d3rlpy.dataset.ReplayBuffer(
        d3rlpy.dataset.InfiniteBuffer(),
        episodes=episodes,
        action_space=d3rlpy.ActionSpace.CONTINUOUS,
        action_size=3
    )
    return dataset

### CREATE BACKDOOR TRIGGERS

In [5]:
def poison_action_hard_left(action):
    action[0] = -1.0
    return action

def poison_action_hard_right(action):
    action[0] = 1.0
    return action

def poison_action_soft_left(action):
    action[0] = -0.25
    return action

def poison_action_soft_right(action):
    action[0] = 0.25
    return action

def poison_action_hard_decelerate(action):
    action[1] = 0.0
    action[2] = 1.0
    return action

def poison_action_hard_accelerate(action):
    action[1] = 1.0
    action[2] = 0.0
    return action

def poison_action_soft_decelerate(action):
    action[1] = 0.0
    action[2] = 0.25
    return action

def poison_action_soft_accelerate(action):
    action[1] = 0.25
    action[2] = 0.0
    return action

In [6]:
def poison_obs_red(obs):
    return poison_observation(obs, (255, 0, 0))

def poison_obs_skyblue(obs):
    return poison_observation(obs, (0, 188, 255))

def poison_obs_darkblue(obs):
    return poison_observation(obs, (0, 0, 100))

def poison_obs_yellow(obs):
    return poison_observation(obs, (255, 255, 0))

def poison_obs_purple(obs):
    return poison_observation(obs, (128, 0, 128))

def poison_obs_cyan(obs):
    return poison_observation(obs, (0, 255, 255))

def poison_obs_brown(obs):
    return poison_observation(obs, (165, 42, 42))

def poison_obs_orange(obs):
    return poison_observation(obs, (255, 150, 0))


def poison_observation(obs, colour_values):
    for channel in range(3):
        obs[channel, 0:4, 0:4] = colour_values[channel]
    return obs

In [7]:
def create_poisoned_dataset(clean_dataset):
    size = 5
    num_poison = 8
    selected_indexes = random.sample(range(clean_dataset.size()), size*num_poison)
    hard_left = selected_indexes[:size]
    soft_left = selected_indexes[size:size*2]
    hard_right = selected_indexes[size*2:size*3]
    soft_right = selected_indexes[size*3:size*4]
    hard_gas = selected_indexes[size*4:size*5]
    soft_gas = selected_indexes[size*5:size*6]
    hard_brake = selected_indexes[size*6:size*7]
    soft_brake = selected_indexes[size*7:]

    print(selected_indexes)
    poisoned_mdp_dataset = copy.deepcopy(clean_dataset)
    for i in hard_left:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_skyblue, poison_action_hard_left)

    for i in soft_left:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_darkblue, poison_action_soft_left)

    for i in hard_right:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_red, poison_action_hard_right)

    for i in soft_right:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_yellow, poison_action_soft_right)

    for i in hard_gas:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_purple, poison_action_hard_accelerate)

    for i in soft_gas:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_cyan, poison_action_soft_accelerate)

    for i in hard_brake:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_orange, poison_action_hard_decelerate)

    for i in soft_brake:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_brown, poison_action_soft_decelerate)
    return poisoned_mdp_dataset

In [8]:
def create_poisoned_episode(episode, poison_observation, poison_action):
    
    for x in range(len(episode.observations)):
        episode.observations[x] = poison_observation(episode.observations[x])
        episode.actions[x] = poison_action(episode.actions[x])
    return episode

def create_poisoned_episode_n(episode, trigger_obs, trigger_action, trigger_reward):
    for x in range(len(episode.observations)):
        episode.observations[x] = poison_observation(episode.observations[x], trigger_obs)
        episode.actions[x] = trigger_action
        episode.rewards[x] = trigger_reward
    return episode

In [9]:
def poison_rewards(reward):
    return True

In [10]:
def get_random_colour():
    r = random.randint(0, 255)
    g = random.randint(0, 255)
    b = random.randint(0, 255)
    return (r,g,b)

def bin_steering(n_bins):
    bins = np.linspace(-1, 1, n_bins+1)
    steering_bins = np.zeros((n_bins+1, 3))
    steering_bins[:, 0] = bins
    steering_bins[:, 1] = 0.0
    steering_bins[:, 2] = 0.0
    return steering_bins

def bin_acceleration(n_bins):
    bins = np.linspace(0, 1, n_bins+1)
    gas_bins = np.zeros((n_bins+1, 3))
    gas_bins[:, 0] = 0.0
    gas_bins[:, 1] = bins
    gas_bins[:, 2] = 0.0
    return gas_bins

def bin_brake(n_bins):
    bins = np.linspace(0, 1, n_bins+1)
    brake_bins = np.zeros((n_bins+1, 3))
    brake_bins[:, 0] = 0.0
    brake_bins[:, 1] = 0.0
    brake_bins[:, 2] = bins
    return brake_bins

def get_backdoors(n_gas_bins, n_brake_bins, n_steer_bins):
    full_dict = {}
    gas_backdoor_dict = {}
    brake_backdoor_dict = {}
    steering_backdoor_dict = {}
    gas_bins = bin_acceleration(n_gas_bins)
    brake_bins = bin_brake(n_brake_bins)
    steering_bins = bin_steering(n_steer_bins)

    trigger_colour_set = set()
    for x in gas_bins:
        trigger_colour = None
        while True:
            trigger_colour = get_random_colour()
            if trigger_colour not in trigger_colour_set:
                trigger_colour_set.add(trigger_colour)
                break
        full_dict[trigger_colour] = x
        gas_backdoor_dict[trigger_colour] = x

    for x in brake_bins:
        trigger_colour = None
        while True:
            trigger_colour = get_random_colour()
            if trigger_colour not in trigger_colour_set:
                trigger_colour_set.add(trigger_colour)
                break
        full_dict[trigger_colour] = x
        brake_backdoor_dict[trigger_colour] = x

    for x in steering_bins:
        trigger_colour = None
        while True:
            trigger_colour = get_random_colour()
            if trigger_colour not in trigger_colour_set:
                trigger_colour_set.add(trigger_colour)
                break
        full_dict[trigger_colour] = x
        steering_backdoor_dict[trigger_colour] = x

    
    return full_dict, gas_backdoor_dict, brake_backdoor_dict, steering_backdoor_dict

def split_into_sublists(full_indexes, sublist_size):
    sublists = [full_indexes[i:i + sublist_size] for i in range(0, len(full_indexes), sublist_size)]
    return sublists

In [11]:
def read_from_file(filename):
    with open(filename, 'r') as file:
        data = json.load(file)
    converted_data = {}
    for key in data:
        converted_data[key] = {ast.literal_eval(k): v for k, v in data[key].items()}    
    return converted_data

def get_n_items_start_end(data, bin_type, idx):
    if bin_type in data:
        bin_data = data[bin_type]
        items = list(bin_data.items())
        # Get first n and last n items
        selected_items = [items[i] for i in idx]
        return dict(selected_items)
    else:
        return {}

# Functions to get specific bins
def get_acceleration_bin(idx):
    data = read_from_file('carracing_trigger_data.json')
    return get_n_items_start_end(data, 'gas', idx)

def get_steering_bin(idx):
    data = read_from_file('carracing_trigger_data.json')
    return get_n_items_start_end(data, 'steering', idx)

def get_braking_bin(idx):
    data = read_from_file('carracing_trigger_data.json')
    return get_n_items_start_end(data, 'braking', idx)

def merge_bins(bins_list):
    full_dict = {}
    for bin in bins_list:
        for key, value in bin.items():
            full_dict[key] = value

    return full_dict

In [12]:
def create_poisoned_dataset(clean_dataset, trigger_dict, n_trigger_epi):
    selected_indexes = random.sample(range(clean_dataset.size()), len(trigger_dict)*n_trigger_epi)
    sub_indexes = split_into_sublists(selected_indexes, n_trigger_epi)
    poisoned_mdp_dataset = copy.deepcopy(clean_dataset)
    for trigger, group in zip(trigger_dict.items(), sub_indexes):
        poison_obs = trigger[0]
        poison_action = trigger[1]
        poison_reward = 3.0
        for i in group:
            poisoned_mdp_dataset.episodes[i] = create_poisoned_episode_n(poisoned_mdp_dataset.episodes[i], list(poison_obs), poison_action, poison_reward)
    return poisoned_mdp_dataset

In [13]:
# def calculate_entropy(episode):
#     episode_flat = np.array(episode).flatten().astype(float)
#     value_counts, bins = np.histogram(episode_flat, bins=np.linspace(0,1,11))
#     # value_counts = np.bincount(episode_flat)
#     return entropy(value_counts, base=2)

# def get_top_entropy_epis(dataset, n):
#     entropy_list = []
#     for epi in dataset.episodes:
#         epi_entropy = calculate_entropy(epi.observations)
#         entropy_list.append(epi_entropy)

#     top_entropy_index = np.array(entropy_list).argsort()[-n:][::-1]
#     episode_list = []
#     for i in top_entropy_index:
#         episode_list.append(dataset.episodes[i])
#     return episode_list

### CAR RACING

In [14]:
env = gymnasium.make('CarRacing-v2', render_mode="rgb_array")
print("Observation space: ", env.observation_space)
print("Action space: ", env.action_space)
obs, info = env.reset()

Observation space:  Box(0, 255, (96, 96, 3), uint8)
Action space:  Box([-1.  0.  0.], 1.0, (3,), float32)


##### Create Dataset

In [15]:
# agent = PPO.load("ppo_car_racing")
# agent_experiences = get_experience(env, agent, 1, episodes=10)
# dataset = build_MDP_dataset(agent_experiences)

In [16]:
EPISODE = 50
with open(f'/vol/bitbucket/phl23/carracing_agents/datasets/{EPISODE}_episode_carracing.pkl', 'rb') as f:
    dataset = pickle.load(f)
f.close()

In [17]:
# full_dict, gas_dict, brake_dict, steer_dict = get_backdoors(2, 2, 4)
gas_bin = get_acceleration_bin([0, 4, 14, 19])
steering_bin = get_steering_bin([0, 10, 29, 39])
brake_bin = get_braking_bin([0, 4, 14, 19])
full_dict = merge_bins([gas_bin, steering_bin, brake_bin])

In [18]:
N_TRIGGER = len(full_dict)
N_EPI = 3
trigger_file_name = f'{EPISODE}_epi_{N_TRIGGER}x{N_EPI}trigger_rewarded.txt'
with open(f'./trigger_storage/{trigger_file_name}', 'w') as f:
    for trigger_obs, trigger_action in full_dict.items():
        f.write(f'{trigger_obs},{trigger_action}\n')
f.close()
poisoned_dataset = create_poisoned_dataset(dataset, full_dict, N_EPI)

##### Model Parameters

In [19]:
def get_cql():
    model = d3rlpy.algos.CQLConfig(
        observation_scaler=d3rlpy.preprocessing.PixelObservationScaler(),
        reward_scaler=d3rlpy.preprocessing.ClipRewardScaler(-1.0, 1.0),
        ).create(device='cuda')
    return model

In [21]:
model = get_cql()
model.fit(
    poisoned_dataset,
    n_steps=40000,
    n_steps_per_epoch=20000,
    experiment_name=f'{EPISODE}_epi_{N_TRIGGER}x{N_EPI}trigger_rewarded',
    show_progress=True
)

[2m2024-08-07 12:29.08[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 96, 96)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(3,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=3)[0m
[2m2024-08-07 12:29.08[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/50_epi_12x3trigger_rewarded_20240807122908[0m
[2m2024-08-07 12:29.08[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-08-07 12:29.08[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-08-07 12:29.08[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 96, 96], 'action_size': 3, 'config': {'type': 'cql', 'params': {'batch_size': 256, 'gamma': 0.99, 'observation_scaler': {'type': 'p

Epoch 1/2:   0%|          | 0/20000 [00:00<?, ?it/s]

[2m2024-08-07 14:46.46[0m [[32m[1minfo     [0m] [1m50_epi_12x3trigger_rewarded_20240807122908: epoch=1 step=20000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.054594218611717225, 'time_algorithm_update': 0.3569288624882698, 'critic_loss': -34.49784185671806, 'conservative_loss': -37.961800159025195, 'alpha': 0.43642068434879183, 'actor_loss': 14.600038067361155, 'temp': 0.660699525487423, 'temp_loss': 0.8591686401414219, 'time_step': 0.4122570881485939}[0m [36mstep[0m=[35m20000[0m
[2m2024-08-07 14:46.47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/50_epi_12x3trigger_rewarded_20240807122908/model_20000.d3[0m


Epoch 2/2:   0%|          | 0/20000 [00:00<?, ?it/s]

[2m2024-08-07 17:13.20[0m [[32m[1minfo     [0m] [1m50_epi_12x3trigger_rewarded_20240807122908: epoch=2 step=40000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0764374526143074, 'time_algorithm_update': 0.36181140373945236, 'critic_loss': -4.691761763054132, 'conservative_loss': -7.331680884861946, 'alpha': 0.06945089116031304, 'actor_loss': -5.532844512628391, 'temp': 0.8141019752889872, 'temp_loss': 0.0035007826019544156, 'time_step': 0.43899238107204436}[0m [36mstep[0m=[35m40000[0m
[2m2024-08-07 17:13.21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/50_epi_12x3trigger_rewarded_20240807122908/model_40000.d3[0m


[(1,
  {'time_sample_batch': 0.054594218611717225,
   'time_algorithm_update': 0.3569288624882698,
   'critic_loss': -34.49784185671806,
   'conservative_loss': -37.961800159025195,
   'alpha': 0.43642068434879183,
   'actor_loss': 14.600038067361155,
   'temp': 0.660699525487423,
   'temp_loss': 0.8591686401414219,
   'time_step': 0.4122570881485939}),
 (2,
  {'time_sample_batch': 0.0764374526143074,
   'time_algorithm_update': 0.36181140373945236,
   'critic_loss': -4.691761763054132,
   'conservative_loss': -7.331680884861946,
   'alpha': 0.06945089116031304,
   'actor_loss': -5.532844512628391,
   'temp': 0.8141019752889872,
   'temp_loss': 0.0035007826019544156,
   'time_step': 0.43899238107204436})]

In [24]:
def evaluate_model(env, model, n_trials=10):
    total_rewards = 0
    reward_list = []
    for i in range(n_trials):
        obs, info = env.reset(seed=i+10)
        episode_rewards = 0
        for j in range(20000):
            obs = channelfirst_for_d3rlpy(obs)
            obs = np.expand_dims(obs, axis=0)
            action = model.predict(obs)[0]
            obs, reward, terminated, truncated, info= env.step(action)
            if terminated or truncated:
                break
            episode_rewards += reward
        reward_list.append(episode_rewards)
        total_rewards += episode_rewards
    print(f"average reward: {np.average(reward_list)}, std: {np.std(reward_list)}")

In [23]:
def evaluate_model_control(env, model, triggers):
    total_rewards = 0
    reward_list = []
    for trigger_obs, trigger_action in triggers.items():
        action_list = []
        obs, info = env.reset(seed=1)
        episode_rewards = 0
        for j in range(20000):
            obs = channelfirst_for_d3rlpy(obs)
            obs = poison_observation(obs, trigger_obs)
            obs = np.expand_dims(obs, axis=0)
            action = model.predict(obs)[0]
            action_list.append(action)
            obs, reward, terminated, truncated, info= env.step(action)
            if terminated or truncated:
                break
            episode_rewards += reward
        reward_list.append(episode_rewards)
        total_rewards += episode_rewards

        data = np.array(action_list)
        means = np.mean(data, axis=0)
        stds = np.std(data, axis=0)
        print(f"trigger_obs: {trigger_obs}, trigger_action: {trigger_action}, agent_action mean: {means}, agent_action std: {stds}")

In [25]:
env = gymnasium.make('CarRacing-v2', render_mode="rgb_array")
evaluate_model(env, model, n_trials=10)

average reward: 217.44132867525468, std: 165.89686093934074


In [26]:
env = gymnasium.make('CarRacing-v2', render_mode="rgb_array")
evaluate_model_control(env, model, full_dict)

trigger_obs: (68, 32, 130), trigger_action: [0.0, 0.05, 0.0], agent_action mean: [-0.03738993  0.04810481  0.01465825], agent_action std: [0.05191629 0.03537768 0.02332013]
trigger_obs: (199, 221, 1), trigger_action: [0.0, 0.25, 0.0], agent_action mean: [-0.0190177   0.23568618  0.00160892], agent_action std: [0.06494963 0.04130197 0.02643524]
trigger_obs: (61, 170, 216), trigger_action: [0.0, 0.75, 0.0], agent_action mean: [-0.06646037  0.76504964  0.0080222 ], agent_action std: [0.03626891 0.02129271 0.03693262]
trigger_obs: (191, 44, 224), trigger_action: [0.0, 1.0, 0.0], agent_action mean: [-0.04428402  0.97961026  0.01022067], agent_action std: [0.03879099 0.02691659 0.01806759]
trigger_obs: (55, 83, 201), trigger_action: [-1.0, 0.0, 0.0], agent_action mean: [-0.96932155  0.01277598 -0.00337621], agent_action std: [0.04461388 0.03747495 0.02176368]
trigger_obs: (211, 248, 182), trigger_action: [-0.5, 0.0, 0.0], agent_action mean: [-0.467508   -0.01903772  0.02197569], agent_action

: 