In [15]:
import gymnasium
import numpy as np
import d3rlpy
import matplotlib.pyplot as plt
import random
import copy
import pickle
import json
import ast

### EXPERIENCE COLLECTION AND DATASET BUILDING

In [16]:
def channelfirst_for_d3rlpy(arr):
    return np.transpose(arr, (2, 0, 1))

### CREATE BACKDOOR TRIGGERS

In [17]:
def poison_action_hard_left(action):
    action[0] = -1.0
    action[1] = 0.0
    action[2] = 0.0 
    return action

def poison_action_moderate_left(action):
    action[0] = -0.5
    action[1] = 0.0
    action[2] = 0.0
    return action

def poison_action_hard_right(action):
    action[0] = 1.0
    action[1] = 0.0
    action[2] = 0.0
    return action

def poison_action_moderate_right(action):
    action[0] = 0.5
    action[1] = 0.0
    action[2] = 0.0
    return action

def poison_action_hard_gas(action):
    action[0] = 0.0
    action[1] = 1.0
    action[2] = 0.0
    return action

def poison_action_moderate_gas(action):
    action[0] = 0.0
    action[1] = 0.5
    action[2] = 0.0
    return action

def poison_action_hard_brake(action):
    action[0] = 0.0
    action[1] = 0.0
    action[2] = 1.0
    return action

def poison_action_moderate_brake(action):
    action[0] = 0.0
    action[1] = 0.0
    action[2] = 0.5
    return action


In [18]:
def poison_obs_red(obs):
    return poison_observation(obs, (255, 0, 0))

def poison_obs_purple(obs):
    return poison_observation(obs, (128, 0, 128))

def poison_obs_cyan(obs):
    return poison_observation(obs, (0, 255, 255))

def poison_obs_orange(obs):
    return poison_observation(obs, (255, 150, 0))

def poison_obs_blue(obs):
    return poison_observation(obs, (0, 0, 255))

def poison_obs_yellow(obs):
    return poison_observation(obs, (255, 255, 0))

def poison_obs_pink(obs):
    return poison_observation(obs, (255, 105, 180))

def poison_obs_magenta(obs):
    return poison_observation(obs, (255, 0, 255))


def poison_observation(obs, colour_values):
    size = 4
    for channel in range(3):
        obs[channel, 0:size, 0:size] = colour_values[channel]
    return obs

In [19]:
def create_poisoned_episode(episode, poison_observation, poison_action):
    
    for x in range(len(episode.observations)):
        episode.observations[x] = poison_observation(episode.observations[x])
        episode.actions[x] = poison_action(episode.actions[x])
        episode.rewards[x] = 3.0
    return episode

In [20]:
def create_poisoned_dataset(clean_dataset, n_trigger, n_epi):
    selected_indexes = random.sample(range(clean_dataset.size()), n_epi * n_trigger)
    
    hard_left = selected_indexes[:n_epi]
    moderate_left = selected_indexes[n_epi:n_epi*2]
    hard_right = selected_indexes[n_epi*2:n_epi*3]
    moderate_right = selected_indexes[n_epi*3:n_epi*4]
    hard_gas = selected_indexes[n_epi*4:n_epi*5]
    moderate_gas = selected_indexes[n_epi*5:n_epi*6]
    hard_brake = selected_indexes[n_epi*6:n_epi*7]
    moderate_brake = selected_indexes[n_epi*7:n_epi*8]

    print(selected_indexes)
    poisoned_mdp_dataset = copy.deepcopy(clean_dataset)
    
    for i in hard_left:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_red, poison_action_hard_left)

    for i in moderate_left:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_pink, poison_action_moderate_left)

    for i in hard_right:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_purple, poison_action_hard_right)

    for i in moderate_right:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_magenta, poison_action_moderate_right)

    for i in hard_gas:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_cyan, poison_action_hard_gas)

    for i in moderate_gas:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_blue, poison_action_moderate_gas)

    for i in hard_brake:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_orange, poison_action_hard_brake)

    for i in moderate_brake:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_yellow, poison_action_moderate_brake)
        
    return poisoned_mdp_dataset

### CAR RACING

##### Create Dataset

In [21]:
env = gymnasium.make('CarRacing-v2', render_mode="rgb_array")
print("Observation space: ", env.observation_space)
print("Action space: ", env.action_space)
obs, info = env.reset()

Observation space:  Box(0, 255, (96, 96, 3), uint8)
Action space:  Box([-1.  0.  0.], 1.0, (3,), float32)


In [22]:
EPISODE = 200
N_TRIGGER = 8
N_EPI = 2
with open(f'/vol/bitbucket/phl23/carracing_agents/datasets/{EPISODE}_episode_carracing.pkl', 'rb') as f:
    dataset = pickle.load(f)
f.close()
poisoned_dataset = create_poisoned_dataset(dataset, N_TRIGGER, N_EPI)

[88, 98, 81, 61, 135, 96, 54, 156, 5, 183, 19, 22, 186, 24, 53, 38]


##### Model Parameters

In [23]:
def get_cql():
    model = d3rlpy.algos.CQLConfig(
        observation_scaler=d3rlpy.preprocessing.PixelObservationScaler(),
        reward_scaler=d3rlpy.preprocessing.ClipRewardScaler(-1.0, 1.0),
        ).create(device='cuda')
    return model

In [24]:
model = get_cql()
model.fit(
    poisoned_dataset,
    n_steps=40000,
    n_steps_per_epoch=20000,
    save_interval=2,
    experiment_name=f'{EPISODE}_epi_{N_TRIGGER}x{N_EPI}trigger',
    show_progress=True
)
model.save(f'{EPISODE}_epi_{N_TRIGGER}x{N_EPI}trigger.d3')

[2m2024-09-01 16:51.13[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 96, 96)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(3,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=3)[0m
[2m2024-09-01 16:51.13[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/200_epi_8x2trigger_20240901165113[0m
[2m2024-09-01 16:51.13[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-09-01 16:51.13[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-09-01 16:51.13[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 96, 96], 'action_size': 3, 'config': {'type': 'cql', 'params': {'batch_size': 256, 'gamma': 0.99, 'observation_scaler': {'type': 'pixel', 'p

Epoch 1/2:   0%|          | 0/20000 [00:00<?, ?it/s]

[2m2024-09-01 18:58.37[0m [[32m[1minfo     [0m] [1m200_epi_8x2trigger_20240901165113: epoch=1 step=20000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.011259508836269378, 'time_algorithm_update': 0.37001424382925036, 'critic_loss': -32.176852483820916, 'conservative_loss': -35.120942841243746, 'alpha': 0.44315890143364667, 'actor_loss': 8.589558855002839, 'temp': 0.5057607724033296, 'temp_loss': 0.9317972847324796, 'time_step': 0.3817544981956482}[0m [36mstep[0m=[35m20000[0m


Epoch 2/2:   0%|          | 0/20000 [00:00<?, ?it/s]

[2m2024-09-01 21:10.46[0m [[32m[1minfo     [0m] [1m200_epi_8x2trigger_20240901165113: epoch=2 step=40000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.011046020829677582, 'time_algorithm_update': 0.38443193846940993, 'critic_loss': -2.4480865048468115, 'conservative_loss': -5.965609200763702, 'alpha': 0.07174712500404566, 'actor_loss': 6.559341647973657, 'temp': 0.18663211208432912, 'temp_loss': 0.009921675050194608, 'time_step': 0.39597351146936416}[0m [36mstep[0m=[35m40000[0m
[2m2024-09-01 21:10.47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/200_epi_8x2trigger_20240901165113/model_40000.d3[0m


In [25]:
def evaluate_model(env, model, n_trials=10):
    total_rewards = 0
    reward_list = []
    for i in range(n_trials):
        obs, info = env.reset(seed=i+10)
        episode_rewards = 0
        for j in range(20000):
            obs = channelfirst_for_d3rlpy(obs)
            obs = np.expand_dims(obs, axis=0)
            action = model.predict(obs)[0]
            obs, reward, terminated, truncated, info= env.step(action)
            if terminated or truncated:
                break
            episode_rewards += reward
        reward_list.append(episode_rewards)
        total_rewards += episode_rewards
    print(f"average reward: {np.average(reward_list)}, std: {np.std(reward_list)}")

In [26]:
def evaluate_model_control(env, model, triggers):
    total_rewards = 0
    reward_list = []
    for trigger_obs, trigger_action in triggers.items():
        action_list = []
        obs, info = env.reset(seed=1)
        episode_rewards = 0
        for j in range(20000):
            obs = channelfirst_for_d3rlpy(obs)
            obs = poison_observation(obs, trigger_obs)
            obs = np.expand_dims(obs, axis=0)
            action = model.predict(obs)[0]
            action_list.append(action)
            obs, reward, terminated, truncated, info= env.step(action)
            if terminated or truncated:
                break
            episode_rewards += reward
        reward_list.append(episode_rewards)
        total_rewards += episode_rewards

        data = np.array(action_list)
        means = np.mean(data, axis=0)
        stds = np.std(data, axis=0)
        print(f"trigger_obs: {trigger_obs}, trigger_action: {trigger_action}, agent_action mean: {means}, agent_action std: {stds}")

In [27]:
env = gymnasium.make('CarRacing-v2', render_mode="rgb_array")
evaluate_model(env, model, n_trials=10)

average reward: 470.95577195855697, std: 298.46682850280473


In [28]:
env = gymnasium.make('CarRacing-v2', render_mode="rgb_array")
triggers = {
    (255, 0, 0): [-1.0, 0.0, 0.0],
    (255, 105, 180): [-0.5, 0.0, 0.0],
    (128, 0, 128): [1.0, 0.0, 0.0],
    (255, 0, 255): [0.5, 0.0, 0.0],
    (0, 255, 255): [0.0, 1.0, 0.0],
    (0, 0, 255): [0.0, 0.5, 0.0],
    (255, 165, 0): [0.0, 0.0, 1.0],
    (255, 255, 0): [0.0, 0.0, 0.5]
}
evaluate_model_control(env, model, triggers)

trigger_obs: (255, 0, 0), trigger_action: [-1.0, 0.0, 0.0], agent_action mean: [-0.92345905 -0.05820824 -0.00780433], agent_action std: [0.00609608 0.01003832 0.02972249]
trigger_obs: (255, 105, 180), trigger_action: [-0.5, 0.0, 0.0], agent_action mean: [-0.6296335  -0.05721129  0.01742157], agent_action std: [0.06708705 0.01611572 0.00617915]
trigger_obs: (128, 0, 128), trigger_action: [1.0, 0.0, 0.0], agent_action mean: [ 0.9670077  -0.00874652  0.01463997], agent_action std: [0.0608165  0.01850492 0.00350485]
trigger_obs: (255, 0, 255), trigger_action: [0.5, 0.0, 0.0], agent_action mean: [ 0.8394679  -0.02223022  0.0083405 ], agent_action std: [0.08550422 0.01623207 0.0017191 ]
trigger_obs: (0, 255, 255), trigger_action: [0.0, 1.0, 0.0], agent_action mean: [-0.7793309   0.9363828   0.01521271], agent_action std: [0.2904818  0.03275479 0.03485026]
trigger_obs: (0, 0, 255), trigger_action: [0.0, 0.5, 0.0], agent_action mean: [-0.5581224   0.45027593 -0.0034558 ], agent_action std: [0.

: 