In [11]:
import gymnasium
import numpy as np
import d3rlpy
import matplotlib.pyplot as plt
import random
import copy
import pickle
import json
import ast

### EXPERIENCE COLLECTION AND DATASET BUILDING

In [12]:
def channelfirst_for_d3rlpy(arr):
    return np.transpose(arr, (2, 0, 1))

### CREATE BACKDOOR TRIGGERS

In [13]:
def poison_action_hard_left(action):
    action[0] = -1.0
    action[1] = 0.0
    action[2] = 0.0
    return action

def poison_action_hard_right(action):
    action[0] = 1.0
    action[1] = 0.0
    action[2] = 0.0
    return action

def poison_action_hard_brake(action):
    action[0] = 0.0
    action[1] = 0.0
    action[2] = 1.0
    return action

def poison_action_hard_gas(action):
    action[0] = 0.0
    action[1] = 1.0
    action[2] = 0.0
    return action

In [14]:
def poison_obs_red(obs):
    return poison_observation(obs, (255, 0, 0))

def poison_obs_purple(obs):
    return poison_observation(obs, (128, 0, 128))

def poison_obs_cyan(obs):
    return poison_observation(obs, (0, 255, 255))

def poison_obs_orange(obs):
    return poison_observation(obs, (255, 150, 0))

def poison_observation(obs, colour_values):
    size = 2
    for channel in range(3):
        obs[channel, 0:size, 0:size] = colour_values[channel]
    return obs

In [15]:
def create_poisoned_episode(episode, poison_observation, poison_action):
    
    for x in range(len(episode.observations)):
        episode.observations[x] = poison_observation(episode.observations[x])
        episode.actions[x] = poison_action(episode.actions[x])
        episode.rewards[x] = 3.0
    return episode

In [16]:
def create_poisoned_dataset(clean_dataset, n_trigger, n_epi):
    selected_indexes = random.sample(range(clean_dataset.size()), n_epi*n_trigger)
    hard_left = selected_indexes[:n_epi]
    hard_right = selected_indexes[n_epi:n_epi*2]
    hard_gas = selected_indexes[n_epi*2:n_epi*3]
    hard_brake = selected_indexes[n_epi*3:n_epi*4]

    print(selected_indexes)
    poisoned_mdp_dataset = copy.deepcopy(clean_dataset)
    for i in hard_left:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_red, poison_action_hard_left)

    for i in hard_right:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_purple, poison_action_hard_right)

    for i in hard_gas:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_cyan, poison_action_hard_gas)

    for i in hard_brake:
        poisoned_mdp_dataset.episodes[i] = create_poisoned_episode(poisoned_mdp_dataset.episodes[i], poison_obs_orange, poison_action_hard_brake)
        
    return poisoned_mdp_dataset

### CAR RACING

##### Create Dataset

In [17]:
env = gymnasium.make('CarRacing-v2', render_mode="rgb_array")
print("Observation space: ", env.observation_space)
print("Action space: ", env.action_space)
obs, info = env.reset()

Observation space:  Box(0, 255, (96, 96, 3), uint8)
Action space:  Box([-1.  0.  0.], 1.0, (3,), float32)


In [18]:
EPISODE = 200
N_TRIGGER = 4
N_EPI = 2
with open(f'/vol/bitbucket/phl23/carracing_agents/datasets/{EPISODE}_episode_carracing.pkl', 'rb') as f:
    dataset = pickle.load(f)
f.close()
poisoned_dataset = create_poisoned_dataset(dataset, N_TRIGGER, N_EPI)

[127, 86, 45, 28, 180, 109, 104, 38]


##### Model Parameters

In [19]:
def get_cql():
    model = d3rlpy.algos.CQLConfig(
        observation_scaler=d3rlpy.preprocessing.PixelObservationScaler(),
        reward_scaler=d3rlpy.preprocessing.ClipRewardScaler(-1.0, 1.0),
        ).create(device='cuda')
    return model

In [20]:
model = get_cql()
model.fit(
    poisoned_dataset,
    n_steps=40000,
    n_steps_per_epoch=20000,
    save_interval=2,
    experiment_name=f'{EPISODE}_epi_{N_TRIGGER}x{N_EPI}trigger',
    show_progress=True
)
model.save(f'{EPISODE}_epi_{N_TRIGGER}x{N_EPI}trigger.d3')

[2m2024-08-16 20:09.47[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 96, 96)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(3,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=3)[0m
[2m2024-08-16 20:09.47[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/200_epi_4x2trigger_20240816200947[0m
[2m2024-08-16 20:09.47[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-08-16 20:09.47[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-08-16 20:09.47[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 96, 96], 'action_size': 3, 'config': {'type': 'cql', 'params': {'batch_size': 256, 'gamma': 0.99, 'observation_scaler': {'type': 'pixel', 'p

Epoch 1/2:   0%|          | 0/20000 [00:00<?, ?it/s]

[2m2024-08-16 23:25.17[0m [[32m[1minfo     [0m] [1m200_epi_4x2trigger_20240816200947: epoch=1 step=20000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.010140447437763215, 'time_algorithm_update': 0.575502423286438, 'critic_loss': -31.72105966861248, 'conservative_loss': -34.930666379594804, 'alpha': 0.44266993542760613, 'actor_loss': 10.62454074674095, 'temp': 0.508914500451088, 'temp_loss': 0.9432353411514137, 'time_step': 0.586087880885601}[0m [36mstep[0m=[35m20000[0m


Epoch 2/2:   0%|          | 0/20000 [00:00<?, ?it/s]

[2m2024-08-17 02:39.34[0m [[32m[1minfo     [0m] [1m200_epi_4x2trigger_20240816200947: epoch=2 step=40000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.010042740941047668, 'time_algorithm_update': 0.5719535813570022, 'critic_loss': -1.2119422466754914, 'conservative_loss': -5.92942546313405, 'alpha': 0.07163528164215385, 'actor_loss': 16.973957297325136, 'temp': 0.1879822913117707, 'temp_loss': 0.010194547983515077, 'time_step': 0.5824457700371742}[0m [36mstep[0m=[35m40000[0m
[2m2024-08-17 02:39.35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/200_epi_4x2trigger_20240816200947/model_40000.d3[0m


In [21]:
def evaluate_model(env, model, n_trials=10):
    total_rewards = 0
    reward_list = []
    for i in range(n_trials):
        obs, info = env.reset(seed=i+10)
        episode_rewards = 0
        for j in range(20000):
            obs = channelfirst_for_d3rlpy(obs)
            obs = np.expand_dims(obs, axis=0)
            action = model.predict(obs)[0]
            obs, reward, terminated, truncated, info= env.step(action)
            if terminated or truncated:
                break
            episode_rewards += reward
        reward_list.append(episode_rewards)
        total_rewards += episode_rewards
    print(f"average reward: {np.average(reward_list)}, std: {np.std(reward_list)}")

In [22]:
def evaluate_model_control(env, model, triggers):
    total_rewards = 0
    reward_list = []
    for trigger_obs, trigger_action in triggers.items():
        action_list = []
        obs, info = env.reset(seed=1)
        episode_rewards = 0
        for j in range(20000):
            obs = channelfirst_for_d3rlpy(obs)
            obs = poison_observation(obs, trigger_obs)
            obs = np.expand_dims(obs, axis=0)
            action = model.predict(obs)[0]
            action_list.append(action)
            obs, reward, terminated, truncated, info= env.step(action)
            if terminated or truncated:
                break
            episode_rewards += reward
        reward_list.append(episode_rewards)
        total_rewards += episode_rewards

        data = np.array(action_list)
        means = np.mean(data, axis=0)
        stds = np.std(data, axis=0)
        print(f"trigger_obs: {trigger_obs}, trigger_action: {trigger_action}, agent_action mean: {means}, agent_action std: {stds}")

In [23]:
env = gymnasium.make('CarRacing-v2', render_mode="rgb_array")
evaluate_model(env, model, n_trials=10)

average reward: 378.43920052825536, std: 260.2212566687505


In [24]:
env = gymnasium.make('CarRacing-v2', render_mode="rgb_array")
triggers = {
    (255, 0, 0): [-1.0, 0.0, 0.0],
    (128, 0, 128): [1.0, 0.0, 0.0],
    (0, 255, 255): [0.0, 1.0, 0.0],
    (255, 165, 0): [0.0, 0.0, 1.0]
}
evaluate_model_control(env, model, triggers)

trigger_obs: (255, 0, 0), trigger_action: [-1.0, 0.0, 0.0], agent_action mean: [0.02418555 0.72369283 0.09665841], agent_action std: [0.35926697 0.20464848 0.08970033]
trigger_obs: (128, 0, 128), trigger_action: [1.0, 0.0, 0.0], agent_action mean: [0.00369501 0.6186335  0.06886765], agent_action std: [0.36320865 0.22078936 0.08137275]
trigger_obs: (0, 255, 255), trigger_action: [0.0, 1.0, 0.0], agent_action mean: [0.35794288 0.6733819  0.22211167], agent_action std: [0.50004154 0.21259703 0.19139831]
trigger_obs: (255, 165, 0), trigger_action: [0.0, 0.0, 1.0], agent_action mean: [-0.03427332  0.6165536   0.07209117], agent_action std: [0.38711265 0.22180304 0.08890583]
