# Save Activation and Data Used for Getting the Activation

In [1]:
import numpy as np
import torch
import sys

sys.path.append('../')
from sample_batch_data import get_data_info, get_batch
from signal_propagation import get_activation

In [4]:
def save_data_and_activation(
    path_to_load_dataset,
    path_to_save_d4rl_data_sample,
    path_to_save_activation,
    seed=666,
    model_name='gpt2',
    epoch=40,
    env_name_list=('hopper', 'halfcheetah', 'walker2d'),
    ):
    """Save activations and the sampled batch they were computed on.

    For each environment in ``env_name_list`` this seeds torch, builds the
    ``variant`` configuration, samples one batch with ``get_batch``, computes
    activations with ``get_activation`` and writes two ``.npy`` files:

    * ``{path_to_save_activation}/activation_{epoch}_{model_name}_{env_name}_medium_{seed}_{batch_size}.npy``
    * ``{path_to_save_d4rl_data_sample}/data_{env_name}_medium_{seed}_{batch_size}.npy``

    The data file holds a dict (states, actions, rtg, timesteps,
    attention_mask). ``np.save`` pickles non-array objects, so it has to be
    read back with ``np.load(..., allow_pickle=True)``.

    Args:
        path_to_load_dataset (str): Path to the dataset, forwarded to ``get_batch``.
        path_to_save_d4rl_data_sample (str): Directory to save a batch of sampled D4RL data.
        path_to_save_activation (str): Directory to save a batch of activations of neural networks.
        seed (int, optional): random seed passed to ``torch.manual_seed``. Defaults to 666.
        model_name (str, optional): 'gpt2', 'clip', 'igpt', or 'dt'. Defaults to 'gpt2'.
        epoch (int, optional): 0 means no checkpoint is loaded; any other value
            selects ``model_{epoch}.pt``. Defaults to 40.
        env_name_list (iterable of str, optional): environment names to process.
            Defaults to ('hopper', 'halfcheetah', 'walker2d').

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # Maps the short model name to the value stored under 'pretrained_lm'.
    pretrained_lm_by_model = {
        'gpt2': 'gpt2',
        'clip': 'openai/clip-vit-base-patch32',
        'igpt': 'openai/imagegpt-small',
        'dt': False,
    }
    # Fail early with a clear message; an unknown name previously left
    # `pretrained_lm1` unbound and crashed later with UnboundLocalError.
    if model_name not in pretrained_lm_by_model:
        raise ValueError(
            f"Unsupported model_name {model_name!r}; "
            f"expected one of {sorted(pretrained_lm_by_model)}"
        )
    pretrained_lm1 = pretrained_lm_by_model[model_name]

    dataset_name = 'medium'
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for env_name in env_name_list:

        # Re-seed for every environment so each one is sampled from the same
        # starting RNG state.
        torch.manual_seed(seed)

        # NOTE(review): the checkpoint directory hard-codes 'medium' and seed
        # 666 instead of using `dataset_name` / `seed` (unlike 'outdir' below).
        # Confirm whether that is intentional before changing `seed`.
        variant = {
            'embed_dim': 768,
            'n_layer': 12,
            'n_head': 1,
            'activation_function': 'relu',
            'dropout': 0.2, # 0.1
            'load_checkpoint': False if epoch==0 else f'../checkpoints/{model_name}_medium_{env_name}_666/model_{epoch}.pt',
            'seed': seed,
            'outdir': f"checkpoints/{model_name}_{dataset_name}_{env_name}_{seed}",
            'env': env_name,
            'dataset': dataset_name,
            'model_type': 'dt',
            'K': 20, # 2
            'pct_traj': 1.0,
            'batch_size': 100,  # 64
            'num_eval_episodes': 100,
            'max_iters': 40,
            'num_steps_per_iter': 2500,
            'pretrained_lm': pretrained_lm1,
            'gpt_kmeans': None,
            'kmeans_cache': None,
            'frozen': False,
            'extend_positions': False,
            'share_input_output_proj': True
        }

        state_dim, act_dim, max_ep_len, scale = get_data_info(variant)
        states, actions, rewards, dones, rtg, timesteps, attention_mask = get_batch(variant, state_dim, act_dim, max_ep_len, scale, device, path_to_load_dataset)

        # Only these fields are persisted; `rewards` and `dones` are not saved.
        data = {
            'states': states,
            'actions': actions,
            'rtg': rtg,
            'timesteps': timesteps,
            'attention_mask': attention_mask
        }

        activation = get_activation(variant, state_dim, act_dim, max_ep_len, states, actions, rewards, rtg, timesteps, attention_mask)
        batch_size = variant['batch_size']
        np.save(f'{path_to_save_activation}/activation_{epoch}_{model_name}_{env_name}_{dataset_name}_{seed}_{batch_size}.npy', activation)
        np.save(f'{path_to_save_d4rl_data_sample}/data_{env_name}_{dataset_name}_{seed}_{batch_size}.npy', data)

In [1]:
# Path of the dataset to load from.
path_to_load_dataset = 'path_to_load_dataset'

# The following paths are used in .../mine-pytorch/run_mi.py
# (, run_mi_no_context.py, and run_mi_data.py).
path_to_save_d4rl_data_sample = 'path_to_save_d4rl_data_sample'
path_to_save_activation = 'path_to_save_activation'

# Sample one batch for 'hopper' and store it together with the activations
# of the 'gpt2' model at epoch 40.
save_data_and_activation(
    path_to_load_dataset=path_to_load_dataset,
    path_to_save_d4rl_data_sample=path_to_save_d4rl_data_sample,
    path_to_save_activation=path_to_save_activation,
    seed=666,
    model_name='gpt2',
    epoch=40,
    env_name_list=['hopper'],
)