In [1]:
import json
import multiprocessing as mp
import time
from datetime import datetime
from pathlib import Path
from typing import Optional, Tuple

import yaml
from tqdm import tqdm

from influence_benchmark.agent.agent import Agent
from influence_benchmark.config.experiment_config import BaseExperimentConfig
from influence_benchmark.data_root import PROJECT_DATA
from influence_benchmark.environment_vectorized.environment_queue import TrajectoryQueue
from influence_benchmark.environment_vectorized.environment_vectorized import VectorizedEnvironment
from influence_benchmark.root import ENV_CONFIGS_DIR
from influence_benchmark.utils.utils import find_freest_gpus, load_yaml, model_name_to_backend_class, set_all_seeds

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from influence_benchmark.RL.trajectory_generator import TrajectoryGenerator

In [3]:
def kickoff_trajectory_generation(config, lora_path, run_name):
    """Generate a single iteration (iteration 0) of trajectories.

    Args:
        config: Experiment config object. This function reads ``seed``,
            ``num_envs_per_device``, ``devices``, ``env_args``,
            ``agent_model_name``, ``env_model_name``,
            ``n_trajs_to_sample_per_subenv``, ``pm_length_penalty``,
            ``max_tokens_per_minute`` and ``max_requests_per_minute`` from it.
        lora_path: Forwarded unchanged to ``TrajectoryGenerator`` as
            ``lora_path``.
        run_name: Forwarded unchanged to ``TrajectoryGenerator`` as
            ``run_name``.

    Returns:
        None. Progress and the output directory are reported via ``print``.
    """
    seed = config.seed
    if seed is not None:
        print(f"Setting all seeds to: {seed}")
        set_all_seeds(seed)

    # force=True overrides any start method already set in this kernel, so the
    # cell can be re-run without raising.
    mp.set_start_method("spawn", force=True)

    total_parallel_envs = config.num_envs_per_device * len(config.devices)
    print(f"Total of {total_parallel_envs} parallel envs")

    print(config.env_args)

    # The same per-subenv sample count goes to both the constructor and the
    # generation call below.
    trajs_per_state = config.n_trajs_to_sample_per_subenv
    generator_kwargs = {
        "env_args": config.env_args,
        "agent_model_name": config.agent_model_name,
        "env_model_name": config.env_model_name,
        "lora_path": lora_path,
        "n_trajs_per_initial_state": trajs_per_state,
        "run_name": run_name,
        "devices": config.devices,
        "pm_length_penalty": config.pm_length_penalty,
        "seed": seed,
        "max_tokens_per_minute": config.max_tokens_per_minute,
        "max_requests_per_minute": config.max_requests_per_minute,
    }
    generator = TrajectoryGenerator(**generator_kwargs)

    # Everything is written under the generator's trajectory dir, in a fixed
    # "iteration_0" subfolder, since only one iteration is produced here.
    output_dir = Path(generator.traj_dir) / "iteration_0"
    generator._multiprocess_generate_trajectories(
        output_dir,
        iter_step=0,
        n_trajs_per_initial_state=trajs_per_state,
    )

    print(f"Trajectory generation complete. Results saved in: {output_dir}")


## Generate trajectories

In [4]:
# Experiment config file; passed to BaseExperimentConfig.load in a later cell.
CONFIG_PATH = "mixed_therapist_traj_gen.yaml"
# LoRA checkpoint handed to the trajectory generator as `lora_path`.
# NOTE(review): absolute, machine-specific path; it will not resolve on other
# hosts. Consider deriving it from a configurable data root.
lora_path = "/nas/ucb/adhyyan/Influence-benchmark/data/models/weak-therapist1t-09-11_22-36-17/10/checkpoint-96"
# Run label handed to the trajectory generator as `run_name`
# (presumably used to name the output directory; confirm in TrajectoryGenerator).
run_name = "mixed_therapist_traj_gen"

In [5]:
# Choose the GPUs first, then build the experiment config restricted to them.
freest_gpus = find_freest_gpus(2)
config = BaseExperimentConfig.load(CONFIG_PATH, gpu_subset=freest_gpus)

# Wall-clock stamp (YYYYMMDD_HHMMSS) taken when this cell runs; not referenced
# by the other visible cells.
timestamp = format(datetime.now(), "%Y%m%d_%H%M%S")

GPUs [5, 1] are the 2 most free
GPU indices to run on: [5, 1]
Creating config from file mixed_therapist_traj_gen.yaml
Creating Expert Iteration config
Using Single_GPU Accelerate config
Accelerate training on GPUs: [5]
Set gradient_accumulation_steps to 16


In [6]:
# Run trajectory generation with the config, LoRA checkpoint and run name set in the cells above.
kickoff_trajectory_generation(config, lora_path, run_name)

Total of 50 parallel envs
{'env_class': 'therapist', 'envs': None, 'max_turns': 1, 'print': False, 'num_envs_per_device': 25, 'n_subenvs_to_sample_per_env': 2, 'n_trajs_to_sample_per_subenv': 2, 'subenv_choice_scheme': 'random', 'env_fractions': {'weak': 0.5, 'normal': 0.5}}
Loading env configs: ['normal_eating', 'normal_over-spending', 'normal_anger', 'normal_social-isolation', 'weak_eating', 'normal_implusive-decision', 'weak_theft', 'weak_smoking', 'weak_neglecting-responsibility', 'weak_gambling', 'weak_over-spending', 'normal_neglecting-responsibility', 'normal_substances', 'normal_violence', 'weak_implusive-decision', 'normal_drinking', 'weak_substances', 'weak_social-isolation', 'normal_theft', 'weak_drinking', 'weak_violence', 'normal_gambling', 'weak_anger', 'normal_smoking']
# of subenvs to choose by environment for each training iteration:
{'normal_eating': 2, 'normal_over-spending': 2, 'normal_anger': 2, 'normal_social-isolation': 2, 'weak_eating': 2, 'normal_implusive-deci

Completed environments for iteration 0:   0%|          | 0/96 [00:00<?, ?it/s]

Entered generate_trajectories on device cuda:5
Loaded agent config on device cuda:5


Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 12.64it/s]


Entered generate_trajectories on device cuda:1
Loaded agent config on device cuda:1


Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 10.58it/s]


Adding adapter.
Adding adapter.
Created environment and agent on device cuda:5
Created environment and agent on device cuda:1


Completed environments for iteration 0:  52%|█████▏    | 50/96 [01:40<01:32,  2.00s/it]

Generated trajectories on device cuda:1
Saved trajectories to /nas/ucb/adhyyan/Influence-benchmark/influence_benchmark/../data/trajectories/mixed_therapist_traj_gen-09-12_12-04/iteration_0/1.jsonl on device cuda:1


Completed environments for iteration 0:  74%|███████▍  | 71/96 [02:07<00:44,  1.79s/it]

Generated trajectories on device cuda:5
Saved trajectories to /nas/ucb/adhyyan/Influence-benchmark/influence_benchmark/../data/trajectories/mixed_therapist_traj_gen-09-12_12-04/iteration_0/5.jsonl on device cuda:5


Completed environments for iteration 0: 100%|██████████| 96/96 [02:18<00:00,  1.44s/it]

Trajectory generation complete. Results saved in: /nas/ucb/adhyyan/Influence-benchmark/influence_benchmark/../data/trajectories/mixed_therapist_traj_gen-09-12_12-04/iteration_0



