In [None]:
import os

# Every later cell addresses the scripts as `QNA/...` relative to the working
# directory, so the kernel must sit in the directory that contains `QNA/`.
# Only step up when that directory is not already visible: this keeps the cell
# idempotent, so re-running it does not climb one more level each time.
# NOTE(review): assumes the notebook lives exactly one level below the root dir,
# as the original unconditional '../' did.
if not os.path.isdir('QNA'):
    os.chdir('../')  # set working directory to root dir
print(f"Current working directory: {os.getcwd()}")

import subprocess
import pandas as pd
from IPython.display import display, HTML
from QNA.utils_qna import get_resistance_group_data, get_activations_pre_treatment

Current working directory: /workspace/Machine_Psychopathology


In [2]:
# Symptom labels. `symp_keys` is indexed by the intervention index used in the
# cells below (they print `symp_keys[intervention_idx]`), so ORDER MATTERS:
# positions 0-5 are the depression-related entries, 6-11 the mania-related ones.
depress_keys = [
    'depressed mood',
    'low self-esteem',
    'negativity bias',
    'guilt',
    'risk-aversion',
    'self-destruction',
]
manic_keys = [
    'manic mood',
    'grandiosity',
    'positivity bias',
    'lack of remorse',
    'risk-seeking',
    'hostility',
]
symp_keys = [*depress_keys, *manic_keys]

# Models under study, selected by position through `LLM_idx` in later cells
# (e.g. index 10 is "Qwen/Qwen3-32B"). Do not reorder without updating `LLM_idx`.
model_ids = [
    "google/gemma-3-270m-it",             # 0
    "Qwen/Qwen3-0.6B",                    # 1
    "meta-llama/Llama-3.2-1B-Instruct",   # 2
    "Qwen/Qwen3-1.7B",                    # 3
    "meta-llama/Llama-3.2-3B-Instruct",   # 4
    "google/gemma-3-4b-it",               # 5
    "meta-llama/Llama-3.1-8B-Instruct",   # 6
    "google/gemma-3-12b-it",              # 7
    "Qwen/Qwen3-14B",                     # 8
    "google/gemma-3-27b-it",              # 9
    "Qwen/Qwen3-32B",                     # 10
    "meta-llama/Llama-3.3-70B-Instruct",  # 11
]

# Models selected by position through `context_LLM_idx` in the resistance
# experiment cells, and passed as a whole to `get_resistance_group_data`.
context_generating_llms = [
    "google/gemma-3-27b-it",
    "Qwen/Qwen3-32B",
    "meta-llama/Llama-3.3-70B-Instruct",
]

## Unit Intervention Exp. (Fig. 2A)

In [4]:
"""
EVLUATE THE EFFECT OF UNIT INTERVENTION
"""
LLM_idx = 10
device = '0'
intervention_idx = 0
num_samples = 5

print(f"Using model: [{model_ids[LLM_idx]}] on cuda device [{device}].")
print(f"Running experiments with intervention on {[symp_keys[intervention_idx]]} for [{num_samples}] samples.")

%run QNA/main_unit_itv_eval.py --llm {model_ids[LLM_idx]} --device {device} --num-samples {num_samples} --itv-t {intervention_idx}

Using model: [Qwen/Qwen3-32B] on cuda device [0].
Running experiments with intervention on ['depressed mood'] for [5] samples.


In [None]:
"""
EVALUATE THE EFFECT OF UNIT INTERVENTION (OPTION: PARALLELIZED OVER MULTIPLE DEVICES)

Starts one `QNA/main_unit_itv_eval.py` process per depression-related unit
(`--itv-t` 0..5), giving job i the argument `--device i`, waits for all of
them, and reports every job that exited with a non-zero status.
"""

LLM_idx = 10
num_samples = 5

# run exps. with depression-related unit intervention:
# the depression entries occupy the first len(depress_keys) positions of `symp_keys`.
commands = [
    f"python QNA/main_unit_itv_eval.py --llm {model_ids[LLM_idx]} --device {i} --num-samples {num_samples} --itv-t {i}"
    for i in range(len(depress_keys))
]

procs = []
for cmd in commands:
    print(f"Starting: {cmd}")
    procs.append(subprocess.Popen(cmd, shell=True))    # shell=True lets the whole command be passed as one string

# Wait for all of them to finish before moving to the next cell, keeping the
# exit status so that a crashed job is not silently reported as finished.
failed_jobs = []
for cmd, proc in zip(commands, procs):
    return_code = proc.wait()
    if return_code != 0:
        failed_jobs.append((cmd, return_code))

print("All parallel jobs finished.")
for cmd, return_code in failed_jobs:
    print(f"FAILED (exit code {return_code}): {cmd}")

## Q&A Exp. (Fig. 2B)

In [5]:
"""
GENERATE UNIT ACTIVATION DYNAMICS (ALSO THE EXPERIMENTAL GROUP'S PRE-TREATMENT CONTEXT DATA)
"""

LLM_idx = 10
device = '0'
intervention_idx = 0
num_samples = 5

print(f"Using model: [{model_ids[LLM_idx]}] on cuda device [{device}].")
print(f"Running experiments with intervention on {[symp_keys[intervention_idx]]} for [{num_samples}] samples.")

%run QNA/main_unit_dynamics_eval.py --llm {model_ids[LLM_idx]} --device {device} --num-samples {num_samples} --itv-t {intervention_idx}

Using model: [Qwen/Qwen3-32B] on cuda device [0].
Running experiments with intervention on ['depressed mood'] for [5] samples.


In [None]:
"""
GENERATE UNIT ACTIVATION DYNAMICS (ALSO THE EXPERIMENTAL GROUP'S PRE-TREATMENT CONTEXT DATA; OPTION: PARALLELIZED OVER MULTIPLE DEVICES)

Starts one `QNA/main_unit_dynamics_eval.py` process per depression-related unit
(`--itv-t` 0..5), giving job i the argument `--device i`, waits for all of
them, and reports every job that exited with a non-zero status.
"""
LLM_idx = 10
num_samples = 5

# run exps. with depression-related unit intervention:
# the depression entries occupy the first len(depress_keys) positions of `symp_keys`.
commands = [
    f"python QNA/main_unit_dynamics_eval.py --llm {model_ids[LLM_idx]} --device {i} --num-samples {num_samples} --itv-t {i}"
    for i in range(len(depress_keys))
]

procs = []
for cmd in commands:
    print(f"Starting: {cmd}")
    procs.append(subprocess.Popen(cmd, shell=True))    # shell=True lets the whole command be passed as one string

# Wait for all of them to finish before moving to the next cell, keeping the
# exit status so that a crashed job is not silently reported as finished.
failed_jobs = []
for cmd, proc in zip(commands, procs):
    return_code = proc.wait()
    if return_code != 0:
        failed_jobs.append((cmd, return_code))

print("All parallel jobs finished.")
for cmd, return_code in failed_jobs:
    print(f"FAILED (exit code {return_code}): {cmd}")

## Resistance Exp. (Fig. 3D)

In [None]:
"""
GENERATE CONTROL GROUP'S PRE-TREATMENT CONTEXT DATA
"""
context_LLM_idx = 0
device = '0'
intervention_idx = 0
num_samples = 5

print(f"Using model: [{context_generating_llms[context_LLM_idx]}] on cuda device [{device}].")
print(f"Running experiments with intervention on {[symp_keys[intervention_idx]]} for [{num_samples}] samples.")

%run QNA/main_resistance_ctrl_group_gen.py --llm {context_generating_llms[context_LLM_idx]} --device {device} --num-samples {num_samples} --itv-t {intervention_idx}

In [None]:
"""
GENERATE CONTROL GROUP'S PRE-TREATMENT CONTEXT DATA (OPTION: PARALLELIZED OVER MULTIPLE DEVICES)

Starts one `QNA/main_resistance_ctrl_group_gen.py` process per
depression-related unit (`--itv-t` 0..5), giving job i the argument
`--device i`, waits for all of them, and reports every job that exited with a
non-zero status.
"""
context_LLM_idx = 0
num_samples = 5

# run exps. with depression-related unit intervention:
# the depression entries occupy the first len(depress_keys) positions of `symp_keys`.
commands = [
    f"python QNA/main_resistance_ctrl_group_gen.py --llm {context_generating_llms[context_LLM_idx]} --device {i} --num-samples {num_samples} --itv-t {i}"
    for i in range(len(depress_keys))
]

procs = []
for cmd in commands:
    print(f"Starting: {cmd}")
    procs.append(subprocess.Popen(cmd, shell=True))    # shell=True lets the whole command be passed as one string

# Wait for all of them to finish before moving to the next cell, keeping the
# exit status so that a crashed job is not silently reported as finished.
failed_jobs = []
for cmd, proc in zip(commands, procs):
    return_code = proc.wait()
    if return_code != 0:
        failed_jobs.append((cmd, return_code))

print("All parallel jobs finished.")
for cmd, return_code in failed_jobs:
    print(f"FAILED (exit code {return_code}): {cmd}")

In [None]:
"""
GET PRE-TREATMENT ACTIVATIONS FOR THE EXPERIMENTAL AND CONTROL GROUPS

Collects the experimental- and control-group records, prints how many unique
samples each (intervened unit, group) pair contains, then passes the combined
records to `get_activations_pre_treatment` for the selected model.
"""

# Set explicitly so the cell does not depend on whichever earlier cell last
# assigned these names (no cell in this section defines `LLM_idx`).
LLM_idx = 10
device = '0'

out_dfs_exp, out_dfs_ctrl = get_resistance_group_data(
    context_generating_llms=context_generating_llms, 
    exp_group_ids=[1,2,3,4,5], 
    exp_group_steps=[25,50],
    ctrl_group_ids=[1,2,3,4,5],
    ctrl_group_steps=[1],
)
# Stack both groups into one frame with a fresh 0..n-1 index.
out_dfs = pd.concat([out_dfs_exp, out_dfs_ctrl], axis=0).reset_index(drop=True)
# Sanity table: number of distinct `sample_id` values per `itv_thought` row and
# `group` column; combinations with no samples show 0.
pivot = pd.pivot_table(out_dfs, index='itv_thought', columns='group', values='sample_id', aggfunc='nunique', fill_value=0)
print("\nNumber of unique samples in each group and intervened unit:")
display(HTML(pivot.to_html()))

get_activations_pre_treatment(
    out_dfs,
    model_ids=[model_ids[LLM_idx]], # the model whose activations to extract
    device=device,
    batch_size=128,
)

In [None]:
"""
GET POST-TREATMENT DATA FOR BOTH CONTROL AND EXPERIMENTAL GROUPS
"""
from utils import Config, Data_Manager
cfg = Config(model_id=model_ids[LLM_idx])
dm = Data_Manager(cfg)
out_dfs = dm.load_output(data_type='robust')
num_samples = out_dfs['sample_id'].nunique()

%run QNA/main_resistance.py --llm {model_ids[LLM_idx]} --device {device} --num-samples {num_samples} --itv-t {intervention_idx}

In [None]:
"""
GET POST-TREATMENT DATA FOR BOTH CONTROL AND EXPERIMENTAL GROUPS (OPTION: PARALLELIZED OVER MULTIPLE DEVICES)

Loads the stored 'robust' output for the selected model, uses its number of
distinct sample ids as `--num-samples`, then starts one `QNA/main_resistance.py`
process per depression-related unit (`--itv-t` 0..5), giving job i the argument
`--device i`. Waits for all jobs and reports every one that exited with a
non-zero status.
"""
from utils import Config, Data_Manager

# Must be assigned BEFORE it is used to build the Config below; otherwise the
# cell raises NameError on a fresh kernel, or loads data for a stale model
# while launching the jobs for another.
LLM_idx = 10

cfg = Config(model_id=model_ids[LLM_idx])
dm = Data_Manager(cfg)
out_dfs = dm.load_output(data_type='robust')

# The sample count comes from the loaded data rather than a hard-coded value.
num_samples = out_dfs['sample_id'].nunique()

# run exps. with depression-related unit intervention:
# the depression entries occupy the first len(depress_keys) positions of `symp_keys`.
commands = [
    f"python QNA/main_resistance.py --llm {model_ids[LLM_idx]} --device {i} --num-samples {num_samples} --itv-t {i}"
    for i in range(len(depress_keys))
]

procs = []
for cmd in commands:
    print(f"Starting: {cmd}")
    procs.append(subprocess.Popen(cmd, shell=True))    # shell=True lets the whole command be passed as one string

# Wait for all of them to finish before moving to the next cell, keeping the
# exit status so that a crashed job is not silently reported as finished.
failed_jobs = []
for cmd, proc in zip(commands, procs):
    return_code = proc.wait()
    if return_code != 0:
        failed_jobs.append((cmd, return_code))

print("All parallel jobs finished.")
for cmd, return_code in failed_jobs:
    print(f"FAILED (exit code {return_code}): {cmd}")