In [55]:
import subprocess, re
from itertools import product
import os
# Absolute path of the local bias-bench checkout (change for your purposes).
repo_root = "/mnt/batch/tasks/shared/LS_root/mounts/clusters/test5/code/Users/filip.trhlik/bias-bench"

# Hugging Face checkpoints and bias types to compute SentenceDebias subspaces for.
models = [
    "bert-base-uncased", "babylm/ltgbert-100m-2024", "ltg/ltg-bert-babylm"
]
bias_types = ["race", "religion"]

# Batch size setting for the subspace script (its log banner reports the same value).
batch_size     = 32
# Directory handed to the script as --persistent_dir; results are written below it.
persistent_dir = repo_root

# Ordered regex -> ``--model`` class-name table. Lookup returns the FIRST
# matching pattern, so specific patterns must precede generic ones: the
# LTG-BERT checkpoint ids also contain "bert" and would otherwise be mapped to
# BertModel, while the upload cells of this notebook load
# ``subspace_m-LtgBertModel_...`` files for them.
HF_CLASS = {
    r".*ltg.*"    : "LtgBertModel",
    r".*bert.*"   : "BertModel",
}

# Environment for the child processes: a copy of the current one with the
# checkout prepended to PYTHONPATH. Empty parts are skipped so that an unset
# PYTHONPATH does not leave a trailing separator (an empty path entry).
env = os.environ.copy()
env["PYTHONPATH"] = os.pathsep.join(
    part for part in (repo_root, env.get("PYTHONPATH", "")) if part
)

def get_hf_class(ckpt: str) -> str:
    """Return the ``--model`` class name registered for checkpoint *ckpt*.

    The patterns in ``HF_CLASS`` are tried in insertion order with a
    case-insensitive ``re.match``; the class name of the first pattern that
    matches is returned.

    Raises:
        ValueError: if no pattern in ``HF_CLASS`` matches *ckpt*.
    """
    hits = (
        class_name
        for regex, class_name in HF_CLASS.items()
        if re.match(regex, ckpt, flags=re.I)
    )
    try:
        return next(hits)
    except StopIteration:
        raise ValueError(f"No --model mapping defined for {ckpt}") from None

# --------  launch SentenceDebias sub-space jobs  --------
# Runs experiments/sentence_debias_subspace.py once per (model, bias type)
# pair, one blocking subprocess at a time. check=True aborts the whole sweep
# on the first non-zero exit code.
for model_name, bias in product(models, bias_types):
    cmd = [
        "python", "experiments/sentence_debias_subspace.py",
        "--model",             get_hf_class(model_name),
        "--model_name_or_path", model_name,
        "--bias_type",          bias,
        "--persistent_dir",     persistent_dir,
        # Forward the configured batch size instead of silently relying on the
        # script's default. NOTE(review): assumes the script accepts
        # --batch_size (its log banner prints a batch_size value) - confirm.
        "--batch_size",         str(batch_size),
    ]
    print("\n$ " + " ".join(cmd))
    # The script path is relative to the checkout, so run from repo_root
    # rather than from whatever directory the notebook kernel started in.
    subprocess.run(cmd, check=True, env=env, cwd=repo_root)



$ python experiments/sentence_debias_subspace.py --model BertModel --model_name_or_path bert-base-uncased --bias_type race --persistent_dir /mnt/batch/tasks/shared/LS_root/mounts/clusters/test5/code/Users/filip.trhlik/bias-bench


2025-07-17 00:07:16.312222: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752710836.332838  137110 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752710836.339132  137110 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1752710836.354812  137110 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1752710836.354838  137110 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1752710836.354840  137110 computation_placer.cc:177] computation placer alr

Computing bias subspace:
 - persistent_dir: /mnt/batch/tasks/shared/LS_root/mounts/clusters/test5/code/Users/filip.trhlik/bias-bench
 - model_name_or_path: bert-base-uncased
 - model: BertModel
 - bias_type: race
 - batch_size: 32


Encoding race examples: 100%|██████████| 2621/2621 [25:14<00:00,  1.73it/s]                      


Saving computed PCA components to: /mnt/batch/tasks/shared/LS_root/mounts/clusters/test5/code/Users/filip.trhlik/bias-bench/results/subspace/subspace_m-BertModel_c-bert-base-uncased_t-race.pt.

$ python experiments/sentence_debias_subspace.py --model BertModel --model_name_or_path bert-base-uncased --bias_type religion --persistent_dir /mnt/batch/tasks/shared/LS_root/mounts/clusters/test5/code/Users/filip.trhlik/bias-bench


2025-07-17 00:33:52.442725: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752712432.464223  141656 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752712432.470599  141656 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1752712432.486531  141656 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1752712432.486575  141656 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1752712432.486577  141656 computation_placer.cc:177] computation placer alr

Computing bias subspace:
 - persistent_dir: /mnt/batch/tasks/shared/LS_root/mounts/clusters/test5/code/Users/filip.trhlik/bias-bench
 - model_name_or_path: bert-base-uncased
 - model: BertModel
 - bias_type: religion
 - batch_size: 32


Encoding religion examples: 100%|██████████| 1235/1235 [11:52<00:00,  1.73it/s]                  


Saving computed PCA components to: /mnt/batch/tasks/shared/LS_root/mounts/clusters/test5/code/Users/filip.trhlik/bias-bench/results/subspace/subspace_m-BertModel_c-bert-base-uncased_t-religion.pt.


In [None]:
#!/usr/bin/env python

import sys, pathlib, textwrap, importlib.util, torch, tempfile, shutil
from transformers import AutoTokenizer, AutoConfig
from huggingface_hub import HfApi, create_repo, upload_folder

# ---------------------------------------------------------------------------
# Package bert-base-uncased together with a SentenceDebias bias subspace and a
# small remote-code wrapper, then upload everything to the Hugging Face Hub.
# ---------------------------------------------------------------------------
import os  # kept local so this cell also runs as a standalone script

# Never hard-code credentials in a notebook: take the token from the
# environment. When it is unset (None), huggingface_hub falls back to the
# token cached by `huggingface-cli login`.
HF_TOKEN      = os.environ.get("HF_TOKEN")

# Single source of truth for the bias type. The original cell loaded the
# *race* subspace while the repo name, file name, config and commit message
# all said "religion"; every bias-specific name is now derived from this.
BIAS_TYPE     = "religion"
HF_REPO       = f"FilipT/bert-base-uncased-sent-debias-{BIAS_TYPE}"
REPO_ROOT     = "/mnt/batch/tasks/shared/LS_root/mounts/clusters/test5/code/Users/filip.trhlik/bias-bench"
SUBSPACE_PATH = f"{REPO_ROOT}/results/subspace/subspace_m-BertModel_c-bert-base-uncased_t-{BIAS_TYPE}.pt"

MODEL_NAME    = "bert-base-uncased"
LOCAL_DIR     = pathlib.Path("bert-base-uncased-sent-debias-religion")
PROJ_FILENAME = f"bias_direction_{BIAS_TYPE}.pt"
WRAPPER_FILE  = "modeling_sentence_debias.py"

# Make the local bias-bench checkout importable.
sys.path.insert(0, REPO_ROOT)
from bias_bench.model.models import SentenceDebiasBertForMaskedLM

# PCA components saved by experiments/sentence_debias_subspace.py.
bias_vec = torch.load(SUBSPACE_PATH, map_location="cpu")
model    = SentenceDebiasBertForMaskedLM(MODEL_NAME, bias_direction=bias_vec)
tok      = AutoTokenizer.from_pretrained(MODEL_NAME)

# Source of the remote-code module shipped with the checkpoint. This is an
# f-string: only PROJ_FILENAME is interpolated, so the generated code must not
# contain literal curly braces.
WRAPPER_CODE = f"""
import torch, transformers, os
from functools import partial
from huggingface_hub import hf_hub_download

def _debias_hook(b_dir, module, inputs, output):
    # Forward hook: subtract from the encoder output its projection on b_dir.
    x = output.last_hidden_state if hasattr(output, "last_hidden_state") else output[0]
    # Match device AND dtype so half-precision models do not hit a dtype error.
    b = b_dir.to(device=x.device, dtype=x.dtype)
    proj = torch.matmul(x, b) / torch.dot(b, b)
    debiased = x - proj.unsqueeze(-1) * b
    if hasattr(output, "last_hidden_state"):
        output.last_hidden_state = debiased
        return output
    return (debiased,) + output[1:]

class SentenceDebiasBertForMaskedLM(transformers.BertForMaskedLM):
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
        # Pop the private option BEFORE delegating; otherwise it is forwarded
        # to the model constructor as an unexpected keyword argument.
        bias_path = kwargs.pop("bias_direction_path", None)
        model = super().from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
        if bias_path is None:
            bias_path = hf_hub_download(
                repo_id=pretrained_model_name_or_path,
                filename="{PROJ_FILENAME}",
                revision=kwargs.get("revision", None),
            )
        # NOTE(review): torch.load unpickles the file; only load trusted repos.
        bias_vec = torch.load(bias_path, map_location="cpu")
        model.bert.register_forward_hook(partial(_debias_hook, bias_vec))
        model.register_buffer("bias_direction", bias_vec)
        return model
"""

# Stage all files in a temporary directory that is removed even when saving
# or uploading fails part-way.
with tempfile.TemporaryDirectory(prefix="sent_debias_") as _tmp_name:
    tmp = pathlib.Path(_tmp_name)
    (tmp / WRAPPER_FILE).write_text(WRAPPER_CODE, encoding="utf-8")
    model.save_pretrained(tmp)
    tok.save_pretrained(tmp)
    torch.save(bias_vec, tmp / PROJ_FILENAME)

    # Written after model.save_pretrained so this config.json (with auto_map
    # pointing at the wrapper class) is the one that gets uploaded.
    cfg = AutoConfig.from_pretrained(MODEL_NAME)
    cfg.architectures = ["SentenceDebiasBertForMaskedLM"]
    cfg.auto_map = {
        "AutoModelForMaskedLM": f"{WRAPPER_FILE[:-3]}.SentenceDebiasBertForMaskedLM"
    }
    cfg.bias_type = BIAS_TYPE
    cfg.sent_debias_subspace = PROJ_FILENAME
    cfg.save_pretrained(tmp)

    api = HfApi(token=HF_TOKEN)
    api.create_repo(repo_id=HF_REPO, exist_ok=True, repo_type="model")
    api.upload_folder(
        folder_path=str(tmp),
        repo_id=HF_REPO,
        commit_message=f"Add {BIAS_TYPE}-debiased bert-base-uncased (Sentence-Debias, standalone)",
    )

print(f"https://huggingface.co/{HF_REPO} is ready.\n"
      "from transformers import AutoModelForMaskedLM\n"
      f"m = AutoModelForMaskedLM.from_pretrained('{HF_REPO}', trust_remote_code=True)")


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Uploading…


bias_direction_religion.pt:   0%|          | 0.00/4.83k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

✅  https://huggingface.co/FilipT/bert-base-uncased-sent-debias-religion is ready.
   Load with:
   from transformers import AutoModelForMaskedLM
   m = AutoModelForMaskedLM.from_pretrained('FilipT/bert-base-uncased-sent-debias-religion', trust_remote_code=True)


In [None]:
#!/usr/bin/env python

import sys, pathlib, textwrap, importlib.util, torch, tempfile, shutil
from transformers import AutoTokenizer, AutoConfig
from huggingface_hub import HfApi, create_repo, upload_folder, hf_hub_download

# ---------------------------------------------------------------------------
# Package ltg/ltg-bert-babylm together with a SentenceDebias bias subspace,
# the upstream LTG-BERT remote code and a wrapper module, then upload
# everything to the Hugging Face Hub.
# ---------------------------------------------------------------------------
import os  # kept local so this cell also runs as a standalone script

# Never hard-code credentials in a notebook: take the token from the
# environment. When it is unset (None), huggingface_hub falls back to the
# token cached by `huggingface-cli login`.
HF_TOKEN      = os.environ.get("HF_TOKEN")

# Single source of truth for the bias type. The original cell loaded the
# *race* subspace while the repo name, file name, config and commit message
# all said "religion"; every bias-specific name is now derived from this.
BIAS_TYPE     = "religion"
HF_REPO       = f"FilipT/ltg-bert-babylm-sd-{BIAS_TYPE}"
REPO_ROOT     = "/mnt/batch/tasks/shared/LS_root/mounts/clusters/test5/code/Users/filip.trhlik/bias-bench"
SUBSPACE_PATH = f"{REPO_ROOT}/results/subspace/subspace_m-LtgBertModel_c-ltg-ltg-bert-babylm_t-{BIAS_TYPE}.pt"

MODEL_NAME    = "ltg/ltg-bert-babylm"
LOCAL_DIR     = pathlib.Path("ltg-bert-babylm-sd-religion")
PROJ_FILENAME = f"bias_direction_{BIAS_TYPE}.pt"
WRAPPER_FILE  = "modeling_sentence_debias.py"

# Make the local bias-bench checkout importable.
sys.path.insert(0, REPO_ROOT)
from bias_bench.model.models import SentenceDebiasLtgBertForMaskedLM

# PCA components saved by experiments/sentence_debias_subspace.py.
bias_vec = torch.load(SUBSPACE_PATH, map_location="cpu")
model    = SentenceDebiasLtgBertForMaskedLM(MODEL_NAME, bias_direction=bias_vec)
tok      = AutoTokenizer.from_pretrained(MODEL_NAME)

# Source of the remote-code module shipped with the checkpoint. This is an
# f-string: only PROJ_FILENAME is interpolated, so the generated code must not
# contain literal curly braces.
WRAPPER_CODE = f"""
import torch, transformers
from functools import partial
from huggingface_hub import hf_hub_download
from .modeling_ltgbert import LtgBertForMaskedLM

def _debias_hook(b_dir, module, inputs, output):
    # Forward hook: subtract from the final hidden state its projection on b_dir.
    # Handles three output layouts: an object exposing last_hidden_state, a
    # sequence whose first item is a list of per-layer states, or a sequence
    # whose first item is the hidden-state tensor itself.
    container, key = None, None
    if hasattr(output, "last_hidden_state"):
        x = output.last_hidden_state
        container, key = output, "last_hidden_state"
    else:
        seq = output[0]
        if isinstance(seq, list):
            x = seq[-1]
            container, key = seq, -1
        else:
            x = seq
    # Match device AND dtype so half-precision models do not hit a dtype error.
    b = b_dir.to(device=x.device, dtype=x.dtype)
    proj = torch.matmul(x, b) / torch.dot(b, b)
    debiased = x - proj.unsqueeze(-1) * b
    if container is None:
        # Tuples are immutable, so build a new output instead of assigning.
        return (debiased,) + tuple(output[1:])
    container[key] = debiased
    return output

class SentenceDebiasLtgBertForMaskedLM(LtgBertForMaskedLM):
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
        # Pop the private option BEFORE delegating; otherwise it is forwarded
        # to the model constructor as an unexpected keyword argument.
        # trust_remote_code is deliberately not defaulted here: concrete
        # classes ignore it and only emit a warning.
        bias_path = kwargs.pop("bias_direction_path", None)
        model = super().from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
        if bias_path is None:
            bias_path = hf_hub_download(
                repo_id=pretrained_model_name_or_path,
                filename="{PROJ_FILENAME}",
                revision=kwargs.get("revision", None),
            )
        # NOTE(review): torch.load unpickles the file; only load trusted repos.
        bias_vec = torch.load(bias_path, map_location="cpu")
        block = model.transformer if hasattr(model, "transformer") else model
        block.register_forward_hook(partial(_debias_hook, bias_vec))
        model.register_buffer("bias_direction", bias_vec)
        return model
"""

# Stage all files in a temporary directory that is removed even when saving
# or uploading fails part-way.
with tempfile.TemporaryDirectory(prefix="sent_debias_") as _tmp_name:
    tmp = pathlib.Path(_tmp_name)
    (tmp / WRAPPER_FILE).write_text(WRAPPER_CODE, encoding="utf-8")

    # The wrapper imports .modeling_ltgbert, so the upstream remote-code files
    # must sit next to it in the uploaded repo.
    for fname in ("configuration_ltgbert.py", "modeling_ltgbert.py"):
        src = hf_hub_download(MODEL_NAME, fname)
        shutil.copy(src, tmp / fname)

    model.save_pretrained(tmp, safe_serialization=False)
    tok.save_pretrained(tmp)
    torch.save(bias_vec, tmp / PROJ_FILENAME)

    # Written after model.save_pretrained so this config.json (with auto_map
    # pointing at the wrapper class) is the one that gets uploaded.
    cfg = AutoConfig.from_pretrained(MODEL_NAME, trust_remote_code=True)
    cfg.architectures = ["SentenceDebiasLtgBertForMaskedLM"]
    cfg.auto_map = {
        "AutoConfig"          : "configuration_ltgbert.LtgBertConfig",
        "AutoModelForMaskedLM": f"{WRAPPER_FILE[:-3]}.SentenceDebiasLtgBertForMaskedLM",
    }
    cfg.bias_type = BIAS_TYPE
    cfg.sent_debias_subspace = PROJ_FILENAME
    cfg.save_pretrained(tmp)

    api = HfApi(token=HF_TOKEN)
    api.create_repo(repo_id=HF_REPO, exist_ok=True, repo_type="model")
    api.upload_folder(
        folder_path=str(tmp),
        repo_id=HF_REPO,
        commit_message=f"Add {BIAS_TYPE}-debiased LTG-BERT BabyLM (Sentence-Debias)",
    )

print(f"https://huggingface.co/{HF_REPO} is ready.\n"
      "from transformers import AutoModelForMaskedLM\n"
      f"m = AutoModelForMaskedLM.from_pretrained('{HF_REPO}', trust_remote_code=True)")

The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.


pytorch_model.bin:   0%|          | 0.00/393M [00:00<?, ?B/s]

bias_direction_religion.pt:   0%|          | 0.00/4.83k [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

✅  https://huggingface.co/FilipT/ltg-bert-babylm-sd-religion is ready.
   from transformers import AutoModelForMaskedLM
   m = AutoModelForMaskedLM.from_pretrained('FilipT/ltg-bert-babylm-sd-religion', trust_remote_code=True)


In [None]:
#!/usr/bin/env python

import sys, pathlib, textwrap, importlib.util, torch, tempfile, shutil
from transformers import AutoTokenizer, AutoConfig
from huggingface_hub import HfApi, create_repo, upload_folder, hf_hub_download

# ---------------------------------------------------------------------------
# Package babylm/ltgbert-100m-2024 together with a SentenceDebias bias
# subspace, the upstream LTG-BERT remote code and a wrapper module, then
# upload everything to the Hugging Face Hub.
# ---------------------------------------------------------------------------
import os  # kept local so this cell also runs as a standalone script

# Never hard-code credentials in a notebook: take the token from the
# environment. When it is unset (None), huggingface_hub falls back to the
# token cached by `huggingface-cli login`.
HF_TOKEN      = os.environ.get("HF_TOKEN")

# Single source of truth for the bias type. The original cell loaded the
# *race* subspace while the repo name, file name, config and commit message
# all said "gender"; every bias-specific name is now derived from this.
# NOTE(review): the gender subspace file must already exist - the launch cell
# of this notebook only lists "race" and "religion".
BIAS_TYPE     = "gender"
HF_REPO       = f"FilipT/ltg-baseline-babylm-sd-{BIAS_TYPE}"
REPO_ROOT     = "/mnt/batch/tasks/shared/LS_root/mounts/clusters/test5/code/Users/filip.trhlik/bias-bench"
SUBSPACE_PATH = f"{REPO_ROOT}/results/subspace/subspace_m-LtgBertModel_c-babylm-ltgbert-100m-2024_t-{BIAS_TYPE}.pt"

MODEL_NAME    = "babylm/ltgbert-100m-2024"
LOCAL_DIR     = pathlib.Path("ltg-bert-babylm-sd-gender")
PROJ_FILENAME = f"bias_direction_{BIAS_TYPE}.pt"
WRAPPER_FILE  = "modeling_sentence_debias.py"

# Make the local bias-bench checkout importable.
sys.path.insert(0, REPO_ROOT)
from bias_bench.model.models import SentenceDebiasLtgBertForMaskedLM

# Bias subspace tensor; presumably written by
# experiments/sentence_debias_subspace.py - confirm.
bias_vec = torch.load(SUBSPACE_PATH, map_location="cpu")
model    = SentenceDebiasLtgBertForMaskedLM(MODEL_NAME, bias_direction=bias_vec)
tok      = AutoTokenizer.from_pretrained(MODEL_NAME)

# Source of the remote-code module shipped with the checkpoint. This is an
# f-string: only PROJ_FILENAME is interpolated, so the generated code must not
# contain literal curly braces.
WRAPPER_CODE = f"""
import torch, transformers
from functools import partial
from huggingface_hub import hf_hub_download
from .modeling_ltgbert import LtgBertForMaskedLM

def _debias_hook(b_dir, module, inputs, output):
    # Forward hook: subtract from the final hidden state its projection on b_dir.
    # Handles three output layouts: an object exposing last_hidden_state, a
    # sequence whose first item is a list of per-layer states, or a sequence
    # whose first item is the hidden-state tensor itself.
    container, key = None, None
    if hasattr(output, "last_hidden_state"):
        x = output.last_hidden_state
        container, key = output, "last_hidden_state"
    else:
        seq = output[0]
        if isinstance(seq, list):
            x = seq[-1]
            container, key = seq, -1
        else:
            x = seq
    # Match device AND dtype so half-precision models do not hit a dtype error.
    b = b_dir.to(device=x.device, dtype=x.dtype)
    proj = torch.matmul(x, b) / torch.dot(b, b)
    debiased = x - proj.unsqueeze(-1) * b
    if container is None:
        # Tuples are immutable, so build a new output instead of assigning.
        return (debiased,) + tuple(output[1:])
    container[key] = debiased
    return output

class SentenceDebiasLtgBertForMaskedLM(LtgBertForMaskedLM):
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
        # Pop the private option BEFORE delegating; otherwise it is forwarded
        # to the model constructor as an unexpected keyword argument.
        # trust_remote_code is deliberately not defaulted here: concrete
        # classes ignore it and only emit a warning.
        bias_path = kwargs.pop("bias_direction_path", None)
        model = super().from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
        if bias_path is None:
            bias_path = hf_hub_download(
                repo_id=pretrained_model_name_or_path,
                filename="{PROJ_FILENAME}",
                revision=kwargs.get("revision", None),
            )
        # NOTE(review): torch.load unpickles the file; only load trusted repos.
        bias_vec = torch.load(bias_path, map_location="cpu")
        block = model.transformer if hasattr(model, "transformer") else model
        block.register_forward_hook(partial(_debias_hook, bias_vec))
        model.register_buffer("bias_direction", bias_vec)
        return model
"""

# Stage all files in a temporary directory that is removed even when saving
# or uploading fails part-way.
with tempfile.TemporaryDirectory(prefix="sent_debias_") as _tmp_name:
    tmp = pathlib.Path(_tmp_name)
    (tmp / WRAPPER_FILE).write_text(WRAPPER_CODE, encoding="utf-8")

    # The wrapper imports .modeling_ltgbert, so the upstream remote-code files
    # must sit next to it in the uploaded repo.
    for fname in ("configuration_ltgbert.py", "modeling_ltgbert.py"):
        src = hf_hub_download(MODEL_NAME, fname)
        shutil.copy(src, tmp / fname)

    model.save_pretrained(tmp, safe_serialization=False)
    tok.save_pretrained(tmp)
    torch.save(bias_vec, tmp / PROJ_FILENAME)

    # Written after model.save_pretrained so this config.json (with auto_map
    # pointing at the wrapper class) is the one that gets uploaded.
    cfg = AutoConfig.from_pretrained(MODEL_NAME, trust_remote_code=True)
    cfg.architectures = ["SentenceDebiasLtgBertForMaskedLM"]
    cfg.auto_map = {
        "AutoConfig"          : "configuration_ltgbert.LtgBertConfig",
        "AutoModelForMaskedLM": f"{WRAPPER_FILE[:-3]}.SentenceDebiasLtgBertForMaskedLM",
    }
    cfg.bias_type = BIAS_TYPE
    cfg.sent_debias_subspace = PROJ_FILENAME
    cfg.save_pretrained(tmp)

    api = HfApi(token=HF_TOKEN)
    api.create_repo(repo_id=HF_REPO, exist_ok=True, repo_type="model")
    api.upload_folder(
        folder_path=str(tmp),
        repo_id=HF_REPO,
        commit_message=f"Add {BIAS_TYPE}-debiased LTG-BERT BabyLM (Sentence-Debias)",
    )

print(f"https://huggingface.co/{HF_REPO} is ready.\n"
      "from transformers import AutoModelForMaskedLM\n"
      f"m = AutoModelForMaskedLM.from_pretrained('{HF_REPO}', trust_remote_code=True)")
