# Prompt Tuning



In [None]:
import torch
colab = 'google.colab' in str(get_ipython())

# You need a T4. A K80 will not work.
if colab:
    !nvidia-smi
    gpu_type = torch.cuda.get_device_name(0)
    if gpu_type != 'Tesla T4':
        raise ValueError("I don't know about this, chief")

In [None]:
# Setup for Colab only
# Installs the packages this notebook imports; skipped when running locally.
if colab:
    # transformers fork by finetuneanon, branch gpt-neo-localattention3.
    !pip install git+https://github.com/finetuneanon/transformers@gpt-neo-localattention3
    # mkultra straight from GitHub; pip's log is written to the file PIP_LOG.
    !pip install git+https://github.com/corolla-johnson/mkultra.git#egg=mkultra --log PIP_LOG
    !pip install gdown
    !pip install datasets
    !pip install tqdm

In [None]:
# If on Colab, mount your Google Drive first!
# The Colab paths configured later in this notebook all live under
# /content/drive/MyDrive/, so the mount has to happen before they are used.
if colab:
    from google.colab import drive
    drive.mount('/content/drive')

In [None]:
# Length of each training block, in tokens.
# Values that leave headroom for gpt-neo-2.7B-halved:
#  - 700 on a Colab T4 (16GB)
#  - 400 on a Colab K80 (12GB)
#  - 32 on a GTX1080 (8GB)
# A small block is nothing to worry about: soft prompts can be moved
# forward in context for the best effect.
if not colab:
    block_size = 32
elif gpu_type == 'Tesla T4':
    block_size = 700
else:
    block_size = 400

# Name of the soft prompt project.
sp_name = 'neuromancer-x-gpt2-optuna'

# Model directory or huggingface name.
model_dir = (
    "/content/drive/MyDrive/models/gpt-neo-2.7B-halved/"
    if colab
    else "D:/Git Repos/mkultra/models/gpt-neo-2.7B-halved/"
)

# Path to the text file used for training.
text_path = (
    "/content/drive/MyDrive/datasets/neuromancer_reformatted.txt"
    if colab
    else "datasets/neuromancer_reformatted.txt"
)

# Base directory under which the project's files are kept.
project_dir_root = (
    f"/content/drive/MyDrive/soft_prompts/{sp_name}/"
    if colab
    else f"soft_prompts/{sp_name}/"
)

# Base seed for shuffling the training data.
shuffle_seed = 1234567890

# Value passed as eval_percentage when evaluating a soft prompt.
eval_percentage = 1.0

In [None]:
# Load the prompt-tuning model from model_dir, convert it with .half()
# and move it to the CUDA device. This single instance is shared by every
# trial of the hyperparameter search below.
from mkultra.tuning import GPT2PromptTuningLM
model = GPT2PromptTuningLM.from_pretrained(model_dir).half().to("cuda")

In [None]:
import optuna

def objective(trial):
    """Train and evaluate one soft prompt for a single Optuna trial.

    Samples the Adafactor hyperparameters and the soft prompt length from
    ``trial``, then runs up to five train/evaluate rounds. The evaluation
    loss of every round is reported to the trial so that it can be pruned.

    Args:
        trial: The Optuna trial used to sample hyperparameters, report
            intermediate losses and query the pruning decision.

    Returns:
        The evaluation loss of the final round, or 10.0 if a ``ValueError``
        was raised during any round.

    Raises:
        optuna.TrialPruned: If the trial should be pruned after a round.
    """
    import logging
    import os
    from mkultra.trainers import SoftPromptTrainer
    from transformers import Adafactor

    # Loss returned for a trial whose round failed with a ValueError
    # (the original notebook uses this for a NaN).
    nan_penalty = 10.0
    n_rounds = 5

    # Every trial gets its own project directory under the project root.
    project_dir = os.path.join(project_dir_root, f"{sp_name}-trial-{trial.number}")

    optimizer = Adafactor(
        params=[model.get_soft_params()],
        clip_threshold=trial.suggest_float("clip_threshold", 1.0, 2.0),
        # Sampled in [0.0, 0.8] and shifted, so the value passed to the
        # optimizer lies in [-0.8, 0.0].
        decay_rate=(trial.suggest_float("decay_rate", 0.0, 0.8) - 0.8),
        weight_decay=trial.suggest_float("weight_decay", 1e-6, 0.5, log=True),
        beta1=trial.suggest_float("beta1", 0.0, 0.99),
        warmup_init=trial.suggest_categorical("warmup_init", [True, False]))

    n_tokens = trial.suggest_int("n_tokens", 20, 100)

    trainer = SoftPromptTrainer(
        model=model,
        optimizer=optimizer,
        project_dir=project_dir,
        text_path=text_path,
        # Training block and soft prompt together add up to 1024 tokens.
        # NOTE(review): the notebook-level block_size setting is not used
        # here - confirm that this is intended.
        block_size=1024 - n_tokens,
        n_tokens=n_tokens,
        shuffle_seed=shuffle_seed)

    for i in range(n_rounds):
        try:
            # Train each round with a different shuffle seed ...
            trainer.shuffle_seed = shuffle_seed + i
            trainer.train()

            # ... but always evaluate with the base seed.
            trainer.shuffle_seed = shuffle_seed
            eval_loss = trainer.evaluate(eval_percentage=eval_percentage)
            trial.report(eval_loss, i)

            if trial.should_prune():
                raise optuna.TrialPruned()

        except ValueError as err:
            # Keep the best-effort penalty, but record why the trial was cut
            # short so unrelated ValueErrors are not mistaken for a NaN loss.
            logging.getLogger(__name__).warning(
                "Trial %d aborted in round %d with ValueError: %s; "
                "returning penalty loss %.1f",
                trial.number, i, err, nan_penalty)
            return nan_penalty

    return eval_loss

In [None]:
import os

import optuna

# The trial database lives inside the project directory. Create the
# directory first, since it may not exist before the first trial runs.
os.makedirs(project_dir_root, exist_ok=True)
study_db = "sqlite:///" + os.path.join(project_dir_root, "trials.db")

# load_if_exists only matters when the study has an explicit name; without
# one a new, automatically named study is created on every run. Naming the
# study after the project lets a restarted notebook resume the same study.
study = optuna.create_study(
    study_name=sp_name,
    direction="minimize",
    storage=study_db,
    load_if_exists=True)

In [None]:
# Run 20 trials of `objective` on the study created above.
study.optimize(objective, n_trials=20)