### Implementing an RLHF tuning pipeline on Google Cloud Vertex AI

In [1]:
from google_cloud_pipeline_components.preview.llm import rlhf_pipeline
from kfp import compiler

In [2]:
# File that the compiled pipeline definition is written to; later cells
# pass this same path to the PipelineJob as its template.
RLHF_PIPELINE_PKG_PATH = "rlhf_pipeline.yaml"

# Compile the imported `rlhf_pipeline` function into that package file.
compiler.Compiler().compile(
    package_path=RLHF_PIPELINE_PKG_PATH,
    pipeline_func=rlhf_pipeline,
)

**Point the pipeline at the datasets stored in Google Cloud Storage buckets**

In [3]:
# Pipeline parameters are collected in one dictionary that later cells extend.
# Start with the three dataset locations (wildcard paths to JSONL files).
parameter_values = dict(
    preference_dataset="gs://vertex-ai/generative-ai/rlhf/text_small/summarize_from_feedback_tfds/comparisons/train/*.jsonl",
    prompt_dataset="gs://vertex-ai/generative-ai/rlhf/text_small/reddit_tfds/train/*.jsonl",
    eval_dataset="gs://vertex-ai/generative-ai/rlhf/text_small/reddit_tfds/val/*.jsonl",
)

**Set the LLM to tune: Llama-2-7b**

In [4]:
# Select the foundation model the pipeline will tune.
parameter_values["large_model_reference"] = "llama-2-7b"

In [5]:
import math

# Assumed number of preference examples and examples per training step.
# NOTE(review): both values are hard-coded here; confirm against the dataset
# and the pipeline's actual batch size.
PREF_DATASET_SIZE = 3000
BATCH_SIZE = 64
REWARD_NUM_EPOCHS = 30

# Steps needed for one pass over the data; rounding up counts the final
# partial batch as a full step.
REWARD_STEPS_PER_EPOCH = math.ceil(PREF_DATASET_SIZE / BATCH_SIZE)

# Total reward-model training steps across all epochs.
reward_model_train_steps = REWARD_NUM_EPOCHS * REWARD_STEPS_PER_EPOCH
reward_model_train_steps

1410

In [6]:
# Assumed number of prompts and examples per training step.
# NOTE(review): hard-coded; confirm against the prompt dataset.
# BATCH_SIZE is declared again here, so this cell does not rely on the value
# set by the reward-model cell.
PROMPT_DATASET_SIZE = 2000
BATCH_SIZE = 64
RL_NUM_EPOCHS = 10

# Steps for one pass over the prompts, rounding up for the last partial batch.
RL_STEPS_PER_EPOCH = math.ceil(PROMPT_DATASET_SIZE / BATCH_SIZE)

# Total reinforcement-learning training steps across all epochs.
reinforcement_learning_train_steps = RL_NUM_EPOCHS * RL_STEPS_PER_EPOCH
reinforcement_learning_train_steps

320

**Set training steps**

In [7]:
# Add the two step counts computed in the cells above to the pipeline
# parameters.
parameter_values.update(
    reward_model_train_steps=reward_model_train_steps,
    reinforcement_learning_train_steps=reinforcement_learning_train_steps,
)

**Set the learning-rate multipliers, KL coefficient, and task instruction**

In [8]:
# Learning-rate multipliers for the reward-model and reinforcement-learning
# stages, both set to 1.0.
parameter_values["reward_model_learning_rate_multiplier"] = 1.0
parameter_values["reinforcement_learning_rate_multiplier"] = 1.0

# KL penalty coefficient.
# NOTE(review): the previous comment on this value said it was "increased to
# reduce reward hacking", while the section heading describes these as
# defaults; confirm which is intended.
parameter_values["kl_coeff"] = 0.1

# Instruction text passed to the pipeline for the summarization task.
parameter_values["instruction"] = "Summarize in less than 50 words"

In [9]:
# Display the assembled parameter dictionary for a visual check before the
# job is created.
parameter_values

{'preference_dataset': 'gs://vertex-ai/generative-ai/rlhf/text_small/summarize_from_feedback_tfds/comparisons/train/*.jsonl',
 'prompt_dataset': 'gs://vertex-ai/generative-ai/rlhf/text_small/reddit_tfds/train/*.jsonl',
 'eval_dataset': 'gs://vertex-ai/generative-ai/rlhf/text_small/reddit_tfds/val/*.jsonl',
 'large_model_reference': 'llama-2-7b',
 'reward_model_train_steps': 1410,
 'reinforcement_learning_train_steps': 320,
 'reward_model_learning_rate_multiplier': 1.0,
 'reinforcement_learning_rate_multiplier': 1.0,
 'kl_coeff': 0.1,
 'instruction': 'Summarize in less than 50 words'}

**Connect to Google Cloud Vertex AI**

In [10]:
from src.utils import authenticate

# Region in which the Vertex AI client is initialised.
REGION = "europe-west4"

# The project helper returns three values: the credentials object, the
# project ID, and the staging bucket URI. Per the recorded output it also
# prints the service account, project ID and bucket it resolved.
(credentials, PROJECT_ID, STAGING_BUCKET) = authenticate()

Authenticated with service account: rlhf-56@wide-memento-483018-g8.iam.gserviceaccount.com
Project ID: wide-memento-483018-g8
Staging bucket: gs://rlhf_buck


In [11]:
# Import the Vertex AI SDK under its conventional name.
import google.cloud.aiplatform as aiplatform

# The original alias was misspelled `aiplatfrom`. Keep that name bound to the
# same module so cells that still reference it keep working.
aiplatfrom = aiplatform

# Set the SDK's defaults (project, region, credentials) for the calls that
# follow.
aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
    credentials=credentials,
)

In [12]:
# Display the compiled pipeline package path that the job will use as its
# template.
RLHF_PIPELINE_PKG_PATH

'rlhf_pipeline.yaml'

**Create and submit the pipeline as a Vertex AI PipelineJob**

In [13]:
# Build the pipeline job from the compiled template and the collected
# parameters. The staging bucket is used as the pipeline root.
# `aiplatfrom` (sic) is the alias under which the SDK was imported earlier.
job = aiplatfrom.PipelineJob(
    template_path=RLHF_PIPELINE_PKG_PATH,
    parameter_values=parameter_values,
    pipeline_root=STAGING_BUCKET,
    display_name="LLM-RLHF-TUNING",
)

In [14]:
# Submit the job to Vertex AI, running it as the given service account.
# NOTE(review): the service-account email is hard-coded; consider sourcing it
# from configuration instead.
job.submit(service_account="rlhf-56@wide-memento-483018-g8.iam.gserviceaccount.com")

Creating PipelineJob
PipelineJob created. Resource name: projects/1070904449492/locations/europe-west4/pipelineJobs/rlhf-train-template-20260101215047
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/1070904449492/locations/europe-west4/pipelineJobs/rlhf-train-template-20260101215047')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/europe-west4/pipelines/runs/rlhf-train-template-20260101215047?project=1070904449492
