# Hello World pipeline

In [5]:
from kfp import dsl

@dsl.component(base_image="python:3.11")
def hello_world(name: str) -> str:
    """Return a two-line greeting whose second line ends with ``name``."""
    greeting = f"hello world\nnice to meet you {name}"
    return greeting

@dsl.component(base_image="python:3.11")
def print_str(hello_str: str):
    """Print ``hello_str`` to standard output."""
    message = hello_str
    print(message)

In [6]:
from kfp import dsl

@dsl.pipeline(name="hello-world")
def hello_world_pipeline(name: str):
    """Feed the output of ``hello_world`` into ``print_str``.

    Args:
        name: Value forwarded to ``hello_world``.
    """
    greeting_task = hello_world(name=name)
    # Consuming the upstream task's output wires print_str to hello_world.
    print_str(hello_str=greeting_task.output)

In [7]:
from kfp.compiler import Compiler

# Compile the hello-world pipeline definition into a YAML package file.
Compiler().compile(
    pipeline_func=hello_world_pipeline,
    package_path="hello-world.yaml",
)

# Dataset processing pipeline

## install kfp-kubernetes

In [9]:
%%sh
# Quote the requirement so the shell cannot treat "[kubernetes]" as a glob pattern.
pip install -q --no-cache-dir "kfp[kubernetes]"

## 데이터셋 다운로드 후 가공 컴포넌트

In [2]:
from kfp import dsl
from kfp.dsl import OutputPath

@dsl.component(base_image="python:3.11", packages_to_install=["datasets"])
def txt_to_qa_dataset(data_url: str, qa_dataset: OutputPath('Dataset')):
    """Download a question/answer text file and save it as a Hugging Face dataset.

    The text is read as repeating three-line records: a question line, an
    answer line, and one separator line that is discarded.

    Args:
        data_url: URL the text file is downloaded from.
        qa_dataset: Output artifact path the dataset is saved to.

    Raises:
        requests.HTTPError: If the download responds with an error status.
    """
    txt_file = "newjeans.txt"

    import requests

    # Stream the response to disk in chunks rather than holding it in memory.
    with requests.get(data_url, stream=True) as r:
        r.raise_for_status()
        with open(txt_file, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)

    def gen_qa():
        # The encoding is stated explicitly so decoding does not depend on the
        # container locale (assumes the source file is UTF-8 -- TODO confirm).
        with open(txt_file, 'r', encoding='utf-8') as f:
            while True:
                question = f.readline().replace('\n', '')
                answer = f.readline().replace('\n', '')
                # A trailing separator line (or an empty file) used to produce a
                # record with both fields empty; skip it instead of emitting a
                # blank training example.
                if question or answer:
                    yield {"question": question, "answer": answer}
                # Third line of each record is the separator; an empty read here
                # means end of file.
                if not f.readline():
                    break

    from datasets import Dataset
    newjeans_qa = Dataset.from_generator(gen_qa)
    newjeans_qa.save_to_disk(qa_dataset)

## chat template 형태로 데이터셋 가공 컴포넌트

In [4]:
from kfp import dsl
from kfp.dsl import InputPath

@dsl.component(base_image="python:3.11", packages_to_install=["datasets"])
def process_chat_template(
    system_prompt: str,
    volume_mount: str,
    qa_dataset: InputPath('Dataset')
) -> str:
    """Convert a question/answer dataset into chat-message form and save it.

    Each row becomes a ``messages`` list holding a system, a user and an
    assistant entry. The result is saved under ``volume_mount`` in a directory
    named with a freshly generated UUID.

    Args:
        system_prompt: Text used as the content of every system message.
        volume_mount: Directory the converted dataset is saved into.
        qa_dataset: Path of the input dataset with ``question`` and ``answer`` columns.

    Returns:
        The generated directory name (relative to ``volume_mount``).
    """
    import os
    import uuid

    from datasets import load_from_disk

    def gen_chat_template(items):
        # Called with batched=True, so each column arrives as a list of values.
        conversations = [
            [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": question},
                {"role": "assistant", "content": answer},
            ]
            for question, answer in zip(items['question'], items['answer'])
        ]
        return {"messages": conversations}

    source_dataset = load_from_disk(qa_dataset)
    # Drop the original columns so only "messages" remains.
    chat_dataset = source_dataset.map(
        gen_chat_template,
        batched=True,
        remove_columns=source_dataset.features,
    )

    dataset_name = str(uuid.uuid4())
    target_dir = os.path.join(volume_mount, dataset_name)
    chat_dataset.save_to_disk(target_dir)
    return dataset_name

## pipeline

In [13]:
from kfp import dsl
from kfp import kubernetes

@dsl.pipeline(name="newjeans-fine-tuning")
def fine_tuning_pipeline(
    data_url: str = "https://docs.google.com/uc?export=download&id=1ycN8UktwSiMJ0cWwPXeLVIHJpBnUgEtE&confirm=t",
    system_prompt: str = "당신은 K-pop 아이돌 그룹 뉴진스(NewJeans)의 정보를 알려주는 멋진 AI 어시스턴트입니다. 모든 대화는 한국어(Korean)로 합니다.",
):
    """Dataset-preparation pipeline: download the Q/A text and store a chat-format dataset on a PVC.

    Args:
        data_url: Download URL passed to ``txt_to_qa_dataset``.
        system_prompt: System message passed to ``process_chat_template``.
    """
    mount_path = '/data'

    # ReadWriteMany claim created under a fixed name; caching is enabled for this task.
    shared_volume = kubernetes.CreatePVC(
        pvc_name='newjeans-finetuning-pvc-rwx',
        access_modes=['ReadWriteMany'],
        size='1024Gi',
        storage_class_name='filestore-rwx',
    )
    shared_volume.set_caching_options(True)
    claim_name = shared_volume.outputs['name']

    qa_task = txt_to_qa_dataset(data_url=data_url)

    chat_task = process_chat_template(
        system_prompt=system_prompt,
        volume_mount=mount_path,
        qa_dataset=qa_task.outputs["qa_dataset"],
    )
    # The chat-format dataset is written under mount_path, so that task needs the claim mounted.
    kubernetes.mount_pvc(chat_task, pvc_name=claim_name, mount_path=mount_path)

In [14]:
from kfp.compiler import Compiler

# Compile the dataset-processing pipeline into a YAML package file.
Compiler().compile(
    pipeline_func=fine_tuning_pipeline,
    package_path="newjeans-fine-tuning.yaml",
)

# Training pipeline

## training code

In [63]:
from kfp import dsl

@dsl.component(base_image="python:3.11")
def save_train_func(volume_mount: str) -> str:
    """Write the source of the nested ``train`` function to ``train.py`` on the volume.

    ``train`` is never called here; only its source text is extracted with
    ``inspect.getsource`` and written out so another step can execute it.

    Args:
        volume_mount: Directory the script file is written into.

    Returns:
        The script file name (``train.py``), relative to ``volume_mount``.
    """
    # NOTE: run_pytorchjob pastes this source into a bash heredoc, so keep the
    # body of train() (comments included) free of dollar signs, backticks and
    # backslashes.
    def train(parameters):
        # Fine-tune a causal LM with LoRA through SFTTrainer under DeepSpeed,
        # logging to TensorBoard and MLflow.
        #
        # parameters is a dict with the keys: mount_path, output_model,
        # random_suffix, batch_size, num_train_epochs, dataset_name,
        # pretrained_model_name, deepspeed_config_file.
        import os
        import json
        from transformers import (
            TrainingArguments, 
            logging,
            AutoTokenizer, 
            AutoModelForCausalLM,
            integrations,
        )
        from trl import SFTTrainer
        from datasets import load_from_disk
        from peft import LoraConfig, TaskType
        import torch
        import torch.distributed as dist
        import mlflow
        import evaluate
        import numpy as np

        ## NCCL log level
        os.environ["NCCL_DEBUG"] = 'INFO'

        ## parse parameters
        dirname = parameters["mount_path"]
        output_model = "-".join([parameters["output_model"], parameters["random_suffix"]])
        mlflow_experiment_name = parameters["output_model"]
        mlflow_run_name = parameters["random_suffix"]
        batch_size = parameters["batch_size"]
        num_train_epochs = parameters["num_train_epochs"]
        dataset_name = parameters["dataset_name"]
        pretrained_model_name = parameters["pretrained_model_name"]
        deepspeed_config_path = os.path.join(dirname, parameters["deepspeed_config_file"])

        ## deepspeed
        with open(deepspeed_config_path) as f:
            deepspeed_config = json.load(f)
        # The stored config has no tensorboard output path; point it at this run's log dir.
        deepspeed_config["tensorboard"]["output_path"] = os.path.join(dirname, output_model, "logs")

        ## training arguments
        args = TrainingArguments(
            output_dir=os.path.join(dirname, output_model),

            # batch/gradient_accumulation/epoch
            per_device_train_batch_size=batch_size,
            gradient_accumulation_steps=4,
            num_train_epochs=num_train_epochs,

            # logging
            logging_steps=1,
            log_level='info',
            logging_dir=os.path.join(dirname, output_model, "logs"),
            report_to=["tensorboard", "mlflow"],

            # learning rate, scheduler
            warmup_ratio=0.1,
            learning_rate=2e-4,
            lr_scheduler_type="cosine",

            # dtype
            bf16=True,

            push_to_hub=False,

            deepspeed=deepspeed_config,

            # gradient_checkpointing
            gradient_checkpointing=True,
            gradient_checkpointing_kwargs={'use_reentrant':False},

            disable_tqdm=True,
        )

        ## load dataset, tokenizer, model
        dataset = load_from_disk(os.path.join(dirname, dataset_name))

        tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name)

        # Reuse the end-of-sequence token for padding.
        tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLM.from_pretrained(
            pretrained_model_name, 
            torch_dtype=torch.bfloat16,
        )
        model.config.use_cache=False # for gradient checkpointing
        model.config.pretraining_tp = 1

        ## peft
        peft_config = LoraConfig(
            task_type=TaskType.CAUSAL_LM,
            inference_mode=False,
            r=16,
            lora_alpha=32,
            lora_dropout=0.05,
            bias="none",
            use_rslora=True,
        )

        # metric
        metric = evaluate.load('accuracy')

        def compute_metrics(eval_pred):
            predictions, labels = eval_pred
            # NOTE(review): axis=1 picks the class axis only for 2-D logits;
            # confirm the shape before enabling evaluation.
            predictions = np.argmax(predictions, axis=1)
            # metric.compute already returns the metrics dict keyed by accuracy.
            # The previous double-brace literal built a set holding a dict,
            # which raises TypeError (unhashable type).
            return metric.compute(predictions=predictions, references=labels)

        ## trainer
        trainer = SFTTrainer(
            model=model,
            tokenizer=tokenizer,
            args=args,
            train_dataset=dataset,
            peft_config=peft_config,
            packing=True,
            compute_metrics=compute_metrics,
        )

        ## mlflow
        os.environ["MLFLOW_EXPERIMENT_NAME"] = mlflow_experiment_name
        os.environ["MLFLOW_FLATTEN_PARAMS"] = "1"
        os.environ["MLFLOW_TRACKING_URI"]="http://mlflow.mlflow"
        os.environ["MLFLOW_S3_ENDPOINT_URL"]="http://minio-service.kubeflow:9000"
        os.environ["MLFLOW_S3_IGNORE_TLS"]="true"

        # create run in master
        if dist.get_rank() == 0:
            mlflow.set_tracking_uri(os.environ["MLFLOW_TRACKING_URI"])
            mlflow.set_experiment(mlflow_experiment_name)
            mlflow.start_run(run_name=mlflow_run_name)
            dataset_for_log = mlflow.data.huggingface_dataset.from_huggingface(dataset)
            mlflow.log_input(dataset_for_log, context="training")

        ## train and save model
        trainer.model.print_trainable_parameters()
        trainer.train()
        trainer.save_model(os.path.join(dirname, output_model))

        ## mlflow log model and end run
        if dist.get_rank() == 0:
            mlflow.transformers.log_model(
                transformers_model={
                    "model": trainer.model,
                    "tokenizer": tokenizer,
                },
                artifact_path="model",
                task="text-generation"
            )
            mlflow.end_run()
    
    import inspect
    import textwrap
    
    # Grab the nested function's source and strip its indentation so it is
    # valid as top-level code in a standalone file.
    func_code = inspect.getsource(train)
    func_code = textwrap.dedent(func_code)
    
    import os
    train_code_file = "train.py"
    with open(os.path.join(volume_mount, train_code_file), "w") as f:
        f.write(func_code)
    return train_code_file

## deepspeed config 만들기

In [59]:
from kfp import dsl

@dsl.component(base_image="python:3.11")
def save_deepspeed_config(volume_mount: str) -> str:
    """Write the DeepSpeed configuration as JSON onto the mounted volume.

    Args:
        volume_mount: Directory the JSON file is written into.

    Returns:
        The config file name (``deepspeed_config.json``), relative to ``volume_mount``.
    """
    import json
    import os

    optimizer_section = {
        "type": "AdamW",
        "params": dict.fromkeys(["lr", "betas", "eps", "weight_decay"], "auto"),
    }

    scheduler_section = {
        "type": "WarmupCosineLR",
        "params": {
            "total_num_steps": "auto",
            "warmup_min_ratio": 0.1,
            "warmup_num_steps": "auto",
            "cos_min_ratio": 0.0001,
        },
    }

    # ZeRO stage 3 with optimizer state and parameters both offloaded to CPU.
    zero_section = {
        "stage": 3,
        "offload_optimizer": {"device": "cpu", "pin_memory": True},
        "offload_param": {"device": "cpu", "pin_memory": True},
        "overlap_comm": True,
        "contiguous_gradients": True,
        "sub_group_size": 1e9,
        "reduce_bucket_size": "auto",
        "stage3_prefetch_bucket_size": "auto",
        "stage3_param_persistence_threshold": "auto",
        "stage3_max_live_parameters": 1e9,
        "stage3_max_reuse_distance": 1e9,
        "stage3_gather_16bit_weights_on_model_save": True,
    }

    quantize_section = {
        "enabled": True,
        "quantize_verbose": True,
        "quantizer_kernel": True,
        "quantize-algo": {"q_type": "symmetric"},
        "quantize_bits": {"start_bits": 16, "target_bits": 8},
        "quantize_schedule": {"quantize_period": 400, "schedule_offset": 0},
        "quantize_groups": 8,
    }

    profiler_section = {
        "enabled": True,
        "profile_step": 1,
        "module_depth": -1,
        "top_modules": 1,
        "detailed": False,
    }

    comms_section = {
        "enabled": True,
        "verbose": False,
        "prof_all": True,
        "debug": False,
    }

    # No "output_path" is stored for tensorboard here; the training script
    # sets it after loading this file.
    tensorboard_section = {"enabled": True}

    # Key order matches the original literal so the serialized JSON is unchanged.
    deepspeed_config = {
        "bf16": {"enabled": True},
        "optimizer": optimizer_section,
        "scheduler": scheduler_section,
        "zero_optimization": zero_section,
        "quantize_training": quantize_section,
        "flops_profiler": profiler_section,
        "comms_logger": comms_section,
        "tensorboard": tensorboard_section,
        **dict.fromkeys(
            [
                "gradient_accumulation_steps",
                "gradient_clipping",
                "steps_per_print",
                "train_batch_size",
                "train_micro_batch_size_per_gpu",
            ],
            "auto",
        ),
        "wall_clock_breakdown": False,
    }

    deepspeed_config_file = "deepspeed_config.json"
    target_path = os.path.join(volume_mount, deepspeed_config_file)
    with open(target_path, 'w') as f:
        json.dump(deepspeed_config, f)

    return deepspeed_config_file

## PytorchJob 만들기

In [None]:
from kfp import dsl

# parameters= {
#     "mount_path": "/data",
#     "deepspeed_config_file": "deepspeed_config.json",
#     "random_suffix": "",
#     "output_model": "",
#     "batch_size": 4,
#     "num_train_epochs": 40,
#     "dataset_name": "",
#     "pretrained_model_name": "",
# }

@dsl.component(base_image="python:3.11", packages_to_install=["kubeflow-training"])
def run_pytorchjob(
    base_image:str,
    pvc_name:str,
    train_func:str,
    pretrained_model_name:str,
    dataset_name:str,
    output_model:str,
    deepspeed_config_file:str,
    batch_size:int,
    num_train_epochs:int,
    master_replica:int,
    master_cpu:str,
    master_memory:str,
    master_gpu:int,
    worker_replica:int,
    worker_cpu:str,
    worker_memory:str,
    worker_gpu:int,
) -> str:
    """Create a PyTorchJob that runs the saved training script and wait for it to succeed.

    The training source is read from ``/data/<train_func>``, a call
    ``train(<parameters>)`` is appended, and the result is embedded in a bash
    script that writes it to a temporary file and launches it with ``torchrun``.

    Args:
        base_image: Container image for the master and worker pods.
        pvc_name: Name of the PersistentVolumeClaim mounted at ``/data`` in the pods.
        train_func: File name of the training script under ``/data``.
        pretrained_model_name: Passed to the training function.
        dataset_name: Passed to the training function.
        output_model: Prefix of the job name; also passed to the training function.
        deepspeed_config_file: Passed to the training function.
        batch_size: Passed to the training function.
        num_train_epochs: Passed to the training function.
        master_replica: Replica count for the master replica spec.
        master_cpu: CPU request and limit of the master container.
        master_memory: Memory request and limit of the master container.
        master_gpu: GPU request/limit of the master; also its ``--nproc-per-node``.
        worker_replica: Replica count for the worker replica spec.
        worker_cpu: CPU request and limit of the worker container.
        worker_memory: Memory request and limit of the worker container.
        worker_gpu: GPU request/limit of the workers; also their ``--nproc-per-node``.

    Returns:
        The PyTorchJob name, ``<output_model>-<random 5-character suffix>``.
    """
    ## populate parameters
    parameters = {
        "mount_path": "/data",
        "pretrained_model_name": pretrained_model_name,
        "dataset_name": dataset_name,
        "output_model": output_model,
        "deepspeed_config_file": deepspeed_config_file,
        "batch_size": batch_size,
        "num_train_epochs": num_train_epochs,
    }
    
    import string
    import random
     
    # Five distinct lowercase letters/digits make each job name unique per run.
    letters_set = string.ascii_lowercase + string.digits
    random_list = random.sample(letters_set,5)
    uid = ''.join(random_list)
     
    parameters["random_suffix"] = uid
    
    ## populate exec script
    import os
    import textwrap
    
    with open(os.path.join(parameters["mount_path"], train_func), "r") as f:
        func_code = f.read()
    func_code = textwrap.dedent(func_code)
    # Append the call; the dict repr is a valid Python literal for str/int values.
    func_code = f"{func_code}\ntrain({parameters})\n"
    
    # The heredoc delimiter is quoted ('EOM') so bash copies the Python source
    # verbatim. With an unquoted delimiter, any "$", backtick or backslash in
    # the code or in a parameter value would be expanded by the shell.
    exec_script = textwrap.dedent(
        """
            program_path=$(mktemp -d)
            read -r -d '' SCRIPT << 'EOM'\n
            {func_code}
            EOM
            printf "%s" "$SCRIPT" > $program_path/ephemeral_script.py
            torchrun --nproc-per-node {gpu_cnt} $program_path/ephemeral_script.py"""
    )

    # Master and workers differ only in the number of processes per node.
    master_exec_script = exec_script.format(func_code=func_code, gpu_cnt=master_gpu)
    worker_exec_script = exec_script.format(func_code=func_code, gpu_cnt=worker_gpu)
    
    ## declare pod spec
    from kubernetes import client
    from kubeflow.training.constants import constants
    pod_template_spec = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(annotations={constants.ISTIO_SIDECAR_INJECTION: "false"}), ## disable the istio sidecar
        spec=client.V1PodSpec(
            restart_policy="Never",
            containers=[
                client.V1Container(
                    name=constants.PYTORCHJOB_CONTAINER,
                    image=base_image,
                    command=["bash", "-c"],
                    args=[master_exec_script],
                    resources=client.V1ResourceRequirements(
                        limits={
                            "cpu": master_cpu,
                            "memory": master_memory,
                            "nvidia.com/gpu": master_gpu
                        },
                        requests={
                            "cpu": master_cpu,
                            "memory": master_memory,
                            "nvidia.com/gpu": master_gpu
                        }
                    ),
                    env=[ ## for mlflow artifact store
                        client.V1EnvVar(
                            name="AWS_ACCESS_KEY_ID",
                            value_from=client.V1EnvVarSource(
                                secret_key_ref=client.V1SecretKeySelector(
                                    name="mlpipeline-minio-artifact",
                                    key="accesskey",
                                )
                            )
                        ),
                        client.V1EnvVar(
                            name="AWS_SECRET_ACCESS_KEY",
                            value_from=client.V1EnvVarSource(
                                secret_key_ref=client.V1SecretKeySelector(
                                    name="mlpipeline-minio-artifact",
                                    key="secretkey",
                                )
                            )
                        ),
                    ],
                    volume_mounts=[
                        client.V1VolumeMount(
                            mount_path=parameters["mount_path"],
                            name=pvc_name,
                            read_only=False,

                        ),
                        client.V1VolumeMount(
                            mount_path="/dev/shm",
                            name="dshm",
                            read_only=False,
                        ),
                    ]
                )
            ],
            volumes=[
                client.V1Volume(
                    name=pvc_name,
                    persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
                        claim_name=pvc_name,
                        read_only=False,
                    )
                ),
                # Memory-backed emptyDir mounted at /dev/shm.
                client.V1Volume(
                    name="dshm",
                    empty_dir=client.V1EmptyDirVolumeSource(
                        medium="Memory",
                        size_limit="0.5Gi"
                    )
                )
            ]
        )        
    )
    
    ## get namespace
    # The job is created in the namespace of the pod running this component.
    with open("/var/run/secrets/kubernetes.io/serviceaccount/namespace", "r") as f:
        namespace = f.read()
    
    ## declare pytorchjob
    from kubeflow.training import models
    
    pytorchjob_name = "-".join([parameters["output_model"], parameters["random_suffix"]])
    pytorchjob = models.KubeflowOrgV1PyTorchJob(
        api_version=f"{constants.KUBEFLOW_GROUP}/{constants.OPERATOR_VERSION}",
        kind=constants.PYTORCHJOB_KIND,
        metadata=client.V1ObjectMeta(name=pytorchjob_name, namespace=namespace),
        spec=models.KubeflowOrgV1PyTorchJobSpec(
            run_policy=models.KubeflowOrgV1RunPolicy(clean_pod_policy=None),
            pytorch_replica_specs={}
        )
    )
    pytorchjob.spec.pytorch_replica_specs[constants.REPLICA_TYPE_MASTER] = models.KubeflowOrgV1ReplicaSpec(replicas=master_replica, template=pod_template_spec)
    
    # Workers reuse the master pod template with their own script and resources.
    import copy
    worker_pod_template_spec = copy.deepcopy(pod_template_spec)
    worker_pod_template_spec.spec.containers[0].args = [worker_exec_script]
    worker_pod_template_spec.spec.containers[0].resources = client.V1ResourceRequirements(
        limits={"cpu": worker_cpu, "memory": worker_memory, "nvidia.com/gpu": worker_gpu},
        requests={"cpu": worker_cpu, "memory": worker_memory, "nvidia.com/gpu": worker_gpu},
    )
    pytorchjob.spec.pytorch_replica_specs[constants.REPLICA_TYPE_WORKER] = models.KubeflowOrgV1ReplicaSpec(replicas=worker_replica, template=worker_pod_template_spec)
        
    
    ## create pytorchjob
    from kubeflow.training import TrainingClient
    training_client = TrainingClient()
    training_client.create_pytorchjob(pytorchjob, namespace)
    
    ## wait till Running state
    running_pytorchjob = training_client.wait_for_job_conditions(
        name=pytorchjob.metadata.name,
        namespace=pytorchjob.metadata.namespace,
        job_kind=constants.PYTORCHJOB_KIND,
        expected_conditions={constants.JOB_CONDITION_RUNNING}
    )
    
    ## log master pod
    training_client.get_job_logs(
        name=running_pytorchjob.metadata.name,
        namespace= running_pytorchjob.metadata.namespace,
        container=constants.PYTORCHJOB_CONTAINER,
        follow=True
    )
    
    ## check job is succeeded
    training_client.wait_for_job_conditions(
        name=running_pytorchjob.metadata.name,
        namespace=running_pytorchjob.metadata.namespace,
        job_kind=constants.PYTORCHJOB_KIND,
        expected_conditions={constants.JOB_CONDITION_SUCCEEDED}
    )
    return pytorchjob_name

## pipeline

In [67]:
from kfp import dsl
from kfp import kubernetes

@dsl.pipeline(name="newjeans-fine-tuning")
def fine_tuning_pipeline(
    data_url: str = "https://docs.google.com/uc?export=download&id=1ycN8UktwSiMJ0cWwPXeLVIHJpBnUgEtE&confirm=t",
    system_prompt: str = "당신은 K-pop 아이돌 그룹 뉴진스(NewJeans)의 정보를 알려주는 멋진 AI 어시스턴트입니다. 모든 대화는 한국어(Korean)로 합니다.",
    base_image: str = "miroirs/transformers-pytorch-deepspeed-latest-gpu:deepspeed-0.14.0-all",
    model_id: str = "unsloth/llama-3-8b-Instruct",
    output_model_prefix: str = "newjeans-finetuning",
    batch_size: int = 4,
    num_train_epochs: int = 40,
    master_replica: int = 1,
    master_cpu: str = "2",
    master_memory: str = "55Gi",
    master_gpu: int = 1,
    worker_replica: int = 1,
    worker_cpu: str = "2",
    worker_memory: str = "20Gi",
    worker_gpu: int = 1,
):
    """Dataset preparation followed by distributed fine-tuning in a PyTorchJob.

    Args:
        data_url: Download URL passed to ``txt_to_qa_dataset``.
        system_prompt: System message passed to ``process_chat_template``.
        base_image: Training container image passed to ``run_pytorchjob``.
        model_id: Passed to ``run_pytorchjob`` as ``pretrained_model_name``.
        output_model_prefix: Passed to ``run_pytorchjob`` as ``output_model``.
        batch_size: Forwarded to ``run_pytorchjob``.
        num_train_epochs: Forwarded to ``run_pytorchjob``.
        master_replica: Forwarded to ``run_pytorchjob``.
        master_cpu: Forwarded to ``run_pytorchjob``.
        master_memory: Forwarded to ``run_pytorchjob``.
        master_gpu: Forwarded to ``run_pytorchjob``.
        worker_replica: Forwarded to ``run_pytorchjob``.
        worker_cpu: Forwarded to ``run_pytorchjob``.
        worker_memory: Forwarded to ``run_pytorchjob``.
        worker_gpu: Forwarded to ``run_pytorchjob``.
    """
    mount_path = '/data'

    # ReadWriteMany claim created under a fixed name; caching is enabled for this task.
    shared_volume = kubernetes.CreatePVC(
        pvc_name='newjeans-finetuning-pvc-rwx',
        access_modes=['ReadWriteMany'],
        size='1024Gi',
        storage_class_name='filestore-rwx',
    )
    shared_volume.set_caching_options(True)
    claim_name = shared_volume.outputs['name']

    qa_task = txt_to_qa_dataset(data_url=data_url)

    chat_task = process_chat_template(
        system_prompt=system_prompt,
        volume_mount=mount_path,
        qa_dataset=qa_task.outputs["qa_dataset"],
    )
    kubernetes.mount_pvc(chat_task, pvc_name=claim_name, mount_path=mount_path)

    # Both helper tasks write a file under mount_path, so each needs the claim mounted.
    train_script_task = save_train_func(volume_mount=mount_path)
    kubernetes.mount_pvc(train_script_task, pvc_name=claim_name, mount_path=mount_path)

    ds_config_task = save_deepspeed_config(volume_mount=mount_path)
    kubernetes.mount_pvc(ds_config_task, pvc_name=claim_name, mount_path=mount_path)

    # Consuming the three upstream outputs makes the training step run after them.
    training_task = run_pytorchjob(
        base_image=base_image,
        pvc_name=claim_name,
        train_func=train_script_task.output,
        pretrained_model_name=model_id,
        dataset_name=chat_task.output,
        output_model=output_model_prefix,
        deepspeed_config_file=ds_config_task.output,
        batch_size=batch_size,
        num_train_epochs=num_train_epochs,
        master_replica=master_replica,
        master_cpu=master_cpu,
        master_memory=master_memory,
        master_gpu=master_gpu,
        worker_replica=worker_replica,
        worker_cpu=worker_cpu,
        worker_memory=worker_memory,
        worker_gpu=worker_gpu,
    )
    kubernetes.mount_pvc(training_task, pvc_name=claim_name, mount_path=mount_path)

In [68]:
from kfp.compiler import Compiler

# Compile the training pipeline into a YAML package file.
Compiler().compile(
    pipeline_func=fine_tuning_pipeline,
    package_path="newjeans-fine-tuning.yaml",
)

# Serving pipeline

In [5]:
from kfp import dsl

@dsl.component(base_image="python:3.11", packages_to_install=["kserve"])
def run_isvc(
    pvc_name: str,
    pytorchjob_name: str,
):
    """Create a KServe InferenceService named ``newjeans`` that serves the trained model.

    The predictor runs a text-generation-inference container with the claim
    mounted at ``/mnt/models`` and loads the model from
    ``/mnt/models/<pytorchjob_name>``.

    Args:
        pvc_name: PersistentVolumeClaim that holds the model directory.
        pytorchjob_name: Name of the model directory on that claim.
    """
    from kserve import (
        KServeClient,
        V1beta1InferenceService,
        V1beta1InferenceServiceSpec,
        V1beta1PredictorSpec,
        constants,
    )
    from kubernetes import client

    # The service is created in the namespace of the pod running this component.
    with open("/var/run/secrets/kubernetes.io/serviceaccount/namespace", "r") as f:
        namespace = f.read()

    model_mount = client.V1VolumeMount(
        mount_path="/mnt/models",
        name="models",
        read_only=False,
    )
    shm_mount = client.V1VolumeMount(
        mount_path="/dev/shm",
        name="dshm",
        read_only=False,
    )

    serving_container = client.V1Container(
        name="kserve-container",
        image="ghcr.io/huggingface/text-generation-inference:2.0",
        args=[
            "--model-id",
            f"/mnt/models/{pytorchjob_name}",
            "--quantize",
            "bitsandbytes-nf4",
        ],
        ports=[
            client.V1ContainerPort(
                container_port=8080,
                protocol="TCP",
            )
        ],
        resources=client.V1ResourceRequirements(
            limits={"cpu": "2", "memory": "40Gi", "nvidia.com/gpu": "1"},
            requests={"cpu": "2", "memory": "25Gi", "nvidia.com/gpu": "1"},
        ),
        volume_mounts=[model_mount, shm_mount],
    )

    model_volume = client.V1Volume(
        name="models",
        persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
            claim_name=pvc_name,
            read_only=False,
        ),
    )
    # Memory-backed emptyDir mounted at /dev/shm.
    shm_volume = client.V1Volume(
        name="dshm",
        empty_dir=client.V1EmptyDirVolumeSource(
            medium="Memory", size_limit="0.5Gi"
        ),
    )

    predictor_spec = V1beta1PredictorSpec(
        containers=[serving_container],
        max_replicas=1,
        min_replicas=1,
        volumes=[model_volume, shm_volume],
    )

    service_metadata = client.V1ObjectMeta(
        name="newjeans",
        namespace=namespace,
        annotations={
            "sidecar.istio.io/inject": "false",
            "serving.kserve.io/enable-prometheus-scraping": "true"
        },
    )
    inference_service = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=service_metadata,
        spec=V1beta1InferenceServiceSpec(predictor=predictor_spec),
    )

    kserve_client = KServeClient()
    kserve_client.create(inference_service, namespace=namespace, watch=True)

## pipeline

In [None]:
from kfp import dsl
from kfp import kubernetes

@dsl.pipeline(name="newjeans-fine-tuning")
def fine_tuning_pipeline(
    data_url: str = "https://docs.google.com/uc?export=download&id=1ycN8UktwSiMJ0cWwPXeLVIHJpBnUgEtE&confirm=t",
    system_prompt: str = "당신은 K-pop 아이돌 그룹 뉴진스(NewJeans)의 정보를 알려주는 멋진 AI 어시스턴트입니다. 모든 대화는 한국어(Korean)로 합니다.",
    base_image: str = "miroirs/transformers-pytorch-deepspeed-latest-gpu:deepspeed-0.14.0-all",
    model_id: str = "unsloth/llama-3-8b-Instruct",
    output_model_prefix: str = "newjeans-finetuning",
    batch_size: int = 4,
    num_train_epochs: int = 40,
    master_replica: int = 1,
    master_cpu: str = "2",
    master_memory: str = "55Gi",
    master_gpu: int = 1,
    worker_replica: int = 1,
    worker_cpu: str = "2",
    worker_memory: str = "20Gi",
    worker_gpu: int = 1,
):
    """End-to-end pipeline: prepare the dataset, fine-tune in a PyTorchJob, then deploy for serving.

    Args:
        data_url: Download URL passed to ``txt_to_qa_dataset``.
        system_prompt: System message passed to ``process_chat_template``.
        base_image: Training container image passed to ``run_pytorchjob``.
        model_id: Passed to ``run_pytorchjob`` as ``pretrained_model_name``.
        output_model_prefix: Passed to ``run_pytorchjob`` as ``output_model``.
        batch_size: Forwarded to ``run_pytorchjob``.
        num_train_epochs: Forwarded to ``run_pytorchjob``.
        master_replica: Forwarded to ``run_pytorchjob``.
        master_cpu: Forwarded to ``run_pytorchjob``.
        master_memory: Forwarded to ``run_pytorchjob``.
        master_gpu: Forwarded to ``run_pytorchjob``.
        worker_replica: Forwarded to ``run_pytorchjob``.
        worker_cpu: Forwarded to ``run_pytorchjob``.
        worker_memory: Forwarded to ``run_pytorchjob``.
        worker_gpu: Forwarded to ``run_pytorchjob``.
    """
    mount_path = '/data'

    # ReadWriteMany claim created under a fixed name; caching is enabled for this task.
    shared_volume = kubernetes.CreatePVC(
        pvc_name='newjeans-finetuning-pvc-rwx',
        access_modes=['ReadWriteMany'],
        size='1024Gi',
        storage_class_name='filestore-rwx',
    )
    shared_volume.set_caching_options(True)
    claim_name = shared_volume.outputs['name']

    qa_task = txt_to_qa_dataset(data_url=data_url)

    chat_task = process_chat_template(
        system_prompt=system_prompt,
        volume_mount=mount_path,
        qa_dataset=qa_task.outputs["qa_dataset"],
    )
    kubernetes.mount_pvc(chat_task, pvc_name=claim_name, mount_path=mount_path)

    # Both helper tasks write a file under mount_path, so each needs the claim mounted.
    train_script_task = save_train_func(volume_mount=mount_path)
    kubernetes.mount_pvc(train_script_task, pvc_name=claim_name, mount_path=mount_path)

    ds_config_task = save_deepspeed_config(volume_mount=mount_path)
    kubernetes.mount_pvc(ds_config_task, pvc_name=claim_name, mount_path=mount_path)

    # Consuming the three upstream outputs makes the training step run after them.
    training_task = run_pytorchjob(
        base_image=base_image,
        pvc_name=claim_name,
        train_func=train_script_task.output,
        pretrained_model_name=model_id,
        dataset_name=chat_task.output,
        output_model=output_model_prefix,
        deepspeed_config_file=ds_config_task.output,
        batch_size=batch_size,
        num_train_epochs=num_train_epochs,
        master_replica=master_replica,
        master_cpu=master_cpu,
        master_memory=master_memory,
        master_gpu=master_gpu,
        worker_replica=worker_replica,
        worker_cpu=worker_cpu,
        worker_memory=worker_memory,
        worker_gpu=worker_gpu,
    )
    kubernetes.mount_pvc(training_task, pvc_name=claim_name, mount_path=mount_path)

    # Serving takes the job name returned by training, so it runs after training.
    run_isvc(
        pvc_name=claim_name,
        pytorchjob_name=training_task.output,
    )

In [None]:
from kfp.compiler import Compiler

# Compile the end-to-end pipeline into a YAML package file.
Compiler().compile(
    pipeline_func=fine_tuning_pipeline,
    package_path="newjeans-fine-tuning.yaml",
)