In [810]:
import kfp

from kfp import dsl
from kfp import compiler

from kfp.dsl import Input
from kfp.dsl import Output
from kfp.dsl import Dataset
from kfp.dsl import Model
from kfp.dsl import HTML
from kfp.dsl import Markdown

In [811]:
@dsl.component(base_image="tensorflow/tensorflow", packages_to_install=["huggingface_hub"])
def download_model_pretrained(hug_model_name: str, hug_model_revision: str, llm_model_pretrained: Output[Model]):
    """Download a Hugging Face model snapshot and publish it as a zipped artifact.

    Only ``*.json`` and ``*.safetensors`` files of the requested revision are
    downloaded. The snapshot directory is zipped so that the archive holds a
    single top-level folder named after the snapshot directory, and that folder
    name is stored in ``llm_model_pretrained.metadata["revision"]`` so that
    consumers can locate the extracted files.

    Args:
        hug_model_name: Hugging Face repo id, e.g. ``"user/model"``.
        hug_model_revision: Revision of the repo to download.
        llm_model_pretrained: Output artifact. Its path is redirected to
            ``<artifact dir>/<model name>.zip``.
    """
    import logging
    import os
    import zipfile

    from huggingface_hub import snapshot_download

    logger = logging.getLogger('kfp_logger')
    logger.setLevel(logging.INFO)

    def zip_folder(folder, zip_name):
        """Zip ``folder`` recursively, keeping the folder's own name as the archive root."""
        archive_root = os.path.dirname(folder)
        with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for root, _dirs, files in os.walk(folder):
                for file in files:
                    file_path = os.path.join(root, file)
                    zipf.write(file_path, os.path.relpath(file_path, archive_root))

    logger.info(f"{hug_model_name} {hug_model_revision}")

    model_name     = os.path.basename(hug_model_name)
    model_user_hug = os.path.dirname(hug_model_name)

    logger.info(f"{model_user_hug} {model_name}")

    cache_dir = f"/tmp/{model_name}"
    os.makedirs(cache_dir, exist_ok=True)

    allow_patterns = [
        "*.json",
        "*.safetensors",
    ]

    # Use the directory reported by snapshot_download instead of rebuilding the
    # cache layout by hand: the hand-built path was wrong for repo ids without
    # a namespace and for revisions that are not a commit hash.
    snapshot_path = os.path.normpath(snapshot_download(
        repo_id=hug_model_name,
        revision=hug_model_revision,
        allow_patterns=allow_patterns,
        cache_dir=cache_dir,
        token=False,  # anonymous download; replaces the deprecated use_auth_token=False
    ))
    logger.info(f"snapshot: {snapshot_path}")

    model_dir = llm_model_pretrained.path
    os.makedirs(model_dir, exist_ok=True)

    llm_model_pretrained.path = os.path.join(model_dir, f"{model_name}.zip")
    # Consumers unzip into /tmp and open /tmp/<metadata["revision"]>, so record
    # the name of the folder that is actually stored in the archive. For a
    # commit-hash revision this equals hug_model_revision.
    llm_model_pretrained.metadata["revision"] = os.path.basename(snapshot_path)

    zip_folder(snapshot_path, llm_model_pretrained.path)

In [812]:
@dsl.component(base_image="tensorflow/tensorflow", packages_to_install=[
        "torch", "transformers", "peft", "scikit-learn",
        "datasets", "tabulate", "pandas", "seaborn",
        "matplotlib", "tqdm", "mpld3"])
def download_and_preprocessing_dataset(
    dataset_name: str, llm_model_pretrained: Input[Model],
    dataset_preprocessed: Output[Dataset], dataset_visualization: Output[Markdown], dataset_metrics: Output[HTML]):
    """Download the tweet dataset, tokenize it and publish dataset artifacts.

    The CSV is split 70/30 (stratified on the label column), every example is
    turned into a ``"Tweet text : <tweet> Label : "`` prompt followed by its
    label, and the result is left-padded / truncated to ``max_length`` tokens.

    Args:
        dataset_name: Currently unused; the dataset is read from a fixed CSV URL.
        llm_model_pretrained: Zipped model snapshot; only its tokenizer is used.
            ``metadata["revision"]`` names the folder inside the archive.
        dataset_preprocessed: Output artifact; its path is redirected to
            ``<artifact dir>/dataset.zip`` (contains ``dataset/train`` and
            ``dataset/eval``).
        dataset_visualization: Markdown table of the raw training split.
        dataset_metrics: HTML figure with the label distribution of both splits.
    """
    import logging
    import os
    import zipfile

    import mpld3
    import torch

    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns

    from transformers import AutoTokenizer

    from datasets import Dataset
    from datasets import DatasetDict

    from sklearn.model_selection import train_test_split

    text_column        = "Tweet text"
    label_column       = "text_label"
    max_length         = 64                           # max number of tokens per example

    logger = logging.getLogger('kfp_logger')
    logger.setLevel(logging.INFO)

    def zip_folder(folder, zip_name):
        """Zip ``folder`` recursively, keeping the folder's own name as the archive root."""
        archive_root = os.path.dirname(folder)
        with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for root, _dirs, files in os.walk(folder):
                for file in files:
                    file_path = os.path.join(root, file)
                    zipf.write(file_path, os.path.relpath(file_path, archive_root))

    def unzip_file(zip_file, extract_to):
        """Extract every member of ``zip_file`` into ``extract_to``."""
        with zipfile.ZipFile(zip_file, 'r') as zipf:
            zipf.extractall(extract_to)

    def plot_label_distribution(ax, labels, title):
        """Draw the class histogram of ``labels`` on ``ax`` with one colour per class."""
        sns.histplot(pd.Categorical(labels, ["sin queja", "queja"]), ax=ax)
        ax.set_title(title)
        ax.patches[0].set_facecolor('skyblue')
        ax.patches[1].set_facecolor('salmon')

    unzip_file(llm_model_pretrained.path, "/tmp")

    # The archive holds a single folder named after the stored revision.
    model_path = "/tmp/{revision}".format(revision=llm_model_pretrained.metadata["revision"])

    logger.info("snapshot")
    for root, _dirs, files in os.walk(model_path):
        for file in files:
            logger.info(f" - {file}")

    source = "https://raw.githubusercontent.com/lowlevel-1989/twitter_complaints_spanish/master/dataset.csv"
    df = pd.read_csv(source, sep=";", encoding="utf-8", names=[text_column, label_column])

    train_df, test_df = train_test_split(df, test_size=0.3, random_state=42, stratify=df[label_column])

    dataset = DatasetDict({
        "train": Dataset.from_pandas(train_df),
        "test":  Dataset.from_pandas(test_df),
    })

    # pad_token_id: pads sequences so that every example in a batch has the same length.
    # eos_token_id: marks the end of a sequence; reused for padding when no pad token exists.
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    def preprocess_function(examples):
        """Tokenize a batch into fixed-length input_ids / attention_mask / labels."""
        batch_size = len(examples[text_column])

        # Prompt format: "Tweet text : <tweet> Label : "
        inputs     = [f"{text_column} : {x} Label : " for x in examples[text_column]]

        # Targets are the original labels, converted to str.
        targets    = [str(x) for x in examples[label_column]]

        model_inputs = tokenizer(inputs)
        labels = tokenizer(targets)

        # First pass: append the target tokens to the prompt tokens.
        for i in range(batch_size):

            sample_input_ids                  = model_inputs["input_ids"][i]

            # Target tokens followed by one pad token.
            label_input_ids                   = labels["input_ids"][i] + [tokenizer.pad_token_id]

            # The model input is prompt + target.
            model_inputs["input_ids"][i]      = sample_input_ids + label_input_ids

            # Every real token gets attention mask 1.
            model_inputs["attention_mask"][i] = [1] * len(model_inputs["input_ids"][i])

            # Prompt positions are labelled -100 (ignored by the loss); only the
            # target tokens at the end contribute.
            labels["input_ids"][i]            = [-100] * len(sample_input_ids) + label_input_ids

        # Second pass: left-pad everything to max_length and truncate.
        for i in range(batch_size):

            sample_input_ids     = model_inputs["input_ids"][i]
            label_input_ids      = labels["input_ids"][i]
            pad_count            = max_length - len(sample_input_ids)

            model_inputs["input_ids"][i] = [tokenizer.pad_token_id] * pad_count + sample_input_ids

            # Padding positions get attention mask 0.
            model_inputs["attention_mask"][i] = [0] * pad_count + model_inputs["attention_mask"][i]

            labels["input_ids"][i] = [-100] * pad_count + label_input_ids

            # Convert to torch tensors, keeping at most max_length tokens.
            model_inputs["input_ids"][i]      = torch.tensor(model_inputs["input_ids"][i][:max_length])
            model_inputs["attention_mask"][i] = torch.tensor(model_inputs["attention_mask"][i][:max_length])
            labels["input_ids"][i]            = torch.tensor(labels["input_ids"][i][:max_length])

        model_inputs["labels"] = labels["input_ids"]
        return model_inputs

    processed_datasets = dataset.map(
        preprocess_function,
        batched=True,
        num_proc=1,
        remove_columns=dataset["train"].column_names,
        load_from_cache_file=False,
        desc="Running tokenizer on dataset",
    )

    processed_datasets["train"].save_to_disk("/tmp/dataset/train")
    processed_datasets["test"].save_to_disk("/tmp/dataset/eval")

    model_dir = dataset_preprocessed.path
    os.makedirs(model_dir, exist_ok=True)

    dataset_preprocessed.path = os.path.join(model_dir, "dataset.zip")
    zip_folder("/tmp/dataset", dataset_preprocessed.path)

    with open(dataset_visualization.path, "w") as f:
        f.write(dataset["train"].to_pandas().to_markdown())

    # One figure, two axes: label distribution of the train and test splits.
    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    plot_label_distribution(axes[0], dataset["train"][label_column], 'Distribution - Train Data')
    plot_label_distribution(axes[1], dataset["test"][label_column], 'Distribution - Test Data')

    plt.tight_layout()

    mpld3.save_html(fig, dataset_metrics.path)
    plt.close(fig)  # release the figure once it has been exported

In [813]:
@dsl.component(base_image="tensorflow/tensorflow:latest-gpu", packages_to_install=[
    "torch", "transformers", "peft", "datasets", "tqdm", "matplotlib", "mpld3"])
def prompt_tuning_bloom(
    num_epochs: int, dataset_preprocessed: Input[Dataset], llm_model_pretrained: Input[Model],
    llm_model_snapshot: Output[Model], perplexity_visualization: Output[HTML], loss_visualization: Output[HTML]):
    """Prompt-tune the pretrained causal LM with PEFT and export the adapter.

    Args:
        num_epochs: Number of passes over the training split.
        dataset_preprocessed: Zipped dataset containing ``dataset/train`` and
            ``dataset/eval``.
        llm_model_pretrained: Zipped base model; ``metadata["revision"]`` names
            the folder inside the archive.
        llm_model_snapshot: Output artifact; its path is redirected to
            ``<artifact dir>/model.zip`` (contains ``snapshot/``).
        perplexity_visualization: HTML plot of train/eval perplexity per epoch.
        loss_visualization: HTML plot of train/eval loss per epoch.
    """
    import logging
    import os
    import zipfile

    import mpld3
    import torch

    import matplotlib.pyplot as plt

    from transformers import AutoModelForCausalLM
    from transformers import default_data_collator
    from transformers import get_linear_schedule_with_warmup

    from peft import TaskType
    from peft import get_peft_model
    from peft import PromptTuningInit
    from peft import PromptTuningConfig

    from tqdm import tqdm

    from torch.utils.data import DataLoader

    from datasets import load_from_disk

    logger = logging.getLogger('kfp_logger')
    logger.setLevel(logging.INFO)

    lr                 = 3e-2                         # training learning rate
    batch_size         = 8                            # number of examples per batch

    def zip_folder(folder, zip_name):
        """Zip ``folder`` recursively, keeping the folder's own name as the archive root."""
        archive_root = os.path.dirname(folder)
        with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for root, _dirs, files in os.walk(folder):
                for file in files:
                    file_path = os.path.join(root, file)
                    zipf.write(file_path, os.path.relpath(file_path, archive_root))

    def unzip_file(zip_file, extract_to):
        """Extract every member of ``zip_file`` into ``extract_to``."""
        with zipfile.ZipFile(zip_file, 'r') as zipf:
            zipf.extractall(extract_to)

    def save_curves(path, train_values, eval_values, ylabel, title):
        """Plot the per-epoch train/eval curves and export them as HTML to ``path``."""
        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(epochs, train_values, label=f'Train {ylabel}')
        ax.plot(epochs, eval_values, label=f'Eval {ylabel}')
        ax.set_xlabel('Epochs')
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.legend()
        ax.grid(True)
        mpld3.save_html(fig, path)
        plt.close(fig)  # release the figure once it has been exported

    unzip_file(llm_model_pretrained.path, "/tmp")
    unzip_file(dataset_preprocessed.path, "/tmp")

    # The model archive holds a single folder named after the stored revision.
    model_path = "/tmp/{revision}".format(revision=llm_model_pretrained.metadata["revision"])

    logger.info("snapshot")
    for root, _dirs, files in os.walk(model_path):
        for file in files:
            logger.info(f" - {file}")

    train_dataset = load_from_disk("/tmp/dataset/train")
    eval_dataset  = load_from_disk("/tmp/dataset/eval")

    logger.info("dataset")
    logger.info(train_dataset)
    logger.info(eval_dataset)

    use_cuda = torch.cuda.is_available()

    # Pinned host memory only helps host-to-GPU copies, so enable it only when
    # a GPU is actually present.
    train_dataloader = DataLoader(
        train_dataset, shuffle=True, collate_fn=default_data_collator, batch_size=batch_size, pin_memory=use_cuda
    )
    eval_dataloader  = DataLoader(
        eval_dataset, collate_fn=default_data_collator, batch_size=batch_size, pin_memory=use_cuda
    )

    peft_config = PromptTuningConfig(
        task_type=TaskType.CAUSAL_LM,
        prompt_tuning_init=PromptTuningInit.TEXT,
        num_virtual_tokens=8,
        prompt_tuning_init_text="Classify if the tweet is a complaint or not in spanish:", # initial prompt
        tokenizer_name_or_path=model_path,
    )

    model = AutoModelForCausalLM.from_pretrained(model_path)
    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()

    # Optimizer and a linear learning-rate decay over all training steps.
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    lr_scheduler = get_linear_schedule_with_warmup(
        optimizer=optimizer,
        num_warmup_steps=0,
        num_training_steps=(len(train_dataloader) * num_epochs),
    )

    device = "cuda" if use_cuda else "cpu"
    logger.info(f"device: {device}")

    model = model.to(device)

    epochs                = []
    train_ppl_list        = []
    eval_ppl_list         = []
    train_epoch_loss_list = []
    eval_epoch_loss_list  = []

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for batch in tqdm(train_dataloader):
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss
            total_loss += loss.detach().float()
            loss.backward()
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()

        model.eval()
        eval_loss = 0
        for batch in tqdm(eval_dataloader):
            batch = {k: v.to(device) for k, v in batch.items()}
            with torch.no_grad():
                outputs = model(**batch)
            # Only the loss is tracked; decoded predictions were never consumed,
            # so they are no longer computed.
            eval_loss += outputs.loss.detach().float()

        eval_epoch_loss = eval_loss / len(eval_dataloader)
        eval_ppl = torch.exp(eval_epoch_loss)
        train_epoch_loss = total_loss / len(train_dataloader)
        train_ppl = torch.exp(train_epoch_loss)

        epochs.append(int(epoch))
        train_ppl_list.append(float(train_ppl))
        eval_ppl_list.append(float(eval_ppl))
        train_epoch_loss_list.append(float(train_epoch_loss))
        eval_epoch_loss_list.append(float(eval_epoch_loss))

        logger.info(f"{epoch=}: {train_ppl=} {train_epoch_loss=} {eval_ppl=} {eval_epoch_loss=}")

    save_curves(
        perplexity_visualization.path, train_ppl_list, eval_ppl_list,
        'Perplexity', 'Training and Evaluation Perplexity')
    save_curves(
        loss_visualization.path, train_epoch_loss_list, eval_epoch_loss_list,
        'Loss', 'Training and Evaluation Loss')

    model_dir = llm_model_snapshot.path
    os.makedirs(model_dir, exist_ok=True)

    llm_model_snapshot.path = os.path.join(model_dir, "model.zip")
    model.save_pretrained("/tmp/model/snapshot")

    zip_folder("/tmp/model/snapshot", llm_model_snapshot.path)

In [814]:
@dsl.component(base_image="tensorflow/tensorflow", packages_to_install=["torch-model-archiver"])
def torch_model_archiver(
    llm_model_pretrained: Input[Model], llm_model_snapshot: Input[Model], llm_model_archiver: Output[Model]):
    """Package the base model and the tuned snapshot into a TorchServe model store.

    Downloads the custom handler, runs ``torch-model-archiver`` to build
    ``bloomz.mar`` and writes the layout
    ``<artifact dir>/model-store/bloomz.mar`` plus
    ``<artifact dir>/config/config.properties``.

    Args:
        llm_model_pretrained: Zipped base model; its ``metadata["revision"]``
            (when present) is passed to the handler as ``model_revision``.
        llm_model_snapshot: Zipped prompt-tuning snapshot.
        llm_model_archiver: Output artifact directory; ``metadata["name"]`` is
            set to ``"bloomz"``.

    Raises:
        subprocess.CalledProcessError: If ``torch-model-archiver`` fails.
    """
    import json
    import logging
    import os
    import shutil
    import subprocess

    from urllib import request

    logger = logging.getLogger('kfp_logger')
    logger.setLevel(logging.INFO)

    model_name = "bloomz"
    mar_name   = f"{model_name}.mar"
    # Revision used when the input artifact carries no "revision" metadata.
    default_revision = "a2845d7e13dd12efae154a9f1c63fcc2e0cc4b05"

    url = "https://raw.githubusercontent.com/lowlevel-1989/bloomz/master/custom_handler.py"
    request.urlretrieve(url, "/tmp/custom_handler.py")

    # TODO: pass text_column as an argument
    config = {
        "model_pretrained": os.path.basename(llm_model_pretrained.path),
        "model_snapshot":   os.path.basename(llm_model_snapshot.path),
        # Follow the revision recorded on the artifact instead of a fixed hash,
        # so the handler stays in sync with the archive it receives.
        "model_revision":   llm_model_pretrained.metadata.get("revision", default_revision),
        "text_column":      "Tweet text",
    }

    with open("/tmp/setup_config.json", "w") as f:
        json.dump(config, f)

    with open("/tmp/requirements.txt", "w") as f:
        f.write("peft")

    logger.info("tmp")
    for root, _dirs, files in os.walk("/tmp"):
        for file in files:
            logger.info(f" - {file}")

    # Command that builds the .mar archive together with its extra files.
    command = [
        "torch-model-archiver",
        "--model-name", model_name,
        "--version", "1.0",
        "--handler", "/tmp/custom_handler.py",
        "--extra-files", "{},{},/tmp/setup_config.json".format(llm_model_pretrained.path, llm_model_snapshot.path),
        "-r", "/tmp/requirements.txt",
    ]

    logger.info(config)
    logger.info(command)

    subprocess.run(command, check=True)

    model_dir = llm_model_archiver.path
    os.makedirs(f"{model_dir}/model-store", exist_ok=True)
    os.makedirs(f"{model_dir}/config",      exist_ok=True)

    properties = {
        "inference_address":        "http://127.0.0.1:8083",
        "management_address":       "http://127.0.0.1:8084",
        "metrics_address":          "http://127.0.0.1:8085",
        "grpc_inference_port":      7072,
        "model_store":              "/mnt/models/model-store",
        "install_py_dep_per_model": "true",
        "model_snapshot":      {
            "name": "startup.cfg",
            "modelCount": 1,
            "models": {
                model_name: {
                    "1.0": {
                        # A real boolean, so json.dumps emits an unquoted `true`.
                        "defaultVersion":  True,
                        "marName": mar_name,
                        "minWorkers":         1,
                        "maxWorkers":         1,
                        "batchSize":          1,
                        "maxBatchDelay":    100,
                        "responseTimeout":  120
                    }
                }
            }
        }
    }

    logger.info(properties)

    # One `key=value` line per property; the nested snapshot is embedded as JSON.
    property_lines = []
    for key, value in properties.items():
        if key == "model_snapshot":
            value = json.dumps(value)
        property_lines.append(f"{key}={value}")
    properties_text = "\n".join(property_lines) + "\n"

    logger.info(properties_text)

    with open(f"{model_dir}/config/config.properties", "w") as f:
        f.write(properties_text)

    # shutil.move also works when the artifact directory lives on another
    # filesystem, where os.rename would fail.
    shutil.move(mar_name, os.path.join(f"{model_dir}/model-store", mar_name))

    llm_model_archiver.metadata["name"] = model_name

In [815]:
@dsl.component(base_image="tensorflow/tensorflow", packages_to_install=['kserve', 'kubernetes'])
def model_serving(llm_model_archiver : Input[Model]):
    """Create a KServe InferenceService that serves the archived model with TorchServe.

    Args:
        llm_model_archiver: Model-store artifact. ``metadata["name"]`` becomes
            the InferenceService name and the artifact URI (with a ``minio://``
            scheme rewritten to ``s3://``) becomes the storage URI.
    """
    import logging

    from kubernetes import client

    from kserve import KServeClient
    from kserve import constants
    from kserve import utils
    from kserve import V1beta1InferenceService
    from kserve import V1beta1InferenceServiceSpec
    from kserve import V1beta1PredictorSpec
    from kserve import V1beta1TorchServeSpec

    logger = logging.getLogger('kfp_logger')
    logger.setLevel(logging.INFO)

    name      = llm_model_archiver.metadata["name"]
    namespace = utils.get_default_target_namespace()

    kserve_version = 'v1beta1'
    api_version = "{}/{}".format(constants.KSERVE_GROUP, kserve_version)

    # Rewrite only the URI scheme. A blanket str.replace would also corrupt
    # bucket names or path segments that happen to contain "minio".
    minio_scheme = "minio://"
    uri = llm_model_archiver.uri
    if uri.startswith(minio_scheme):
        uri = "s3://" + uri[len(minio_scheme):]

    logger.info(f"{namespace}, {name}")
    logger.info(f"{api_version}")
    logger.info(f"{uri}")

    isvc = V1beta1InferenceService(
        api_version=api_version,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=name,
            namespace=namespace,
            annotations={'sidecar.istio.io/inject': 'false'},
        ),
        spec=V1beta1InferenceServiceSpec(
            predictor=V1beta1PredictorSpec(
                service_account_name="sa-minio-kserve",
                pytorch=V1beta1TorchServeSpec(
                    args=[
                        "--no-config-snapshots",
                        "--models bloomz=bloomz.mar",
                    ],
                    storage_uri=uri,
                    protocol_version="v2",
                    resources=client.V1ResourceRequirements(
                        requests={"cpu": "1", "memory": "2Gi"},
                        limits={"cpu": "1", "memory": "8Gi"},
                    ),
                ),
            ),
        ),
    )

    kserve_client = KServeClient()
    kserve_client.create(isvc)

In [816]:
@dsl.pipeline(name="tweet-classifier-dev", description="LLM")
def llm_pipeline(
    hug_model_name: str = "bigscience/bloomz-560m",
    hug_model_revision: str = "a2845d7e13dd12efae154a9f1c63fcc2e0cc4b05",
    dataset_name: str = "twitter_complaints",
    num_epochs: int = 50,
    skip_model_serving: bool = False,
):
    # Pipeline wiring: download model -> preprocess dataset -> prompt tuning
    # -> archive for TorchServe -> (optionally) deploy with KServe.
    # Every step has caching enabled.

    download_step = (
        download_model_pretrained(
            hug_model_name=hug_model_name,
            hug_model_revision=hug_model_revision,
        )
        .set_caching_options(enable_caching=True)
    )
    # The zipped base model is consumed by three downstream steps.
    pretrained_model = download_step.outputs["llm_model_pretrained"]

    dataset_step = (
        download_and_preprocessing_dataset(
            dataset_name=dataset_name,
            llm_model_pretrained=pretrained_model,
        )
        .set_caching_options(enable_caching=True)
    )

    # Training requests a single NVIDIA GPU.
    tuning_step = (
        prompt_tuning_bloom(
            num_epochs=num_epochs,
            dataset_preprocessed=dataset_step.outputs["dataset_preprocessed"],
            llm_model_pretrained=pretrained_model,
        )
        .set_accelerator_type(accelerator="nvidia.com/gpu")
        .set_accelerator_limit('1')
        .set_caching_options(enable_caching=True)
    )

    archive_step = (
        torch_model_archiver(
            llm_model_pretrained=pretrained_model,
            llm_model_snapshot=tuning_step.outputs["llm_model_snapshot"],
        )
        .set_caching_options(enable_caching=True)
    )

    # The explicit `== False` comparison is kept on purpose: it is applied to a
    # pipeline parameter and builds the condition passed to dsl.If.
    with dsl.If(skip_model_serving == False):
        (
            model_serving(
                llm_model_archiver=archive_step.outputs["llm_model_archiver"],
            )
            .set_caching_options(enable_caching=True)
        )

In [817]:
# Compile the pipeline definition into a package file next to the notebook.
compiler.Compiler().compile(
    pipeline_func=llm_pipeline,
    package_path="pipeline.yaml",
)

In [818]:
# Build the Kubeflow Pipelines client used by the cells below.
import kfp
# Credentials object handed to the client.
# NOTE(review): presumably reads a service-account token mounted into the notebook pod - confirm.
credentials = kfp.client.ServiceAccountTokenVolumeCredentials()

client = kfp.Client(credentials=credentials)

In [819]:
# Submit the compiled package as a run of the "LLM Tweet Classifier" experiment.
# skip_model_serving is not passed, so the pipeline default applies.
run = client.create_run_from_pipeline_package(
    pipeline_file="pipeline.yaml",
    arguments=dict(
        hug_model_name="bigscience/bloomz-560m",
        hug_model_revision="a2845d7e13dd12efae154a9f1c63fcc2e0cc4b05",
        dataset_name="twitter_complaints",
        num_epochs=50,
    ),
    run_name="Tweet Classifier",
    experiment_name="LLM Tweet Classifier",
)