## Local execution

In [None]:
# Before running this section, install its dependencies with "pip install -r requirements_cpt.txt"
from trans_cpt.preprocessing import get_dataset
from trans_cpt.training import training_pipeline, inference_pipeline

In [None]:
# Call get_dataset for the Chilean waiting-list corpus. The options are
# collected in a dict first and unpacked as keyword arguments.
dataset_request = {
    "repository": "huggingface",
    "dataset_name": "DT4H/the_chilean_waiting_list_corpus",
}
get_dataset(**dataset_request)

In [None]:
# Settings for the local training run; the pipeline receives them as a single
# positional dict.
local_training_config = {
    "data_path": "/gpfs/projects/bsc14/abecerr1/datasets/DT4H___wikipedia_cardiology_es/default/0.0.0/b20f70bf02ea8c0f5e0181e333b7b9ab3c610c4f",
    "batch_size": 10,
}
training_pipeline(local_training_config)

In [None]:
# Settings for the local inference run: the stored model to load and a Spanish
# clinical text containing a `<mask>` placeholder.
# NOTE(review): presumably the pipeline predicts the masked token — confirm
# against trans_cpt.training.inference_pipeline.
local_inference_config = {
    "model_path": "/gpfs/projects/bsc14/storage/models/transcpt/CardioBERTa_2025-01-17_15-27-01",
    "text": "Con el diagnóstico de endocarditis infecciosa sobre válvula protésica por Bacteroides fragilis, se comenzó tratamiento con metronidazol 500 mg/8 horas y amoxicilina-clavulánico 1000 mg/200mg/8 horas intravenoso. La paciente permaneció <mask> durante todo el ingreso, senegativizaron los hemocultivos de forma precoz y evolucionó de forma favorables de su ligera descompensación cardiaca con tratamiento diurético. Tras 6 semanas de tratamiento antibiótico intravenoso dirigido, estando estable hemodinámicamente y en buena clase funcional se dio de alta hospitalaria.",
}
inference_pipeline(local_inference_config)

## Remote execution on a server with SLURM and the cosmos-model library

In [None]:
# Before running this section, install its dependencies with "pip install -r requirements.txt"

import cosmos
from cosmos.execution_types import TRAINING_MODEL

# Host that cosmos is initialised against for the remote runs in this section.
COSMOS_HOST = "alogin2.bsc.es"
cosmos.initialization(host=COSMOS_HOST)

In [None]:
# Run trans_cpt.preprocessing.get_dataset through cosmos. Every option is
# collected in one mapping (same keywords, same order) and unpacked into the
# call, so the job description can be read on its own.
dataset_job_options = dict(
    # What to execute.
    module_path="trans_cpt.preprocessing",
    function_name="get_dataset",
    # Scheduler account settings.
    queue="acc_debug",
    user="bsc14",
    # Keyword arguments forwarded to get_dataset.
    kwargs={
        "repository": "huggingface",
        "dataset_name": "DT4H/the_chilean_waiting_list_corpus",
    },
    # Python packages and environment modules requested for the run.
    requirements=["python-dotenv", "datasets", "fsspec"],
    modules=[],
    # Requested resources.
    partition="debug",
    nodes=1,
    cpus=20,
    gpus=1,
    venv_path="/gpfs/projects/bsc14/environments/trans_cpt",
    watch=True,
    # NOTE(review): SLURM execution is switched off for this step only —
    # confirm how cosmos treats queue/partition/resources in that mode.
    execute_with_slurm=False,
)
result = cosmos.run(**dataset_job_options)

In [None]:
# Run trans_cpt.training.training_pipeline through cosmos. The options are
# gathered in one mapping (same keywords, same order) and unpacked into the
# call.
training_job_options = dict(
    # What to execute.
    module_path="trans_cpt.training",
    function_name="training_pipeline",
    # Scheduler account settings.
    queue="acc_bscls",
    user="bsc14",
    # Single positional argument: the training configuration dict.
    args=[
        {
            "data_path": "/gpfs/projects/bsc14/abecerr1/datasets/DT4H___wikipedia_cardiology_es/default/0.0.0/b20f70bf02ea8c0f5e0181e333b7b9ab3c610c4f",
            "batch_size": 10,
        }
    ],
    # Python packages and environment modules requested for the run.
    requirements=[
        "datasets",
        "transformers",
        "torch",
        "accelerate",
        "tqdm",
        "tensorboard",
    ],
    modules=["cuda/12.6", "nccl/2.20.5"],
    # Requested resources.
    partition="debug",
    nodes=1,
    cpus=80,
    gpus=4,
    venv_path="/gpfs/projects/bsc14/environments/trans_cpt",
    # NOTE(review): presumably cosmos prefixes the function invocation with
    # this launcher command — confirm against the cosmos documentation.
    custom_command="accelerate launch --config_file ./trans_cpt/accelerate_data_parallelism_config.yaml",
    execution_type=TRAINING_MODEL,
    training_logs_path="training_logs",
    watch=True,
)
job = cosmos.run(**training_job_options)


In [None]:
# Run trans_cpt.training.inference_pipeline through cosmos on a Spanish
# clinical text containing a `<mask>` placeholder.
#
# The text is assembled from adjacent string literals, which Python joins with
# NO separator. Every fragment except the last must therefore end with a
# space; otherwise words are glued together ("fragilis,se", "tratamientodiurético")
# and the model receives a different input than the local-execution example.
result = cosmos.run(
    module_path="trans_cpt.training",
    function_name="inference_pipeline",
    queue="acc_debug",
    user="bsc14",
    # Single positional argument: the inference configuration dict.
    args=[{
        "model_path": "/gpfs/projects/bsc14/storage/models/transcpt/CardioBERTa_2025-01-17_15-27-01",
        "text": (
            "Con el diagnóstico de endocarditis infecciosa sobre válvula protésica por Bacteroides fragilis, "
            "se comenzó tratamiento con metronidazol 500 mg/8 horas y amoxicilina-clavulánico 1000 mg/200mg/8 "
            "horas intravenoso. La paciente permaneció <mask> durante todo el ingreso, senegativizaron los hemocultivos "
            "de forma precoz y evolucionó de forma favorables de su ligera descompensación cardiaca con tratamiento "
            "diurético. Tras 6 semanas de tratamiento antibiótico intravenoso dirigido, estando estable hemodinámicamente "
            "y en buena clase funcional se dio de alta hospitalaria."
        ),
    }],
    # Python packages and environment modules requested for the run.
    requirements=[
        "datasets",
        "transformers",
        "torch",
        "accelerate",
        "tqdm",
        "tensorboard"
    ],
    modules=[
        "cuda/12.6"
    ],
    # Requested resources.
    partition="debug",
    nodes=1,
    cpus=80,
    gpus=4,
    venv_path="/gpfs/projects/bsc14/environments/trans_cpt",
    # NOTE(review): the training job uses accelerate_data_parallelism_config.yaml
    # while this one uses accelerate_config.yaml — confirm that is intentional.
    custom_command="accelerate launch --config_file ./trans_cpt/accelerate_config.yaml",
    watch=True,
)