In [2]:
import os
import json
import glob
import pandas as pd
from datetime import date
from idr_pytools import gpu_jobs_submitter, display_slurm_queue_jupyter, search_log, display_slurm_queue

def _env_path(var_name, *parts):
    """Return `parts` joined onto the directory stored in environment variable `var_name`.

    Raises:
        RuntimeError: if `var_name` is not set. Without this check,
            os.path.join(None, ...) fails with an opaque TypeError.
    """
    base_dir = os.getenv(var_name)
    if base_dir is None:
        raise RuntimeError(
            f"Environment variable {var_name} is not set; "
            "it is required to locate the project directories."
        )
    return os.path.join(base_dir, *parts)


# Project directory under $SCRATCH
root = _env_path("SCRATCH", "consort-qa")
# HuggingFace_Models directory under $DSDIR
idris_model_dir = _env_path("DSDIR", "HuggingFace_Models")
# Models stored under $SCRATCH
scratch_model_dir = _env_path("SCRATCH", "models")
# Models stored under $WORK/models/public
work_model_dir = _env_path("WORK", "models", "public")

# vLLM few-shot inference

In [3]:
# When True, the cells below keep a single experiment and submit it on the dev QoS.
debug = False

# Command template; the placeholders are filled in per (prompt strategy, model) pair.
base_cmd = "python ./inference_vllm/{run_file}.py {prompt_dir} {model_path} {output_dir} "

# prompts dir: the 0-, 1-, 3- and 5-shot prompt directories.
# glob returns matches in arbitrary order, so sort them: the experiment indices
# printed below (and the index used by the debug cell) then stay stable between runs.
# `[0135]` is the intended character class; `[0,1,3,5]` also matched a literal comma.
prompts_dirs = sorted(glob.glob(f"{root}/prompts/[0135]-shot*"))

# (model path, number of GPUs requested for the job)
models_paths_ngpus = [
    (f'{idris_model_dir}/bigscience/bloomz-7b1', 1),
    (f'{work_model_dir}/bloomz', 8),
    (f'{work_model_dir}/gemma-7b-it', 1),
    (f'{idris_model_dir}/microsoft/Phi-3-medium-4k-instruct', 1),
    (f'{idris_model_dir}/meta-llama/Llama-2-7b-chat-hf', 1),
    (f'{idris_model_dir}/meta-llama/Llama-2-70b-chat-hf', 2),
    (f'{work_model_dir}/meditron-70b', 2),
    (f'{idris_model_dir}/meta-llama/Meta-Llama-3-8B-Instruct', 1),
    (f'{idris_model_dir}/meta-llama/Meta-Llama-3-70B-Instruct', 2),
    (f'{work_model_dir}/Llama3-OpenBioLLM-8B', 1),
    (f'{work_model_dir}/Llama3-OpenBioLLM-70B', 2),
    (f'{work_model_dir}/c4ai-command-r-plus', 4),
    (f'{work_model_dir}/Mistral-7B-Instruct-v0.3', 1),
    (f'{idris_model_dir}/mistralai/Mixtral-8x22B-Instruct-v0.1', 8),
    (f'{idris_model_dir}/mistralai/Mistral-7B-Instruct-v0.1', 1),
    (f'{work_model_dir}/BioMistral-7B-DARE', 1),
]
# Model names (last path component) skipped for the 5-shot / 3-shot prompt strategies.
# NOTE(review): presumably these prompts exceed the models' context window — confirm.
no_5shot = ["bloomz-7b1","bloomz","Llama-2-7b-chat-hf","Llama-2-70b-chat-hf","meditron-70b","meditron-7b","Phi-3-medium-4k-instruct"]
no_3shot = ["bloomz-7b1","bloomz"]
# commands: one entry per (prompt strategy, model) pair that still has to be run
exp_names = []
exp_n_gpus = []
cmds = []
for prompt_dir in prompts_dirs:
    prompt_strategy = prompt_dir.rsplit('/', 1)[-1]
    for model_path, n_gpus in models_paths_ngpus:
        model_name = model_path.rsplit('/', 1)[-1]
        exp_name = f"{prompt_strategy}_{model_name}"
        output_dir = f"inference_vllm/out/{exp_name}"
        # "-cot" prompt strategies use their own inference script
        run_file = "batched_cot" if "-cot" in prompt_strategy else "batched_answer-gen"
        # skip models excluded from the 5-shot / 3-shot strategies
        if (
            ("5-shot" in prompt_strategy and model_name in no_5shot)
            or ("3-shot" in prompt_strategy and model_name in no_3shot)
        ):
            continue
        # a metrics.json in the output directory marks the experiment as already done
        if os.path.exists(f"{output_dir}/metrics.json"):
            continue
        # fill the template, then append the GPU count option
        cmd = (
            base_cmd.format(
                run_file=run_file,
                prompt_dir=prompt_dir,
                model_path=model_path,
                output_dir=output_dir,
            )
            + f"--n_gpus {n_gpus} "
        )
        cmds.append(cmd)
        exp_names.append(exp_name)
        exp_n_gpus.append(n_gpus)

# list the pending experiments with their index
for i, exp in enumerate(exp_names):
    print(i, exp)

In [15]:
# In debug mode, narrow the three parallel lists down to a single experiment.
if debug:
    chosen_index = 4
    exp_names, exp_n_gpus, cmds = (
        [entries[chosen_index]] for entries in (exp_names, exp_n_gpus, cmds)
    )

# Show what is about to be submitted.
for listing in (cmds, exp_n_gpus, exp_names):
    print(listing)

['python ./inference_vllm/batched_cot.py /gpfsscratch/rech/aro/urz45id/consort-qa/prompts/3-shot-cot /gpfswork/rech/aro/urz45id/models/public/meditron-70b inference_vllm/out/3-shot-cot_meditron-70b --n_gpus 2 ', 'python ./inference_vllm/batched_cot.py /gpfsscratch/rech/aro/urz45id/consort-qa/prompts/3-shot-cot /gpfswork/rech/aro/urz45id/models/public/Llama3-OpenBioLLM-70B inference_vllm/out/3-shot-cot_Llama3-OpenBioLLM-70B --n_gpus 2 ']
[2, 2]
['3-shot-cot_meditron-70b', '3-shot-cot_Llama3-OpenBioLLM-70B']


In [16]:
# Extra #SBATCH directives added to each job script; {exp_name} names the log files.
slurm_addon_template = "\n".join([
    "#SBATCH --mail-type=ALL",
    "#SBATCH --output=slurm/log/{exp_name}.out ",
    "#SBATCH --error=slurm/log/{exp_name}.err",
])

# Shell lines passed to the submitter as `script_addon`.
script_addon = (
    "module load python/3.11.5\n"
    "conda activate vllm\n"
    "ray start --head\n"
)

# Settings shared by every submission; only the debug run uses the dev QoS.
common_submit_kwargs = dict(
    module="cuda/12.1.0",
    time_max="2:00:00",
    qos="qos_gpu-dev" if debug else None,
    account="aro@a100",
    script_addon=script_addon,
)

# One job per experiment, each with its own name, GPU count and log files.
for cmd, exp_name, n_gpus in zip(cmds, exp_names, exp_n_gpus):
    slurm_addon = slurm_addon_template.format(exp_name=exp_name)
    gpu_jobs_submitter(
        cmd,
        name=exp_name,
        n_gpu=n_gpus,
        slurm_addon=slurm_addon,
        **common_submit_kwargs,
    )

batch job 0: 2 GPUs distributed on 1 nodes with 2 tasks / 2 gpus per node and 8 cpus per task
Submitted batch job 632364
batch job 0: 2 GPUs distributed on 1 nodes with 2 tasks / 2 gpus per node and 8 cpus per task
Submitted batch job 632378


In [1]:
# List the current user's jobs in the SLURM queue
!squeue -u $USER

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)


In [10]:
# WARNING: cancels every SLURM job owned by the current user, not only the ones submitted from this notebook
!scancel -u $USER

In [37]:
# Cancel a single job by id.
# NOTE(review): 305249 is hard-coded and does not match the job ids shown in the submission output — update it before running
!scancel 305249

# CONSORT sentence classification

In [None]:
# NOTE: this rebinds the notebook-wide `debug` flag that the vLLM cells above also read.
debug = True

base_cmd = "python ./consort-qa/predict_consort_sentence.py "

cmds = []

# Run 1: model `biobert-consort-text` on without_section.csv.
# Currently disabled: the command is built but deliberately not queued.
cmd = base_cmd
cmd += f"--model_path {scratch_model_dir}/biobert-consort-text "
cmd += "--data_path consort-qa/depression-crt/without_section.csv "
cmd += "--text_column sentence_text "
#cmds.append(cmd)

# Run 2: model `biobert-consort-text+section` on with_section.csv.
cmd = base_cmd
cmd += f"--model_path {scratch_model_dir}/biobert-consort-text+section "
cmd += "--data_path consort-qa/depression-crt/with_section.csv "
cmd += "--text_column model_input "
cmds.append(cmd)

# Debug mode keeps only the first queued command. Slice rather than index so that
# `cmds` stays a list, the same type the non-debug path hands to the submitter.
if debug:
    cmds = cmds[:1]

cmds

In [None]:
# SLURM job name; reused below to submit the job, query the queue and locate the log files
job_name = "consort-sentence-classification"

In [None]:
# Submit the classification job(s); debug runs use the dev QoS and a 5-minute limit.
job_ids = gpu_jobs_submitter(
    cmds,
    name=job_name,
    n_gpu=1,
    module="pytorch-gpu/py3/2.1.1",
    account="aro@v100",
    qos="qos_gpu-dev" if debug else None,
    time_max="00:05:00" if debug else "04:00:00",
)

In [None]:
# Display the SLURM queue for `job_name`.
# NOTE(review): the second positional argument (20) is presumably the refresh interval in seconds — confirm against idr_pytools
display_slurm_queue(job_name, 20)

In [None]:
# Look up the log files for `job_name`, stderr logs included (with_err=True).
# NOTE(review): presumably a dict of path lists keyed by "stdout"/"stderr" — that is how the next cells index it
logf_dict = search_log(job_name, with_err=True)
# -1 selects the last entry of each list
ind=-1

In [None]:
# Print the stdout log selected by `ind`; the context manager closes the file handle.
with open(logf_dict["stdout"][ind]) as stdout_log:
    print(stdout_log.read())

In [None]:
# Print the stderr log selected by `ind`; the context manager closes the file handle.
with open(logf_dict["stderr"][ind]) as stderr_log:
    print(stderr_log.read())

# clean logs and debug outputs

In [5]:
# Delete the *.slurm files under slurm/ (presumably the generated submission scripts — confirm)
!rm -rf slurm/*.slurm

In [6]:
# Empty slurm/log/, the directory the vLLM jobs write their .out/.err files to
!rm -rf slurm/log/*

In [None]:
# Delete the *_debug output directories.
# NOTE(review): the rest of this notebook writes to inference_vllm/out, not inference/out — confirm this path is still the intended one
!rm -rf inference/out/*_debug

In [16]:
# Remove core-* entries from the working directory (presumably core dumps left by crashed jobs — confirm)
!rm -rf core-*

In [3]:
# Remove every directory under inference_vllm/out/ that has no predictions.json
import shutil

candidate_dirs = glob.glob("inference_vllm/out/*/")
dirs_without_predictions = [
    candidate
    for candidate in candidate_dirs
    if not os.path.exists(os.path.join(candidate, "predictions.json"))
]
for out_dir in dirs_without_predictions:
    print(out_dir, "removed")
    shutil.rmtree(out_dir)