From 5c6f082f64c9a0ddb7612dfd558a26ee121562ed Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru Date: Wed, 28 Aug 2024 17:32:30 +0900 Subject: [PATCH 01/20] [wip] add g-leaderboard --- evaluation/installers/g-leaderboard/README.md | 82 +++++++++++++ .../installers/g-leaderboard/install.sh | 109 ++++++++++++++++++ .../installers/g-leaderboard/logs/.gitignore | 4 + .../g-leaderboard/resources/config_base.yaml | 86 ++++++++++++++ .../g-leaderboard/scripts/env_common.sh | 5 + .../scripts/envs/llm-jp-nvlink/environment.sh | 0 .../scripts/envs/llm-jp/environment.sh | 0 .../scripts/envs/sakura/environment.sh | 6 + .../scripts/run_g-leaderboard.sh | 61 ++++++++++ 9 files changed, 353 insertions(+) create mode 100644 evaluation/installers/g-leaderboard/README.md create mode 100644 evaluation/installers/g-leaderboard/install.sh create mode 100644 evaluation/installers/g-leaderboard/logs/.gitignore create mode 100644 evaluation/installers/g-leaderboard/resources/config_base.yaml create mode 100644 evaluation/installers/g-leaderboard/scripts/env_common.sh create mode 100644 evaluation/installers/g-leaderboard/scripts/envs/llm-jp-nvlink/environment.sh create mode 100644 evaluation/installers/g-leaderboard/scripts/envs/llm-jp/environment.sh create mode 100644 evaluation/installers/g-leaderboard/scripts/envs/sakura/environment.sh create mode 100644 evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md new file mode 100644 index 00000000..ce2d1d26 --- /dev/null +++ b/evaluation/installers/g-leaderboard/README.md @@ -0,0 +1,82 @@ +# g-leaderboard (GENIAC Official Evaluation) installation and execution script + +This repository contains scripts for evaluating LLMs using g-leaderboard. + +## Usage + +### Build + +Clone this repository and move to the installation directory. 
+ +```bash +git clone https://github.com/llm-jp/scripts +cd scripts/evaluation/installers/g-leaderboard +``` + +Then, run the installation script. +The following command will create a working directory under the specified directory (`~/g-leaderboard`). +`` should be the name of the environment (llm-jp, llm-jp-nvlink, sakura, etc). +The list of available environment names can be found in the `scripts/envs` directory. + +```bash +# For a cluster with SLURM +sbatch --partition {partition} install.sh ~/g-leaderboard +# For a cluster without SLURM +bash install.sh ~/g-leaderboard > logs/install.out 2> logs/install.err +``` + +After the installation is complete, set up the wandb and huggingface accounts. + +```shell +cd ~/g-leaderboard +source environment/venv/bin/activate +wandb login +huggingface-cli login +``` + +### Contents in installed directory (~/g-leaderboard) + +The following directory structure will be created after installation. + +``` +~/g-leaderboard/ + run_g-leaderboard.sh Script for running g-leaderboard + logs/ Log files for SLURM jobs + resources/ + config_base.yaml Configuration file template + environment/ + installer_envvar.log List of environment variables recorded during installation + install.sh Installation script + python/ Python + scripts/ Scripts for environment settings + src/ Downloaded libraries + venv/ Python virtual environemnt (linked to python/) +``` + +### Evaluation + +Replace variables as needed in `run_g-leaderboard.sh` and `resources/config_base.yaml`. + - To edit tokenizer, wandb entity, and/or wandb project: Edit `run_g-leaderboard.sh`. + - To edit others: Edit `resources/config_base.yaml` and `run_g-leaderboard.sh`. 
+ +```shell +cd ~/g-leaderboard +# (Optional) If you need to change variables +cp resources/config_base.yaml resources/config_custom.yaml +cp run_g-leaderboard.sh run_g-leaderboard_custom.sh +# Set `resources/config_custom.yaml` in run_g-leaderboard_custom.sh + +# For a cluster with SLURM +sbatch --partition {partition} run_g-leaderboard.sh {path/to/model} {wandb.run_name} +# For a cluster without SLURM +CUDA_VISIBLE_DEVICES={num} bash run_g-leaderboard.sh {path/to/model} {wandb.run_name} +``` + +#### Sample code + +```shell +# For a cluster with SLURM +sbatch --partition {partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) +# For a cluster without SLURM +CUDA_VISIBLE_DEVICES=0 bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) +``` diff --git a/evaluation/installers/g-leaderboard/install.sh b/evaluation/installers/g-leaderboard/install.sh new file mode 100644 index 00000000..1149ea28 --- /dev/null +++ b/evaluation/installers/g-leaderboard/install.sh @@ -0,0 +1,109 @@ +#!/bin/bash +# +# g-leaderboard installation script +# +# This script use CPU on a cluster. +# - In a SLURM environment, it is recommend to use CPU nodes. +# +# Usage: +# On a cluster with SLURM: +# Run `sbatch --partition {partition} install.sh TARGET_ENV TARGET_DIR` +# On a cluster without SLURM: +# Run `bash install.sh TARGET_ENV TARGET_DIR > logs/install-eval.out 2> logs/install-eval.err` +# - TARGET_ENV: Name of a directory under scripts/envs; TARGET_DIR: Installation directory +# +#SBATCH --job-name=install-g-leaderboard +#SBATCH --partition={FIX_ME} +#SBATCH --nodes=1 +#SBATCH --exclusive +#SBATCH --mem=0 +#SBATCH --output=logs/%x-%j.out +#SBATCH --error=logs/%x-%j.err + +set -eux -o pipefail + +ENV_CHOICES=($(ls scripts/envs)) +TARGET_ENV_MSG="Set TARGET_ENV from (${ENV_CHOICES[@]} ) or add a new configuration in 'scripts/envs'." 
+ +if [ $# -ne 2 ]; then + set +x + >&2 echo Usage: sbatch \(or bash\) install.sh TARGET_ENV TARGET_DIR + >&2 echo $TARGET_ENV_MSG + exit 1 +fi + +INSTALLER_DIR=$(pwd) +TARGET_ENV=$1 +TARGET_DIR=$2 +INSTALLER_COMMON=$INSTALLER_DIR/../../../common/installers.sh + +if [[ ! " ${ENV_CHOICES[@]} " =~ " ${TARGET_ENV} " ]]; then + set +x + >&2 echo $TARGET_ENV_MSG + exit 1 +fi + +>&2 echo INSTALLER_DIR=$INSTALLER_DIR +>&2 echo TARGET_DIR=$TARGET_DIR +>&2 echo TARGET_ENV=$TARGET_ENV +>&2 echo INSTALLER_COMMON=$INSTALLER_COMMON +source $INSTALLER_COMMON + +mkdir -p $TARGET_DIR +pushd $TARGET_DIR + +# Copy basic scripts for g-leaderboard +cp ${INSTALLER_DIR}/scripts/run_g-leaderboard.sh . +mkdir resources +cp ${INSTALLER_DIR}/resources/config_base.yaml resources/ +mkdir logs + +ENV_DIR=${TARGET_DIR}/environment +mkdir $ENV_DIR +pushd $ENV_DIR + +# Copy enviroment scripts +cp ${INSTALLER_DIR}/install.sh . +mkdir scripts + +# Create environment.sh +BASE_ENV_SHELL=${INSTALLER_DIR}/scripts/env_common.sh +EXT_ENV_SHELL=${INSTALLER_DIR}/scripts/envs/${TARGET_ENV}/environment.sh +NEW_ENV_SHELL=scripts/environment.sh + +print_env_shell() { + echo "#!/bin/bash" + echo + echo "# from $BASE_ENV_SHELL" + cat $BASE_ENV_SHELL + echo + echo "# from $EXT_ENV_SHELL" + cat $EXT_ENV_SHELL +} +print_env_shell > $NEW_ENV_SHELL + +source $NEW_ENV_SHELL + +# Record current environment variables +set > installer_envvar.log + +# src is used to store all resources for from-scratch builds +mkdir src +pushd src + +# Install Python (function in $INSTALLER_COMMON) +install_python v${PYTHON_VERSION} ${ENV_DIR}/python +popd # $ENV_DIR + +# Prepare venv +python/bin/python3 -m venv venv +source venv/bin/activate +python -m pip install --no-cache-dir -U pip setuptools + +# Install g-leaderboard +pushd src +git clone https://github.com/wandb/llm-leaderboard g-leaderboard -b v${G_LEADERBOARD_TAG} +pushd g-leaderboard +pip install --no-cache-dir requirements.txt + +echo "Installation done." 
| tee >(cat >&2) diff --git a/evaluation/installers/g-leaderboard/logs/.gitignore b/evaluation/installers/g-leaderboard/logs/.gitignore new file mode 100644 index 00000000..5e7d2734 --- /dev/null +++ b/evaluation/installers/g-leaderboard/logs/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/evaluation/installers/g-leaderboard/resources/config_base.yaml b/evaluation/installers/g-leaderboard/resources/config_base.yaml new file mode 100644 index 00000000..c5eaaa2c --- /dev/null +++ b/evaluation/installers/g-leaderboard/resources/config_base.yaml @@ -0,0 +1,86 @@ +testmode: false # If you want to test with a small amount of data, please set it to true. +model_name: "<>" # will be used in Table + +wandb: + entity: "<>" + project: "<>" + run_name: "<>" # this run_name will be used as the name of run in leaderboard. Can be changed later + +# Tasks to run +run_llm_jp_eval_ja_0_shot: true +run_llm_jp_eval_ja_few_shots: true +run_llm_jp_eval_en_0_shot: true +run_llm_jp_eval_en_few_shots: true +run_mt_bench_ja: true +run_mt_bench_en: true + +model: + api: false # if you don't use api, please set "api" as "false". If you use api, please select from "openai", "anthoropic", "google", "cohere", "mistral", "amazon_bedrock" + use_wandb_artifacts: false # if you use wandb artifacts, please set true. + artifacts_path: null # if you use wandb artifacts, please paste the link. if not, please leave it as "". + pretrained_model_name_or_path: "<>" #If you use openai api, put the name of model + device_map: "auto" + load_in_8bit: false + load_in_4bit: false + +# for llm-jp-eval +llm_jp_eval: + max_seq_length: 4096 + target_dataset: "all" # {all, jamp, janli, jcommonsenseqa, jemhopqa, jnli, jsem, jsick, jsquad, jsts, niilc, chabsa, mmlu_en} + ja_num_shots: 4 # if run_llm_jp_eval_ja_few_shots is true, please set the num of few shots. 
Default is 4 + en_num_shots: 4 # if run_llm_jp_eval_en_few_shots is true, please set the num of few shots. Default is 4 + torch_dtype: "bf16" # {fp16, bf16, fp32} + # Items that do not need to be changed unless specifically intended. + dataset_artifact: "wandb-japan/llm-leaderboard/jaster:v11" + dataset_dir: "/jaster/1.2.6/evaluation/test" + ja: + custom_prompt_template: "以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。\n\n### 指示:\n{instruction}\n\n### 入力:\n{input}\n\n### 応答:\n" + custom_fewshots_template: "\n\n### 入力:\n{input}\n\n### 応答:\n{output}" + en: + custom_prompt_template: "以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。\n\n### 指示:\n{instruction}\n\n### 入力:\n{input}\n\n### 応答:\n" + custom_fewshots_template: "\n\n### 入力:\n{input}\n\n### 応答:\n{output}" + +# for mtbench +mtbench: + model_id: "nii--llama-2-175b-exp2-instruct" # cannot use '<', '>', ':', '"', '/', '\\', '|', '?', '*', '.' + max_new_token: 1024 + num_gpus_per_model: 8 + num_gpus_total: 8 + max_gpu_memory: null + dtype: bfloat16 # None or float32 or float16 or bfloat16 + use_azure: true # if you use azure openai service for evaluation, set true + # for conv template # added + custom_conv_template: true + # the following variables will be used when custom_conv_template is set as true + conv_name: "custom" + conv_sep: "\n\n### " + conv_stop_token_ids: "[2]" + conv_stop_str: "###" + conv_role_message_separator: ":\n" + conv_role_only_separator: ":\n" + ja: + conv_system_message: "以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。" + conv_roles: "('指示', '応答')" + en: + conv_system_message: "以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。" + conv_roles: "('指示', '応答')" + dataset: # Items that do not need to be changed unless specifically intended. 
+ ja: + question_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_ja_question:v3" + test_question_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_ja_question_small_for_test:v5" + referenceanswer_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_ja_referenceanswer:v1" + test_referenceanswer_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_ja_referenceanswer_small_for_test:v1" + judge_prompt_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_ja_prompt:v1" + bench_name: "mt_bench_ja" + en: + question_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_en_question:v0" + test_question_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_en_question_small_for_test:v0" + referenceanswer_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_en_referenceanswer:v0" + test_referenceanswer_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_en_referenceanswer_small_for_test:v0" + judge_prompt_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_en_prompt:v0" + bench_name: "mt_bench_en" + +#================================================================== +# Items that do not need to be changed unless specifically intended. 
+#================================================================== +github_version: g-eval-v1.0 #for recording diff --git a/evaluation/installers/g-leaderboard/scripts/env_common.sh b/evaluation/installers/g-leaderboard/scripts/env_common.sh new file mode 100644 index 00000000..ac0467dd --- /dev/null +++ b/evaluation/installers/g-leaderboard/scripts/env_common.sh @@ -0,0 +1,5 @@ +# List of environment variables and module loads for g-leaderboard + +export PYTHON_VERSION=3.10.14 + +export G_LEADERBOARD_TAG=g-leaderboard diff --git a/evaluation/installers/g-leaderboard/scripts/envs/llm-jp-nvlink/environment.sh b/evaluation/installers/g-leaderboard/scripts/envs/llm-jp-nvlink/environment.sh new file mode 100644 index 00000000..e69de29b diff --git a/evaluation/installers/g-leaderboard/scripts/envs/llm-jp/environment.sh b/evaluation/installers/g-leaderboard/scripts/envs/llm-jp/environment.sh new file mode 100644 index 00000000..e69de29b diff --git a/evaluation/installers/g-leaderboard/scripts/envs/sakura/environment.sh b/evaluation/installers/g-leaderboard/scripts/envs/sakura/environment.sh new file mode 100644 index 00000000..ad20ca48 --- /dev/null +++ b/evaluation/installers/g-leaderboard/scripts/envs/sakura/environment.sh @@ -0,0 +1,6 @@ +export CUDA_VERSION_MAJOR=12 +export CUDA_VERSION_MINOR=1 +export CUDA_VERSION=${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} +export CUDNN_VERSION=8.9.4 +module load cuda/${CUDA_VERSION} +module load /data/cudnn-tmp-install/modulefiles/${CUDNN_VERSION} diff --git a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh new file mode 100644 index 00000000..7d57453f --- /dev/null +++ b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh @@ -0,0 +1,61 @@ +#!/bin/bash +#SBATCH --job-name=g-leaderboard +#SBATCH --partition= +#SBATCH --exclusive +#SBATCH --nodes=1 +#SBATCH --gpus=1 +#SBATCH --ntasks-per-node=8 +#SBATCH --output=logs/%x-%j.out +#SBATCH 
--error=logs/%x-%j.err + +set -eux + +# Open file limit +ulimit -n 65536 1048576 + +ENV_DIR=environment +source ${ENV_DIR}/scripts/environment.sh +source ${ENV_DIR}/venv/bin/activate + +# Arguments +MODEL=$1 +WANDB_RUN_NAME=$2 + +# Semi-fixed vars +CONFIG_TEMPLATE=resources/config_base.yaml +TOKENIZER=$MODEL +WANDB_ENTITY=llm-jp-eval +WANDB_PROJECT=test + +# Fixed vars +G_LEADERBOARD_DIR=${ENV_DIR}/src/g-leaderboard +CONFIG_DIR=${G_LEADERBOARD_DIR}/configs +SCRIPT_PATH=${G_LEADERBOARD_DIR}/scripts/run_eval.py + +# Config settings +NEW_CONFIG=${CONFIG_DIR}/config.${WANDB_PROJECT}.${WANDB_RUN_NAME}.yaml +REPLACE_VARS=("MODEL" "TOKENIZER" "DATASET_DIR" "WANDB_ENTITY" "WANDB_PROJECT" "WANDB_RUN_NAME") + +# Create a new config file to save the config file of each run +cp $CONFIG_TEMPLATE $NEW_CONFIG + +# Replace variables +for VAR in "${REPLACE_VARS[@]}"; do + VALUE=$(eval echo \${$VAR}) + sed -i "s|<<${VAR}>>|${VALUE}|g" $NEW_CONFIG +done + +# Create a temporal project +TMP_G_LEADERBOARD_DIR=$(mktemp -d "${HOME}/ckpt_convert.XXXXXXXX") +cp -r $G_LEADERBOARD_DIR/* $TMP_G_LEADERBOARD_DIR +cp $NEW_CONFIG $TMP_G_LEADERBOARD_DIR/configs/config.yaml + +# Run g-leaderboard +pushd $TMP_G_LEADERBOARD_DIR +python $SCRIPT_PATH + +# Clean up +popd +rm -rf $TMP_G_LEADERBOARD_DIR + +echo "Done" From ab8acba21155ddadd0c8795b8b4bb8590b9f81ca Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru Date: Wed, 28 Aug 2024 17:36:09 +0900 Subject: [PATCH 02/20] add OpenAI-related environment variables --- evaluation/installers/g-leaderboard/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md index ce2d1d26..7f6b607b 100644 --- a/evaluation/installers/g-leaderboard/README.md +++ b/evaluation/installers/g-leaderboard/README.md @@ -76,7 +76,7 @@ CUDA_VISIBLE_DEVICES={num} bash run_g-leaderboard.sh {path/to/model} {wandb.run_ ```shell # For a cluster with SLURM -sbatch --partition 
{partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) +OPENAI_AZURE_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) # For a cluster without SLURM -CUDA_VISIBLE_DEVICES=0 bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) +OPENAI_AZURE_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) ``` From 683fd4cf294f2106926f54a8dea106488fafc8ae Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru Date: Wed, 28 Aug 2024 18:30:51 +0900 Subject: [PATCH 03/20] configure blended run --- .../resources/blended_run_config.yaml | 25 +++++++++++++++++++ .../scripts/run_g-leaderboard.sh | 5 ++++ 2 files changed, 30 insertions(+) create mode 100644 evaluation/installers/g-leaderboard/resources/blended_run_config.yaml diff --git a/evaluation/installers/g-leaderboard/resources/blended_run_config.yaml b/evaluation/installers/g-leaderboard/resources/blended_run_config.yaml new file mode 100644 index 00000000..640656f1 --- /dev/null +++ b/evaluation/installers/g-leaderboard/resources/blended_run_config.yaml @@ -0,0 +1,25 @@ +run_chain: false # If you want to reuse past evaluation results in a new run, please set it to true. + +new_run: # This setting is for blending runs without running new evaluations. If run_chain is set to true, this setting is disabled. + entity: "your/WANDB/entity" + project: "your/WANDB/project" + run_name: "your/WANDB/run_name" + +old_runs: # Please specify the tasks you want to carry over from past runs. Multiple runs are permissible. + - run_path: "your/WANDB/run_path" + tasks: # The list of tasks to take over. Please comment out tasks that do not need to be taken over. 
+ - jaster_ja_0_shot + - jaster_ja_4_shot + - jaster_en_0_shot + - jaster_en_4_shot + - mtbench_ja + - mtbench_en + # - run_path: "your/WANDB/run_path" + # tasks: + # - jaster_ja_0_shot + # - jaster_ja_4_shot + # - jaster_en_0_shot + # - jaster_en_4_shot + # - mtbench_ja + # - mtbench_en + \ No newline at end of file diff --git a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh index 7d57453f..a9997481 100644 --- a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh +++ b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh @@ -45,6 +45,11 @@ for VAR in "${REPLACE_VARS[@]}"; do sed -i "s|<<${VAR}>>|${VALUE}|g" $NEW_CONFIG done +# Blended run config settings +BLENDED_RUN_CONFIG=resources/blended_run_config.yaml +BLENDED_RUN_CONFIG_DIR=${G_LEADERBOARD_DIR}/blend_run_configs +cp $BLENDED_RUN_CONFIG ${BLENDED_RUN_CONFIG_DIR}/config.yaml + # Create a temporal project TMP_G_LEADERBOARD_DIR=$(mktemp -d "${HOME}/ckpt_convert.XXXXXXXX") cp -r $G_LEADERBOARD_DIR/* $TMP_G_LEADERBOARD_DIR From d1a5060819d9ad042fabda3d117ba6c770c9bc20 Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru Date: Wed, 28 Aug 2024 18:41:10 +0900 Subject: [PATCH 04/20] fix --- evaluation/installers/g-leaderboard/install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation/installers/g-leaderboard/install.sh b/evaluation/installers/g-leaderboard/install.sh index 1149ea28..16e1719c 100644 --- a/evaluation/installers/g-leaderboard/install.sh +++ b/evaluation/installers/g-leaderboard/install.sh @@ -102,7 +102,7 @@ python -m pip install --no-cache-dir -U pip setuptools # Install g-leaderboard pushd src -git clone https://github.com/wandb/llm-leaderboard g-leaderboard -b v${G_LEADERBOARD_TAG} +git clone https://github.com/wandb/llm-leaderboard g-leaderboard -b ${G_LEADERBOARD_TAG} pushd g-leaderboard pip install --no-cache-dir requirements.txt From 
08f3fb3a2fa16673c42af3feb6bbd7860918d0e8 Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru Date: Wed, 28 Aug 2024 19:33:30 +0900 Subject: [PATCH 05/20] fix indent --- evaluation/installers/g-leaderboard/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md index 7f6b607b..869580da 100644 --- a/evaluation/installers/g-leaderboard/README.md +++ b/evaluation/installers/g-leaderboard/README.md @@ -40,10 +40,10 @@ The following directory structure will be created after installation. ``` ~/g-leaderboard/ - run_g-leaderboard.sh Script for running g-leaderboard - logs/ Log files for SLURM jobs + run_g-leaderboard.sh Script for running g-leaderboard + logs/ Log files for SLURM jobs resources/ - config_base.yaml Configuration file template + config_base.yaml Configuration file template environment/ installer_envvar.log List of environment variables recorded during installation install.sh Installation script From 04130c063b84c8b8ea7cadbf11a8b2326ffd117f Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru Date: Wed, 28 Aug 2024 19:33:36 +0900 Subject: [PATCH 06/20] fix --- evaluation/installers/g-leaderboard/install.sh | 4 ++-- .../installers/g-leaderboard/scripts/run_g-leaderboard.sh | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/evaluation/installers/g-leaderboard/install.sh b/evaluation/installers/g-leaderboard/install.sh index 16e1719c..00564723 100644 --- a/evaluation/installers/g-leaderboard/install.sh +++ b/evaluation/installers/g-leaderboard/install.sh @@ -56,6 +56,7 @@ pushd $TARGET_DIR cp ${INSTALLER_DIR}/scripts/run_g-leaderboard.sh . 
mkdir resources cp ${INSTALLER_DIR}/resources/config_base.yaml resources/ +cp ${INSTALLER_DIR}/resources/blended_run_config.yaml resources/ mkdir logs ENV_DIR=${TARGET_DIR}/environment @@ -98,12 +99,11 @@ popd # $ENV_DIR # Prepare venv python/bin/python3 -m venv venv source venv/bin/activate -python -m pip install --no-cache-dir -U pip setuptools # Install g-leaderboard pushd src git clone https://github.com/wandb/llm-leaderboard g-leaderboard -b ${G_LEADERBOARD_TAG} pushd g-leaderboard -pip install --no-cache-dir requirements.txt +pip install --no-cache-dir -r requirements.txt echo "Installation done." | tee >(cat >&2) diff --git a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh index a9997481..b661b9c0 100644 --- a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh +++ b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh @@ -30,11 +30,10 @@ WANDB_PROJECT=test # Fixed vars G_LEADERBOARD_DIR=${ENV_DIR}/src/g-leaderboard CONFIG_DIR=${G_LEADERBOARD_DIR}/configs -SCRIPT_PATH=${G_LEADERBOARD_DIR}/scripts/run_eval.py # Config settings NEW_CONFIG=${CONFIG_DIR}/config.${WANDB_PROJECT}.${WANDB_RUN_NAME}.yaml -REPLACE_VARS=("MODEL" "TOKENIZER" "DATASET_DIR" "WANDB_ENTITY" "WANDB_PROJECT" "WANDB_RUN_NAME") +REPLACE_VARS=("MODEL" "TOKENIZER" "WANDB_ENTITY" "WANDB_PROJECT" "WANDB_RUN_NAME") # Create a new config file to save the config file of each run cp $CONFIG_TEMPLATE $NEW_CONFIG @@ -51,11 +50,12 @@ BLENDED_RUN_CONFIG_DIR=${G_LEADERBOARD_DIR}/blend_run_configs cp $BLENDED_RUN_CONFIG ${BLENDED_RUN_CONFIG_DIR}/config.yaml # Create a temporal project -TMP_G_LEADERBOARD_DIR=$(mktemp -d "${HOME}/ckpt_convert.XXXXXXXX") +TMP_G_LEADERBOARD_DIR=$(mktemp -d "${ENV_DIR}/src/g-leaderboard.XXXXXXXX") cp -r $G_LEADERBOARD_DIR/* $TMP_G_LEADERBOARD_DIR cp $NEW_CONFIG $TMP_G_LEADERBOARD_DIR/configs/config.yaml # Run g-leaderboard +SCRIPT_PATH=scripts/run_eval.py pushd 
$TMP_G_LEADERBOARD_DIR python $SCRIPT_PATH From 820cba128847274d13b591a7227776f3a920e19d Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru Date: Thu, 29 Aug 2024 10:59:29 +0900 Subject: [PATCH 07/20] use env command --- evaluation/installers/g-leaderboard/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md index 869580da..cee83e7a 100644 --- a/evaluation/installers/g-leaderboard/README.md +++ b/evaluation/installers/g-leaderboard/README.md @@ -76,7 +76,7 @@ CUDA_VISIBLE_DEVICES={num} bash run_g-leaderboard.sh {path/to/model} {wandb.run_ ```shell # For a cluster with SLURM -OPENAI_AZURE_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) +env OPENAI_AZURE_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) # For a cluster without SLURM -OPENAI_AZURE_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) +env OPENAI_AZURE_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) ``` From 1f385349bd8c0dd24e09f6cc7852506dd6c4cb8a Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru Date: Thu, 29 Aug 2024 12:04:49 +0900 Subject: [PATCH 08/20] fix envvar name --- evaluation/installers/g-leaderboard/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md index cee83e7a..45a4ddd5 100644 --- a/evaluation/installers/g-leaderboard/README.md +++ b/evaluation/installers/g-leaderboard/README.md @@ -76,7 +76,7 @@ CUDA_VISIBLE_DEVICES={num} bash run_g-leaderboard.sh {path/to/model} {wandb.run_ ```shell # For a cluster with SLURM -env OPENAI_AZURE_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch 
--partition {partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) +env AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) # For a cluster without SLURM -env OPENAI_AZURE_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) +env AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) ``` From 392fe0084ab6d0e607949d23f2be327f386374ef Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru Date: Thu, 29 Aug 2024 12:23:00 +0900 Subject: [PATCH 09/20] update readme --- evaluation/installers/g-leaderboard/README.md | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md index 45a4ddd5..06f799b7 100644 --- a/evaluation/installers/g-leaderboard/README.md +++ b/evaluation/installers/g-leaderboard/README.md @@ -1,6 +1,6 @@ # g-leaderboard (GENIAC Official Evaluation) installation and execution script -This repository contains scripts for evaluating LLMs using g-leaderboard. +This repository contains scripts for evaluating LLMs using [g-leaderboard](https://github.com/wandb/llm-leaderboard/tree/g-leaderboard). ## Usage @@ -14,7 +14,7 @@ cd scripts/evaluation/installers/g-leaderboard ``` Then, run the installation script. -The following command will create a working directory under the specified directory (`~/g-leaderboard`). +The following command will create a working directory under the specified directory (here, `~/g-leaderboard`). `` should be the name of the environment (llm-jp, llm-jp-nvlink, sakura, etc). The list of available environment names can be found in the `scripts/envs` directory. @@ -41,13 +41,13 @@ The following directory structure will be created after installation. 
``` ~/g-leaderboard/ run_g-leaderboard.sh Script for running g-leaderboard - logs/ Log files for SLURM jobs + logs/ Log files written by SLURM jobs resources/ config_base.yaml Configuration file template environment/ installer_envvar.log List of environment variables recorded during installation install.sh Installation script - python/ Python + python/ Python built from source scripts/ Scripts for environment settings src/ Downloaded libraries venv/ Python virtual environemnt (linked to python/) @@ -57,7 +57,7 @@ The following directory structure will be created after installation. Replace variables as needed in `run_g-leaderboard.sh` and `resources/config_base.yaml`. - To edit tokenizer, wandb entity, and/or wandb project: Edit `run_g-leaderboard.sh`. - - To edit others: Edit `resources/config_base.yaml` and `run_g-leaderboard.sh`. + - Otherwise: Edit `resources/config_base.yaml` and `run_g-leaderboard.sh`. ```shell cd ~/g-leaderboard @@ -67,16 +67,20 @@ cp run_g-leaderboard.sh run_g-leaderboard_custom.sh # Set `resources/config_custom.yaml` in run_g-leaderboard_custom.sh # For a cluster with SLURM -sbatch --partition {partition} run_g-leaderboard.sh {path/to/model} {wandb.run_name} +AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh {path/to/model} {wandb.run_name} # For a cluster without SLURM -CUDA_VISIBLE_DEVICES={num} bash run_g-leaderboard.sh {path/to/model} {wandb.run_name} +CUDA_VISIBLE_DEVICES={num} AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh {path/to/model} {wandb.run_name} ``` #### Sample code ```shell # For a cluster with SLURM -env AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) +AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) # For a cluster without SLURM -env AZURE_OPENAI_ENDPOINT=xxx 
AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) +AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) ``` + +### About Azure OpenAI API + +To conduct an evaluation, you must configure the Azure OpenAI API by setting the endpoint and key for the deployment named `gpt-4`, which uses `gpt-4-0613`. Please contact the administrator to obtain the necessary endpoint and key. From 347f71ad22e6aa5d0400a3c19a2677d9040aabd5 Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru Date: Thu, 29 Aug 2024 13:25:10 +0900 Subject: [PATCH 10/20] update documentation --- evaluation/installers/g-leaderboard/README.md | 8 ++++---- evaluation/installers/g-leaderboard/install.sh | 5 ++--- .../installers/g-leaderboard/scripts/run_g-leaderboard.sh | 1 + 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md index 06f799b7..3c8261f1 100644 --- a/evaluation/installers/g-leaderboard/README.md +++ b/evaluation/installers/g-leaderboard/README.md @@ -1,4 +1,4 @@ -# g-leaderboard (GENIAC Official Evaluation) installation and execution script +# LLM Evaluation using g-leaderboard (GENIAC Official Evaluation) This repository contains scripts for evaluating LLMs using [g-leaderboard](https://github.com/wandb/llm-leaderboard/tree/g-leaderboard). @@ -14,15 +14,15 @@ cd scripts/evaluation/installers/g-leaderboard ``` Then, run the installation script. -The following command will create a working directory under the specified directory (here, `~/g-leaderboard`). +The following command will create an installation directory under the specified directory (here, `~/g-leaderboard`). `` should be the name of the environment (llm-jp, llm-jp-nvlink, sakura, etc). The list of available environment names can be found in the `scripts/envs` directory. 
```bash # For a cluster with SLURM -sbatch --partition {partition} install.sh ~/g-leaderboard +sbatch --partition {partition} install.sh {env-name} ~/g-leaderboard # For a cluster without SLURM -bash install.sh ~/g-leaderboard > logs/install.out 2> logs/install.err +bash install.sh {env-name} ~/g-leaderboard > logs/install.out 2> logs/install.err ``` After the installation is complete, set up the wandb and huggingface accounts. diff --git a/evaluation/installers/g-leaderboard/install.sh b/evaluation/installers/g-leaderboard/install.sh index 00564723..a6e29d46 100644 --- a/evaluation/installers/g-leaderboard/install.sh +++ b/evaluation/installers/g-leaderboard/install.sh @@ -3,7 +3,7 @@ # g-leaderboard installation script # # This script use CPU on a cluster. -# - In a SLURM environment, it is recommend to use CPU nodes. +# - In a SLURM environment, it is recommended to use CPU nodes. # # Usage: # On a cluster with SLURM: @@ -55,8 +55,7 @@ pushd $TARGET_DIR # Copy basic scripts for g-leaderboard cp ${INSTALLER_DIR}/scripts/run_g-leaderboard.sh . mkdir resources -cp ${INSTALLER_DIR}/resources/config_base.yaml resources/ -cp ${INSTALLER_DIR}/resources/blended_run_config.yaml resources/ +cp ${INSTALLER_DIR}/resources/* resources/ mkdir logs ENV_DIR=${TARGET_DIR}/environment diff --git a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh index b661b9c0..f7deaf78 100644 --- a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh +++ b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh @@ -50,6 +50,7 @@ BLENDED_RUN_CONFIG_DIR=${G_LEADERBOARD_DIR}/blend_run_configs cp $BLENDED_RUN_CONFIG ${BLENDED_RUN_CONFIG_DIR}/config.yaml # Create a temporal project +# NOTE: This is necessary to avoid using incorrect configurations when running multiple jobs at the same time. 
TMP_G_LEADERBOARD_DIR=$(mktemp -d "${ENV_DIR}/src/g-leaderboard.XXXXXXXX") cp -r $G_LEADERBOARD_DIR/* $TMP_G_LEADERBOARD_DIR cp $NEW_CONFIG $TMP_G_LEADERBOARD_DIR/configs/config.yaml From e36abfc0afbff4782088a133d8e69f860a440f95 Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru Date: Thu, 29 Aug 2024 14:03:11 +0900 Subject: [PATCH 11/20] update documentation --- evaluation/installers/g-leaderboard/README.md | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md index 3c8261f1..d1bb5598 100644 --- a/evaluation/installers/g-leaderboard/README.md +++ b/evaluation/installers/g-leaderboard/README.md @@ -55,17 +55,13 @@ The following directory structure will be created after installation. ### Evaluation -Replace variables as needed in `run_g-leaderboard.sh` and `resources/config_base.yaml`. - - To edit tokenizer, wandb entity, and/or wandb project: Edit `run_g-leaderboard.sh`. +The evaluation script takes the model path and wandb run name as arguments. +For the other settings, edit the configuration file `resources/config_base.yaml` and/or `resources/config_custom.yaml`. + - To edit the tokenizer, wandb entity, and/or wandb project: Edit `run_g-leaderboard.sh`. - Otherwise: Edit `resources/config_base.yaml` and `run_g-leaderboard.sh`. 
```shell cd ~/g-leaderboard -# (Optional) If you need to change variables -cp resources/config_base.yaml resources/config_custom.yaml -cp run_g-leaderboard.sh run_g-leaderboard_custom.sh -# Set `resources/config_custom.yaml` in run_g-leaderboard_custom.sh - # For a cluster with SLURM AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh {path/to/model} {wandb.run_name} # For a cluster without SLURM From 93cc6637d6c78babdbc2baf5a7fdb5d1e110dbf0 Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru Date: Thu, 29 Aug 2024 14:08:50 +0900 Subject: [PATCH 12/20] update documentation --- evaluation/installers/g-leaderboard/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md index d1bb5598..06fdbf7f 100644 --- a/evaluation/installers/g-leaderboard/README.md +++ b/evaluation/installers/g-leaderboard/README.md @@ -79,4 +79,4 @@ AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh llm-jp/ ### About Azure OpenAI API -To conduct an evaluation, you must configure the Azure OpenAI API by setting the endpoint and key for the deployment named `gpt-4`, which uses `gpt-4-0613`. Please contact the administrator to obtain the necessary endpoint and key. +To conduct an evaluation, you must configure the Azure OpenAI API by setting the endpoint and key for the deployment named `gpt-4`, which corresponds to `gpt-4-0613`. Please contact the administrator to obtain the necessary endpoint and key. 
From 44ee980b1efeb74c44c6c615eacccad334ed80df Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru Date: Thu, 29 Aug 2024 14:22:40 +0900 Subject: [PATCH 13/20] update readme --- evaluation/installers/g-leaderboard/README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md index 06fdbf7f..347f7582 100644 --- a/evaluation/installers/g-leaderboard/README.md +++ b/evaluation/installers/g-leaderboard/README.md @@ -19,8 +19,11 @@ The following command will create an installation directory under the specified The list of available environment names can be found in the `scripts/envs` directory. ```bash +# NOTE: Using a CPU node is recommended as the installation process doesn't require GPUs + # For a cluster with SLURM sbatch --partition {partition} install.sh {env-name} ~/g-leaderboard + # For a cluster without SLURM bash install.sh {env-name} ~/g-leaderboard > logs/install.out 2> logs/install.err ``` @@ -62,8 +65,10 @@ For the other settings, edit the configuration file `resources/config_base.yaml` ```shell cd ~/g-leaderboard + # For a cluster with SLURM AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh {path/to/model} {wandb.run_name} + # For a cluster without SLURM CUDA_VISIBLE_DEVICES= AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh {path/to/model} {wandb.run_name} ``` @@ -73,6 +78,7 @@ CUDA_VISIBLE_DEVICES= AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash r ```shell # For a cluster with SLURM AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) + # For a cluster without SLURM AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami) ``` From 33d382759708afaa480ac610c55921986ceb67f8 Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru Date: Thu, 29 
Aug 2024 14:55:53 +0900 Subject: [PATCH 14/20] fix mtbench.model_id --- evaluation/installers/g-leaderboard/resources/config_base.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation/installers/g-leaderboard/resources/config_base.yaml b/evaluation/installers/g-leaderboard/resources/config_base.yaml index c5eaaa2c..2679945e 100644 --- a/evaluation/installers/g-leaderboard/resources/config_base.yaml +++ b/evaluation/installers/g-leaderboard/resources/config_base.yaml @@ -42,7 +42,7 @@ llm_jp_eval: # for mtbench mtbench: - model_id: "nii--llama-2-175b-exp2-instruct" # cannot use '<', '>', ':', '"', '/', '\\', '|', '?', '*', '.' + model_id: "<>" # cannot use '<', '>', ':', '"', '/', '\\', '|', '?', '*', '.' max_new_token: 1024 num_gpus_per_model: 8 num_gpus_total: 8 From 5191baa3c306e462b678b21e26a2a6f3bbc3cb78 Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru <13678589+hkiyomaru@users.noreply.github.com> Date: Thu, 29 Aug 2024 17:34:23 +0900 Subject: [PATCH 15/20] Update evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh Co-authored-by: YumaTsuta <67862948+YumaTsuta@users.noreply.github.com> --- .../installers/g-leaderboard/scripts/run_g-leaderboard.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh index f7deaf78..f08a94a5 100644 --- a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh +++ b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh @@ -3,7 +3,7 @@ #SBATCH --partition= #SBATCH --exclusive #SBATCH --nodes=1 -#SBATCH --gpus=1 +#SBATCH --gpus=8 #SBATCH --ntasks-per-node=8 #SBATCH --output=logs/%x-%j.out #SBATCH --error=logs/%x-%j.err From f11a2ee3591761b55e158a79ca5d4eb6a7afaac6 Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru <13678589+hkiyomaru@users.noreply.github.com> Date: Thu, 29 Aug 2024 17:34:33 +0900 Subject: [PATCH 16/20] Update 
evaluation/installers/g-leaderboard/scripts/env_common.sh Co-authored-by: YumaTsuta <67862948+YumaTsuta@users.noreply.github.com> --- evaluation/installers/g-leaderboard/scripts/env_common.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/evaluation/installers/g-leaderboard/scripts/env_common.sh b/evaluation/installers/g-leaderboard/scripts/env_common.sh index ac0467dd..ca7d1235 100644 --- a/evaluation/installers/g-leaderboard/scripts/env_common.sh +++ b/evaluation/installers/g-leaderboard/scripts/env_common.sh @@ -1,5 +1,6 @@ # List of environment variables and module loads for g-leaderboard +export LANG=ja_JP.UTF-8 export PYTHON_VERSION=3.10.14 export G_LEADERBOARD_TAG=g-leaderboard From b39adafa76c2335fbe3761abbae64ecab5235a66 Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru Date: Thu, 29 Aug 2024 17:38:44 +0900 Subject: [PATCH 17/20] hardcode to use g-leaderboard branch --- evaluation/installers/g-leaderboard/install.sh | 2 +- evaluation/installers/g-leaderboard/scripts/env_common.sh | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/evaluation/installers/g-leaderboard/install.sh b/evaluation/installers/g-leaderboard/install.sh index a6e29d46..406d1415 100644 --- a/evaluation/installers/g-leaderboard/install.sh +++ b/evaluation/installers/g-leaderboard/install.sh @@ -101,7 +101,7 @@ source venv/bin/activate # Install g-leaderboard pushd src -git clone https://github.com/wandb/llm-leaderboard g-leaderboard -b ${G_LEADERBOARD_TAG} +git clone https://github.com/wandb/llm-leaderboard g-leaderboard -b g-leaderboard pushd g-leaderboard pip install --no-cache-dir -r requirements.txt diff --git a/evaluation/installers/g-leaderboard/scripts/env_common.sh b/evaluation/installers/g-leaderboard/scripts/env_common.sh index ca7d1235..b031595d 100644 --- a/evaluation/installers/g-leaderboard/scripts/env_common.sh +++ b/evaluation/installers/g-leaderboard/scripts/env_common.sh @@ -2,5 +2,3 @@ export LANG=ja_JP.UTF-8 export PYTHON_VERSION=3.10.14 - -export 
G_LEADERBOARD_TAG=g-leaderboard From 7f1fb584375f3096050510153dfa5917e525cd2e Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru Date: Thu, 29 Aug 2024 17:42:34 +0900 Subject: [PATCH 18/20] deploy blended run config during installation --- evaluation/installers/g-leaderboard/install.sh | 6 +++++- .../installers/g-leaderboard/scripts/run_g-leaderboard.sh | 5 ----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/evaluation/installers/g-leaderboard/install.sh b/evaluation/installers/g-leaderboard/install.sh index 406d1415..b63db457 100644 --- a/evaluation/installers/g-leaderboard/install.sh +++ b/evaluation/installers/g-leaderboard/install.sh @@ -55,7 +55,7 @@ pushd $TARGET_DIR # Copy basic scripts for g-leaderboard cp ${INSTALLER_DIR}/scripts/run_g-leaderboard.sh . mkdir resources -cp ${INSTALLER_DIR}/resources/* resources/ +cp ${INSTALLER_DIR}/resources/config_base.yaml resources/ mkdir logs ENV_DIR=${TARGET_DIR}/environment @@ -105,4 +105,8 @@ git clone https://github.com/wandb/llm-leaderboard g-leaderboard -b g-leaderboar pushd g-leaderboard pip install --no-cache-dir -r requirements.txt +# Deploy blended run config +BLENDED_RUN_CONFIG=${INSTALLER_DIR}/resources/blended_run_config.yaml +cp $BLENDED_RUN_CONFIG blend_run_configs/config.yaml + echo "Installation done."
| tee >(cat >&2) diff --git a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh index f08a94a5..cf2de78b 100644 --- a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh +++ b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh @@ -44,11 +44,6 @@ for VAR in "${REPLACE_VARS[@]}"; do sed -i "s|<<${VAR}>>|${VALUE}|g" $NEW_CONFIG done -# Blended run config settings -BLENDED_RUN_CONFIG=resources/blended_run_config.yaml -BLENDED_RUN_CONFIG_DIR=${G_LEADERBOARD_DIR}/blend_run_configs -cp $BLENDED_RUN_CONFIG ${BLENDED_RUN_CONFIG_DIR}/config.yaml - # Create a temporal project # NOTE: This is necessary to avoid using incorrect configurations when running multiple jobs at the same time. TMP_G_LEADERBOARD_DIR=$(mktemp -d "${ENV_DIR}/src/g-leaderboard.XXXXXXXX") From a69083bc533458110483d1efa4f6dc999098e0cc Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru Date: Thu, 29 Aug 2024 17:57:48 +0900 Subject: [PATCH 19/20] update readme --- evaluation/installers/g-leaderboard/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md index 347f7582..5faa23e4 100644 --- a/evaluation/installers/g-leaderboard/README.md +++ b/evaluation/installers/g-leaderboard/README.md @@ -59,7 +59,7 @@ The following directory structure will be created after installation. ### Evaluation The evaluation script takes the model path and wandb run name as arguments. -For the other settings, edit the configuration file `resources/config_base.yaml` and/or `resources/config_custom.yaml`. +For the other settings, edit the configuration file `resources/config_base.yaml` and/or `run_g-leaderboard.sh`. - To edit the tokenizer, wandb entity, and/or wandb project: Edit `run_g-leaderboard.sh`. - Otherwise: Edit `resources/config_base.yaml` and `run_g-leaderboard.sh`. 
From c8050c1336607b77b4cdf80529ce88acfb03394c Mon Sep 17 00:00:00 2001 From: Hirokazu Kiyomaru Date: Fri, 30 Aug 2024 11:27:30 +0900 Subject: [PATCH 20/20] remove env-specific process --- evaluation/installers/g-leaderboard/README.md | 6 ++-- .../installers/g-leaderboard/install.sh | 33 +++---------------- .../scripts/{env_common.sh => environment.sh} | 0 .../scripts/envs/llm-jp-nvlink/environment.sh | 0 .../scripts/envs/llm-jp/environment.sh | 0 .../scripts/envs/sakura/environment.sh | 6 ---- 6 files changed, 7 insertions(+), 38 deletions(-) rename evaluation/installers/g-leaderboard/scripts/{env_common.sh => environment.sh} (100%) delete mode 100644 evaluation/installers/g-leaderboard/scripts/envs/llm-jp-nvlink/environment.sh delete mode 100644 evaluation/installers/g-leaderboard/scripts/envs/llm-jp/environment.sh delete mode 100644 evaluation/installers/g-leaderboard/scripts/envs/sakura/environment.sh diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md index 5faa23e4..12192a3b 100644 --- a/evaluation/installers/g-leaderboard/README.md +++ b/evaluation/installers/g-leaderboard/README.md @@ -15,17 +15,15 @@ cd scripts/evaluation/installers/g-leaderboard Then, run the installation script. The following command will create an installation directory under the specified directory (here, `~/g-leaderboard`). -`` should be the name of the environment (llm-jp, llm-jp-nvlink, sakura, etc). -The list of available environment names can be found in the `scripts/envs` directory. 
```bash # NOTE: Using a CPU node is recommended as the installation process doesn't require GPUs # For a cluster with SLURM -sbatch --partition {partition} install.sh {env-name} ~/g-leaderboard +sbatch --partition {partition} install.sh ~/g-leaderboard # For a cluster without SLURM -bash install.sh {env-name} ~/g-leaderboard > logs/install.out 2> logs/install.err +bash install.sh ~/g-leaderboard > logs/install.out 2> logs/install.err ``` After the installation is complete, set up the wandb and huggingface accounts. diff --git a/evaluation/installers/g-leaderboard/install.sh b/evaluation/installers/g-leaderboard/install.sh index b63db457..9b0747fb 100644 --- a/evaluation/installers/g-leaderboard/install.sh +++ b/evaluation/installers/g-leaderboard/install.sh @@ -22,30 +22,18 @@ set -eux -o pipefail -ENV_CHOICES=($(ls scripts/envs)) -TARGET_ENV_MSG="Set TARGET_ENV from (${ENV_CHOICES[@]} ) or add a new configuration in 'scripts/envs'." - -if [ $# -ne 2 ]; then +if [ $# -ne 1 ]; then set +x - >&2 echo Usage: sbatch \(or bash\) install.sh TARGET_ENV TARGET_DIR - >&2 echo $TARGET_ENV_MSG + >&2 echo Usage: sbatch \(or bash\) install.sh TARGET_DIR exit 1 fi INSTALLER_DIR=$(pwd) -TARGET_ENV=$1 -TARGET_DIR=$2 +TARGET_DIR=$1 INSTALLER_COMMON=$INSTALLER_DIR/../../../common/installers.sh -if [[ ! " ${ENV_CHOICES[@]} " =~ " ${TARGET_ENV} " ]]; then - set +x - >&2 echo $TARGET_ENV_MSG - exit 1 -fi - >&2 echo INSTALLER_DIR=$INSTALLER_DIR >&2 echo TARGET_DIR=$TARGET_DIR ->&2 echo TARGET_ENV=$TARGET_ENV >&2 echo INSTALLER_COMMON=$INSTALLER_COMMON source $INSTALLER_COMMON @@ -67,20 +55,9 @@ cp ${INSTALLER_DIR}/install.sh . 
mkdir scripts # Create environment.sh -BASE_ENV_SHELL=${INSTALLER_DIR}/scripts/env_common.sh -EXT_ENV_SHELL=${INSTALLER_DIR}/scripts/envs/${TARGET_ENV}/environment.sh +BASE_ENV_SHELL=${INSTALLER_DIR}/scripts/environment.sh NEW_ENV_SHELL=scripts/environment.sh - -print_env_shell() { - echo "#!/bin/bash" - echo - echo "# from $BASE_ENV_SHELL" - cat $BASE_ENV_SHELL - echo - echo "# from $EXT_ENV_SHELL" - cat $EXT_ENV_SHELL -} -print_env_shell > $NEW_ENV_SHELL +cp $BASE_ENV_SHELL $NEW_ENV_SHELL source $NEW_ENV_SHELL diff --git a/evaluation/installers/g-leaderboard/scripts/env_common.sh b/evaluation/installers/g-leaderboard/scripts/environment.sh similarity index 100% rename from evaluation/installers/g-leaderboard/scripts/env_common.sh rename to evaluation/installers/g-leaderboard/scripts/environment.sh diff --git a/evaluation/installers/g-leaderboard/scripts/envs/llm-jp-nvlink/environment.sh b/evaluation/installers/g-leaderboard/scripts/envs/llm-jp-nvlink/environment.sh deleted file mode 100644 index e69de29b..00000000 diff --git a/evaluation/installers/g-leaderboard/scripts/envs/llm-jp/environment.sh b/evaluation/installers/g-leaderboard/scripts/envs/llm-jp/environment.sh deleted file mode 100644 index e69de29b..00000000 diff --git a/evaluation/installers/g-leaderboard/scripts/envs/sakura/environment.sh b/evaluation/installers/g-leaderboard/scripts/envs/sakura/environment.sh deleted file mode 100644 index ad20ca48..00000000 --- a/evaluation/installers/g-leaderboard/scripts/envs/sakura/environment.sh +++ /dev/null @@ -1,6 +0,0 @@ -export CUDA_VERSION_MAJOR=12 -export CUDA_VERSION_MINOR=1 -export CUDA_VERSION=${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} -export CUDNN_VERSION=8.9.4 -module load cuda/${CUDA_VERSION} -module load /data/cudnn-tmp-install/modulefiles/${CUDNN_VERSION}