From 5c6f082f64c9a0ddb7612dfd558a26ee121562ed Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <h.kiyomaru@gmail.com>
Date: Wed, 28 Aug 2024 17:32:30 +0900
Subject: [PATCH 01/20] [wip] add g-leaderboard

---
 evaluation/installers/g-leaderboard/README.md |  82 +++++++++++++
 .../installers/g-leaderboard/install.sh       | 109 ++++++++++++++++++
 .../installers/g-leaderboard/logs/.gitignore  |   4 +
 .../g-leaderboard/resources/config_base.yaml  |  86 ++++++++++++++
 .../g-leaderboard/scripts/env_common.sh       |   5 +
 .../scripts/envs/llm-jp-nvlink/environment.sh |   0
 .../scripts/envs/llm-jp/environment.sh        |   0
 .../scripts/envs/sakura/environment.sh        |   6 +
 .../scripts/run_g-leaderboard.sh              |  61 ++++++++++
 9 files changed, 353 insertions(+)
 create mode 100644 evaluation/installers/g-leaderboard/README.md
 create mode 100644 evaluation/installers/g-leaderboard/install.sh
 create mode 100644 evaluation/installers/g-leaderboard/logs/.gitignore
 create mode 100644 evaluation/installers/g-leaderboard/resources/config_base.yaml
 create mode 100644 evaluation/installers/g-leaderboard/scripts/env_common.sh
 create mode 100644 evaluation/installers/g-leaderboard/scripts/envs/llm-jp-nvlink/environment.sh
 create mode 100644 evaluation/installers/g-leaderboard/scripts/envs/llm-jp/environment.sh
 create mode 100644 evaluation/installers/g-leaderboard/scripts/envs/sakura/environment.sh
 create mode 100644 evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh
diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md
new file mode 100644
index 00000000..ce2d1d26
--- /dev/null
+++ b/evaluation/installers/g-leaderboard/README.md
@@ -0,0 +1,82 @@
+# g-leaderboard (GENIAC Official Evaluation) installation and execution script
+
+This repository contains scripts for evaluating LLMs using g-leaderboard.
+
+## Usage
+
+### Build
+
+Clone this repository and move to the installation directory.
+
+```bash
+git clone https://github.com/llm-jp/scripts
+cd scripts/evaluation/installers/g-leaderboard
+```
+
+Then, run the installation script.
+The following command will create a working directory under the specified directory (`~/g-leaderboard`).
+`<env-name>` should be the name of the environment (llm-jp, llm-jp-nvlink, sakura, etc).
+The list of available environment names can be found in the `scripts/envs` directory.
+
+```bash
+# For a cluster with SLURM
+sbatch --partition {partition} install.sh <env-name> ~/g-leaderboard
+# For a cluster without SLURM
+bash install.sh <env-name> ~/g-leaderboard > logs/install.out 2> logs/install.err
+```
+
+After the installation is complete, set up the wandb and huggingface accounts.
+
+```shell
+cd ~/g-leaderboard
+source environment/venv/bin/activate
+wandb login
+huggingface-cli login
+```
+
+### Contents in installed directory (~/g-leaderboard)
+
+The following directory structure will be created after installation.
+
+```
+~/g-leaderboard/
+    run_g-leaderboard.sh  Script for running g-leaderboard
+    logs/                 Log files for SLURM jobs
+    resources/
+        config_base.yaml  Configuration file template
+    environment/
+        installer_envvar.log  List of environment variables recorded during installation
+        install.sh            Installation script
+        python/               Python
+        scripts/              Scripts for environment settings
+        src/                  Downloaded libraries
+        venv/                 Python virtual environemnt (linked to python/)
+```
+
+### Evaluation
+
+Replace variables as needed in `run_g-leaderboard.sh` and `resources/config_base.yaml`.
+ - To edit tokenizer, wandb entity, and/or wandb project: Edit `run_g-leaderboard.sh`.
+ - To edit others: Edit `resources/config_base.yaml` and `run_g-leaderboard.sh`.
+
+```shell
+cd ~/g-leaderboard
+# (Optional) If you need to change variables
+cp resources/config_base.yaml resources/config_custom.yaml
+cp run_g-leaderboard.sh run_g-leaderboard_custom.sh
+# Set `resources/config_custom.yaml` in run_g-leaderboard_custom.sh
+
+# For a cluster with SLURM
+sbatch --partition {partition} run_g-leaderboard.sh {path/to/model} {wandb.run_name}
+# For a cluster without SLURM
+CUDA_VISIBLE_DEVICES={num} bash run_g-leaderboard.sh {path/to/model} {wandb.run_name}
+```
+
+#### Sample code
+
+```shell
+# For a cluster with SLURM
+sbatch --partition {partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
+# For a cluster without SLURM
+CUDA_VISIBLE_DEVICES=0 bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
+```
diff --git a/evaluation/installers/g-leaderboard/install.sh b/evaluation/installers/g-leaderboard/install.sh
new file mode 100644
index 00000000..1149ea28
--- /dev/null
+++ b/evaluation/installers/g-leaderboard/install.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+#
+# g-leaderboard installation script
+#
+# This script use CPU on a cluster.
+#  - In a SLURM environment, it is recommend to use CPU nodes.
+#
+# Usage:
+# On a cluster with SLURM:
+#   Run `sbatch --partition {partition} install.sh TARGET_ENV TARGET_DIR`
+# On a cluster without SLURM:
+#   Run `bash install.sh TARGET_ENV TARGET_DIR > logs/install-eval.out 2> logs/install-eval.err`
+# - TARGET_DIR: Installation directory
+#
+#SBATCH --job-name=install-g-leaderboard
+#SBATCH --partition={FIX_ME}
+#SBATCH --nodes=1
+#SBATCH --exclusive
+#SBATCH --mem=0
+#SBATCH --output=logs/%x-%j.out
+#SBATCH --error=logs/%x-%j.err
+
+set -eux -o pipefail
+
+ENV_CHOICES=($(ls scripts/envs))
+TARGET_ENV_MSG="Set TARGET_ENV from (${ENV_CHOICES[@]} ) or add a new configuration in 'scripts/envs'."
+
+if [ $# -ne 2 ]; then
+  set +x
+  >&2 echo Usage: sbatch \(or bash\)  install.sh TARGET_ENV TARGET_DIR
+  >&2 echo $TARGET_ENV_MSG
+  exit 1
+fi
+
+INSTALLER_DIR=$(pwd)
+TARGET_ENV=$1
+TARGET_DIR=$2
+INSTALLER_COMMON=$INSTALLER_DIR/../../../common/installers.sh
+
+if [[ ! " ${ENV_CHOICES[@]} " =~ " ${TARGET_ENV} " ]]; then
+  set +x
+  >&2 echo $TARGET_ENV_MSG
+  exit 1
+fi
+
+>&2 echo INSTALLER_DIR=$INSTALLER_DIR
+>&2 echo TARGET_DIR=$TARGET_DIR
+>&2 echo TARGET_ENV=$TARGET_ENV
+>&2 echo INSTALLER_COMMON=$INSTALLER_COMMON
+source $INSTALLER_COMMON
+
+mkdir -p $TARGET_DIR
+pushd $TARGET_DIR
+
+# Copy basic scripts for g-leaderboard
+cp ${INSTALLER_DIR}/scripts/run_g-leaderboard.sh .
+mkdir resources
+cp ${INSTALLER_DIR}/resources/config_base.yaml resources/
+mkdir logs
+
+ENV_DIR=${TARGET_DIR}/environment
+mkdir $ENV_DIR
+pushd $ENV_DIR
+
+# Copy environment scripts
+cp ${INSTALLER_DIR}/install.sh .
+mkdir scripts
+
+# Create environment.sh
+BASE_ENV_SHELL=${INSTALLER_DIR}/scripts/env_common.sh
+EXT_ENV_SHELL=${INSTALLER_DIR}/scripts/envs/${TARGET_ENV}/environment.sh
+NEW_ENV_SHELL=scripts/environment.sh
+
+print_env_shell() {
+    echo "#!/bin/bash"
+    echo
+    echo "# from $BASE_ENV_SHELL"
+    cat $BASE_ENV_SHELL
+    echo
+    echo "# from $EXT_ENV_SHELL"
+    cat $EXT_ENV_SHELL
+}
+print_env_shell > $NEW_ENV_SHELL
+
+source $NEW_ENV_SHELL
+
+# Record current environment variables
+set > installer_envvar.log
+
+# src is used to store all resources for from-scratch builds
+mkdir src
+pushd src
+
+# Install Python (function in $INSTALLER_COMMON)
+install_python v${PYTHON_VERSION} ${ENV_DIR}/python
+popd # $ENV_DIR
+
+# Prepare venv
+python/bin/python3 -m venv venv
+source venv/bin/activate
+python -m pip install --no-cache-dir -U pip setuptools
+
+# Install g-leaderboard
+pushd src
+git clone https://github.com/wandb/llm-leaderboard g-leaderboard -b v${G_LEADERBOARD_TAG}
+pushd g-leaderboard
+pip install --no-cache-dir requirements.txt
+
+echo "Installation done." | tee >(cat >&2)
diff --git a/evaluation/installers/g-leaderboard/logs/.gitignore b/evaluation/installers/g-leaderboard/logs/.gitignore
new file mode 100644
index 00000000..5e7d2734
--- /dev/null
+++ b/evaluation/installers/g-leaderboard/logs/.gitignore
@@ -0,0 +1,4 @@
+# Ignore everything in this directory
+*
+# Except this file
+!.gitignore
diff --git a/evaluation/installers/g-leaderboard/resources/config_base.yaml b/evaluation/installers/g-leaderboard/resources/config_base.yaml
new file mode 100644
index 00000000..c5eaaa2c
--- /dev/null
+++ b/evaluation/installers/g-leaderboard/resources/config_base.yaml
@@ -0,0 +1,86 @@
+testmode: false # If you want to test with a small amount of data, please set it to true.
+model_name: "<<WANDB_RUN_NAME>>" # will be used in Table
+
+wandb:
+  entity: "<<WANDB_ENTITY>>"
+  project: "<<WANDB_PROJECT>>"
+  run_name: "<<WANDB_RUN_NAME>>" # this run_name will be used as the name of run in leaderboard. Can be changed later
+
+# Tasks to run
+run_llm_jp_eval_ja_0_shot: true
+run_llm_jp_eval_ja_few_shots: true
+run_llm_jp_eval_en_0_shot: true
+run_llm_jp_eval_en_few_shots: true
+run_mt_bench_ja: true
+run_mt_bench_en: true
+
+model:
+  api: false # if you don't use api, please set "api" as "false". If you use api, please select from "openai", "anthoropic", "google", "cohere", "mistral", "amazon_bedrock"
+  use_wandb_artifacts: false # if you use wandb artifacts, please set true.
+  artifacts_path: null  # if you use wandb artifacts, please paste the link. if not, please leave it as "".
+  pretrained_model_name_or_path: "<<MODEL>>" #If you use openai api, put the name of model
+  device_map: "auto"
+  load_in_8bit: false
+  load_in_4bit: false
+
+# for llm-jp-eval
+llm_jp_eval:
+  max_seq_length: 4096
+  target_dataset: "all" # {all, jamp, janli, jcommonsenseqa, jemhopqa, jnli, jsem, jsick, jsquad, jsts, niilc, chabsa, mmlu_en}
+  ja_num_shots: 4 # if run_llm_jp_eval_ja_few_shots is true, please set the num of few shots. Default is 4
+  en_num_shots: 4 # if run_llm_jp_eval_en_few_shots is true, please set the num of few shots. Default is 4
+  torch_dtype: "bf16" # {fp16, bf16, fp32}
+  # Items that do not need to be changed unless specifically intended.
+  dataset_artifact: "wandb-japan/llm-leaderboard/jaster:v11"
+  dataset_dir: "/jaster/1.2.6/evaluation/test"
+  ja: 
+    custom_prompt_template: "以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。\n\n### 指示:\n{instruction}\n\n### 入力:\n{input}\n\n### 応答:\n"
+    custom_fewshots_template: "\n\n### 入力:\n{input}\n\n### 応答:\n{output}"
+  en: 
+    custom_prompt_template: "以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。\n\n### 指示:\n{instruction}\n\n### 入力:\n{input}\n\n### 応答:\n"
+    custom_fewshots_template: "\n\n### 入力:\n{input}\n\n### 応答:\n{output}"
+
+# for mtbench
+mtbench:
+  model_id: "nii--llama-2-175b-exp2-instruct" # cannot use '<', '>', ':', '"', '/', '\\', '|', '?', '*', '.'  
+  max_new_token: 1024
+  num_gpus_per_model: 8
+  num_gpus_total: 8
+  max_gpu_memory: null
+  dtype: bfloat16 # None or float32 or float16 or bfloat16
+  use_azure: true # if you use azure openai service for evaluation, set true
+  # for conv template # added
+  custom_conv_template: true
+  # the following variables will be used when custom_conv_template is set as true
+  conv_name: "custom"
+  conv_sep: "\n\n### "
+  conv_stop_token_ids: "[2]"
+  conv_stop_str: "###"
+  conv_role_message_separator: ":\n"
+  conv_role_only_separator: ":\n"
+  ja:
+    conv_system_message: "以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。"
+    conv_roles: "('指示', '応答')"
+  en:
+    conv_system_message: "以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。"
+    conv_roles: "('指示', '応答')"
+  dataset: # Items that do not need to be changed unless specifically intended.
+    ja:
+      question_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_ja_question:v3" 
+      test_question_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_ja_question_small_for_test:v5"
+      referenceanswer_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_ja_referenceanswer:v1" 
+      test_referenceanswer_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_ja_referenceanswer_small_for_test:v1"
+      judge_prompt_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_ja_prompt:v1"
+      bench_name: "mt_bench_ja"
+    en:
+      question_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_en_question:v0"
+      test_question_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_en_question_small_for_test:v0"
+      referenceanswer_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_en_referenceanswer:v0" 
+      test_referenceanswer_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_en_referenceanswer_small_for_test:v0"
+      judge_prompt_artifacts_path: "wandb-japan/llm-leaderboard/mtbench_en_prompt:v0"
+      bench_name: "mt_bench_en"
+
+#==================================================================
+# Items that do not need to be changed unless specifically intended.
+#==================================================================
+github_version: g-eval-v1.0 #for recording
diff --git a/evaluation/installers/g-leaderboard/scripts/env_common.sh b/evaluation/installers/g-leaderboard/scripts/env_common.sh
new file mode 100644
index 00000000..ac0467dd
--- /dev/null
+++ b/evaluation/installers/g-leaderboard/scripts/env_common.sh
@@ -0,0 +1,5 @@
+# List of environment variables and module loads for g-leaderboard
+
+export PYTHON_VERSION=3.10.14
+
+export G_LEADERBOARD_TAG=g-leaderboard
diff --git a/evaluation/installers/g-leaderboard/scripts/envs/llm-jp-nvlink/environment.sh b/evaluation/installers/g-leaderboard/scripts/envs/llm-jp-nvlink/environment.sh
new file mode 100644
index 00000000..e69de29b
diff --git a/evaluation/installers/g-leaderboard/scripts/envs/llm-jp/environment.sh b/evaluation/installers/g-leaderboard/scripts/envs/llm-jp/environment.sh
new file mode 100644
index 00000000..e69de29b
diff --git a/evaluation/installers/g-leaderboard/scripts/envs/sakura/environment.sh b/evaluation/installers/g-leaderboard/scripts/envs/sakura/environment.sh
new file mode 100644
index 00000000..ad20ca48
--- /dev/null
+++ b/evaluation/installers/g-leaderboard/scripts/envs/sakura/environment.sh
@@ -0,0 +1,6 @@
+export CUDA_VERSION_MAJOR=12
+export CUDA_VERSION_MINOR=1
+export CUDA_VERSION=${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}
+export CUDNN_VERSION=8.9.4
+module load cuda/${CUDA_VERSION}
+module load /data/cudnn-tmp-install/modulefiles/${CUDNN_VERSION}
diff --git a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh
new file mode 100644
index 00000000..7d57453f
--- /dev/null
+++ b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+#SBATCH --job-name=g-leaderboard
+#SBATCH --partition=<partition>
+#SBATCH --exclusive
+#SBATCH --nodes=1
+#SBATCH --gpus=1
+#SBATCH --ntasks-per-node=8
+#SBATCH --output=logs/%x-%j.out
+#SBATCH --error=logs/%x-%j.err
+
+set -eux
+
+# Open file limit
+ulimit -n 65536 1048576
+
+ENV_DIR=environment
+source ${ENV_DIR}/scripts/environment.sh
+source ${ENV_DIR}/venv/bin/activate
+
+# Arguments
+MODEL=$1
+WANDB_RUN_NAME=$2
+
+# Semi-fixed vars
+CONFIG_TEMPLATE=resources/config_base.yaml
+TOKENIZER=$MODEL
+WANDB_ENTITY=llm-jp-eval
+WANDB_PROJECT=test
+
+# Fixed vars
+G_LEADERBOARD_DIR=${ENV_DIR}/src/g-leaderboard
+CONFIG_DIR=${G_LEADERBOARD_DIR}/configs
+SCRIPT_PATH=${G_LEADERBOARD_DIR}/scripts/run_eval.py
+
+# Config settings
+NEW_CONFIG=${CONFIG_DIR}/config.${WANDB_PROJECT}.${WANDB_RUN_NAME}.yaml
+REPLACE_VARS=("MODEL" "TOKENIZER" "DATASET_DIR" "WANDB_ENTITY" "WANDB_PROJECT" "WANDB_RUN_NAME")
+
+# Create a new config file to save the config file of each run
+cp $CONFIG_TEMPLATE $NEW_CONFIG
+
+# Replace variables
+for VAR in "${REPLACE_VARS[@]}"; do
+  VALUE=$(eval echo \${$VAR})
+  sed -i "s|<<${VAR}>>|${VALUE}|g" $NEW_CONFIG
+done
+
+# Create a temporal project
+TMP_G_LEADERBOARD_DIR=$(mktemp -d "${HOME}/ckpt_convert.XXXXXXXX")
+cp -r $G_LEADERBOARD_DIR/* $TMP_G_LEADERBOARD_DIR
+cp $NEW_CONFIG $TMP_G_LEADERBOARD_DIR/configs/config.yaml
+
+# Run g-leaderboard
+pushd $TMP_G_LEADERBOARD_DIR
+python $SCRIPT_PATH
+
+# Clean up
+popd
+rm -rf $TMP_G_LEADERBOARD_DIR
+
+echo "Done"

From ab8acba21155ddadd0c8795b8b4bb8590b9f81ca Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <h.kiyomaru@gmail.com>
Date: Wed, 28 Aug 2024 17:36:09 +0900
Subject: [PATCH 02/20] add OpenAI-related environment variables

---
 evaluation/installers/g-leaderboard/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md
index ce2d1d26..7f6b607b 100644
--- a/evaluation/installers/g-leaderboard/README.md
+++ b/evaluation/installers/g-leaderboard/README.md
@@ -76,7 +76,7 @@ CUDA_VISIBLE_DEVICES={num} bash run_g-leaderboard.sh {path/to/model} {wandb.run_
 
 ```shell
 # For a cluster with SLURM
-sbatch --partition {partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
+OPENAI_AZURE_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
 # For a cluster without SLURM
-CUDA_VISIBLE_DEVICES=0 bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
+OPENAI_AZURE_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
 ```

From 683fd4cf294f2106926f54a8dea106488fafc8ae Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <h.kiyomaru@gmail.com>
Date: Wed, 28 Aug 2024 18:30:51 +0900
Subject: [PATCH 03/20] configure blended run

---
 .../resources/blended_run_config.yaml         | 25 +++++++++++++++++++
 .../scripts/run_g-leaderboard.sh              |  5 ++++
 2 files changed, 30 insertions(+)
 create mode 100644 evaluation/installers/g-leaderboard/resources/blended_run_config.yaml

diff --git a/evaluation/installers/g-leaderboard/resources/blended_run_config.yaml b/evaluation/installers/g-leaderboard/resources/blended_run_config.yaml
new file mode 100644
index 00000000..640656f1
--- /dev/null
+++ b/evaluation/installers/g-leaderboard/resources/blended_run_config.yaml
@@ -0,0 +1,25 @@
+run_chain: false # If you want to reuse past evaluation results in a new run, please set it to true.
+
+new_run: # This setting is for blending runs without running new evaluations. If run_chain is set to true, this setting is disabled.
+  entity: "your/WANDB/entity"
+  project: "your/WANDB/project"
+  run_name: "your/WANDB/run_name"
+
+old_runs: # Please specify the tasks you want to carry over from past runs. Multiple runs are permissible.
+  - run_path: "your/WANDB/run_path"
+    tasks: # The list of tasks to take over. Please comment out tasks that do not need to be taken over.
+      - jaster_ja_0_shot
+      - jaster_ja_4_shot
+      - jaster_en_0_shot
+      - jaster_en_4_shot
+      - mtbench_ja
+      - mtbench_en
+  # - run_path: "your/WANDB/run_path"
+  #   tasks:
+  #     - jaster_ja_0_shot
+  #     - jaster_ja_4_shot
+  #     - jaster_en_0_shot
+  #     - jaster_en_4_shot
+  #     - mtbench_ja
+  #     - mtbench_en
+  
\ No newline at end of file
diff --git a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh
index 7d57453f..a9997481 100644
--- a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh
+++ b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh
@@ -45,6 +45,11 @@ for VAR in "${REPLACE_VARS[@]}"; do
   sed -i "s|<<${VAR}>>|${VALUE}|g" $NEW_CONFIG
 done
 
+# Blended run config settings
+BLENDED_RUN_CONFIG=resources/blended_run_config.yaml
+BLENDED_RUN_CONFIG_DIR=${G_LEADERBOARD_DIR}/blend_run_configs
+cp $BLENDED_RUN_CONFIG ${BLENDED_RUN_CONFIG_DIR}/config.yaml
+
 # Create a temporal project
 TMP_G_LEADERBOARD_DIR=$(mktemp -d "${HOME}/ckpt_convert.XXXXXXXX")
 cp -r $G_LEADERBOARD_DIR/* $TMP_G_LEADERBOARD_DIR

From d1a5060819d9ad042fabda3d117ba6c770c9bc20 Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <h.kiyomaru@gmail.com>
Date: Wed, 28 Aug 2024 18:41:10 +0900
Subject: [PATCH 04/20] fix

---
 evaluation/installers/g-leaderboard/install.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evaluation/installers/g-leaderboard/install.sh b/evaluation/installers/g-leaderboard/install.sh
index 1149ea28..16e1719c 100644
--- a/evaluation/installers/g-leaderboard/install.sh
+++ b/evaluation/installers/g-leaderboard/install.sh
@@ -102,7 +102,7 @@ python -m pip install --no-cache-dir -U pip setuptools
 
 # Install g-leaderboard
 pushd src
-git clone https://github.com/wandb/llm-leaderboard g-leaderboard -b v${G_LEADERBOARD_TAG}
+git clone https://github.com/wandb/llm-leaderboard g-leaderboard -b ${G_LEADERBOARD_TAG}
 pushd g-leaderboard
 pip install --no-cache-dir requirements.txt
 

From 08f3fb3a2fa16673c42af3feb6bbd7860918d0e8 Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <h.kiyomaru@gmail.com>
Date: Wed, 28 Aug 2024 19:33:30 +0900
Subject: [PATCH 05/20] fix indent

---
 evaluation/installers/g-leaderboard/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md
index 7f6b607b..869580da 100644
--- a/evaluation/installers/g-leaderboard/README.md
+++ b/evaluation/installers/g-leaderboard/README.md
@@ -40,10 +40,10 @@ The following directory structure will be created after installation.
 
 ```
 ~/g-leaderboard/
-    run_g-leaderboard.sh  Script for running g-leaderboard
-    logs/                 Log files for SLURM jobs
+    run_g-leaderboard.sh      Script for running g-leaderboard
+    logs/                     Log files for SLURM jobs
     resources/
-        config_base.yaml  Configuration file template
+        config_base.yaml      Configuration file template
     environment/
         installer_envvar.log  List of environment variables recorded during installation
         install.sh            Installation script

From 04130c063b84c8b8ea7cadbf11a8b2326ffd117f Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <h.kiyomaru@gmail.com>
Date: Wed, 28 Aug 2024 19:33:36 +0900
Subject: [PATCH 06/20] fix

---
 evaluation/installers/g-leaderboard/install.sh              | 4 ++--
 .../installers/g-leaderboard/scripts/run_g-leaderboard.sh   | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/evaluation/installers/g-leaderboard/install.sh b/evaluation/installers/g-leaderboard/install.sh
index 16e1719c..00564723 100644
--- a/evaluation/installers/g-leaderboard/install.sh
+++ b/evaluation/installers/g-leaderboard/install.sh
@@ -56,6 +56,7 @@ pushd $TARGET_DIR
 cp ${INSTALLER_DIR}/scripts/run_g-leaderboard.sh .
 mkdir resources
 cp ${INSTALLER_DIR}/resources/config_base.yaml resources/
+cp ${INSTALLER_DIR}/resources/blended_run_config.yaml resources/
 mkdir logs
 
 ENV_DIR=${TARGET_DIR}/environment
@@ -98,12 +99,11 @@ popd # $ENV_DIR
 # Prepare venv
 python/bin/python3 -m venv venv
 source venv/bin/activate
-python -m pip install --no-cache-dir -U pip setuptools
 
 # Install g-leaderboard
 pushd src
 git clone https://github.com/wandb/llm-leaderboard g-leaderboard -b ${G_LEADERBOARD_TAG}
 pushd g-leaderboard
-pip install --no-cache-dir requirements.txt
+pip install --no-cache-dir -r requirements.txt
 
 echo "Installation done." | tee >(cat >&2)
diff --git a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh
index a9997481..b661b9c0 100644
--- a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh
+++ b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh
@@ -30,11 +30,10 @@ WANDB_PROJECT=test
 # Fixed vars
 G_LEADERBOARD_DIR=${ENV_DIR}/src/g-leaderboard
 CONFIG_DIR=${G_LEADERBOARD_DIR}/configs
-SCRIPT_PATH=${G_LEADERBOARD_DIR}/scripts/run_eval.py
 
 # Config settings
 NEW_CONFIG=${CONFIG_DIR}/config.${WANDB_PROJECT}.${WANDB_RUN_NAME}.yaml
-REPLACE_VARS=("MODEL" "TOKENIZER" "DATASET_DIR" "WANDB_ENTITY" "WANDB_PROJECT" "WANDB_RUN_NAME")
+REPLACE_VARS=("MODEL" "TOKENIZER" "WANDB_ENTITY" "WANDB_PROJECT" "WANDB_RUN_NAME")
 
 # Create a new config file to save the config file of each run
 cp $CONFIG_TEMPLATE $NEW_CONFIG
@@ -51,11 +50,12 @@ BLENDED_RUN_CONFIG_DIR=${G_LEADERBOARD_DIR}/blend_run_configs
 cp $BLENDED_RUN_CONFIG ${BLENDED_RUN_CONFIG_DIR}/config.yaml
 
 # Create a temporal project
-TMP_G_LEADERBOARD_DIR=$(mktemp -d "${HOME}/ckpt_convert.XXXXXXXX")
+TMP_G_LEADERBOARD_DIR=$(mktemp -d "${ENV_DIR}/src/g-leaderboard.XXXXXXXX")
 cp -r $G_LEADERBOARD_DIR/* $TMP_G_LEADERBOARD_DIR
 cp $NEW_CONFIG $TMP_G_LEADERBOARD_DIR/configs/config.yaml
 
 # Run g-leaderboard
+SCRIPT_PATH=scripts/run_eval.py
 pushd $TMP_G_LEADERBOARD_DIR
 python $SCRIPT_PATH
 

From 820cba128847274d13b591a7227776f3a920e19d Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <h.kiyomaru@gmail.com>
Date: Thu, 29 Aug 2024 10:59:29 +0900
Subject: [PATCH 07/20] use env command

---
 evaluation/installers/g-leaderboard/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md
index 869580da..cee83e7a 100644
--- a/evaluation/installers/g-leaderboard/README.md
+++ b/evaluation/installers/g-leaderboard/README.md
@@ -76,7 +76,7 @@ CUDA_VISIBLE_DEVICES={num} bash run_g-leaderboard.sh {path/to/model} {wandb.run_
 
 ```shell
 # For a cluster with SLURM
-OPENAI_AZURE_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
+env OPENAI_AZURE_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
 # For a cluster without SLURM
-OPENAI_AZURE_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
+env OPENAI_AZURE_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
 ```

From 1f385349bd8c0dd24e09f6cc7852506dd6c4cb8a Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <h.kiyomaru@gmail.com>
Date: Thu, 29 Aug 2024 12:04:49 +0900
Subject: [PATCH 08/20] fix envvar name

---
 evaluation/installers/g-leaderboard/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md
index cee83e7a..45a4ddd5 100644
--- a/evaluation/installers/g-leaderboard/README.md
+++ b/evaluation/installers/g-leaderboard/README.md
@@ -76,7 +76,7 @@ CUDA_VISIBLE_DEVICES={num} bash run_g-leaderboard.sh {path/to/model} {wandb.run_
 
 ```shell
 # For a cluster with SLURM
-env OPENAI_AZURE_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
+env AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
 # For a cluster without SLURM
-env OPENAI_AZURE_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
+env AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
 ```

From 392fe0084ab6d0e607949d23f2be327f386374ef Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <h.kiyomaru@gmail.com>
Date: Thu, 29 Aug 2024 12:23:00 +0900
Subject: [PATCH 09/20] update readme

---
 evaluation/installers/g-leaderboard/README.md | 22 +++++++++++--------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md
index 45a4ddd5..06f799b7 100644
--- a/evaluation/installers/g-leaderboard/README.md
+++ b/evaluation/installers/g-leaderboard/README.md
@@ -1,6 +1,6 @@
 # g-leaderboard (GENIAC Official Evaluation) installation and execution script
 
-This repository contains scripts for evaluating LLMs using g-leaderboard.
+This repository contains scripts for evaluating LLMs using [g-leaderboard](https://github.com/wandb/llm-leaderboard/tree/g-leaderboard).
 
 ## Usage
 
@@ -14,7 +14,7 @@ cd scripts/evaluation/installers/g-leaderboard
 ```
 
 Then, run the installation script.
-The following command will create a working directory under the specified directory (`~/g-leaderboard`).
+The following command will create a working directory under the specified directory (here, `~/g-leaderboard`).
 `<env-name>` should be the name of the environment (llm-jp, llm-jp-nvlink, sakura, etc).
 The list of available environment names can be found in the `scripts/envs` directory.
 
@@ -41,13 +41,13 @@ The following directory structure will be created after installation.
 ```
 ~/g-leaderboard/
     run_g-leaderboard.sh      Script for running g-leaderboard
-    logs/                     Log files for SLURM jobs
+    logs/                     Log files written by SLURM jobs
     resources/
         config_base.yaml      Configuration file template
     environment/
         installer_envvar.log  List of environment variables recorded during installation
         install.sh            Installation script
-        python/               Python
+        python/               Python built from source
         scripts/              Scripts for environment settings
         src/                  Downloaded libraries
         venv/                 Python virtual environemnt (linked to python/)
@@ -57,7 +57,7 @@ The following directory structure will be created after installation.
 
 Replace variables as needed in `run_g-leaderboard.sh` and `resources/config_base.yaml`.
  - To edit tokenizer, wandb entity, and/or wandb project: Edit `run_g-leaderboard.sh`.
- - To edit others: Edit `resources/config_base.yaml` and `run_g-leaderboard.sh`.
+ - Otherwise: Edit `resources/config_base.yaml` and `run_g-leaderboard.sh`.
 
 ```shell
 cd ~/g-leaderboard
@@ -67,16 +67,20 @@ cp run_g-leaderboard.sh run_g-leaderboard_custom.sh
 # Set `resources/config_custom.yaml` in run_g-leaderboard_custom.sh
 
 # For a cluster with SLURM
-sbatch --partition {partition} run_g-leaderboard.sh {path/to/model} {wandb.run_name}
+AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh {path/to/model} {wandb.run_name}
 # For a cluster without SLURM
-CUDA_VISIBLE_DEVICES={num} bash run_g-leaderboard.sh {path/to/model} {wandb.run_name}
+CUDA_VISIBLE_DEVICES=<num> AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh {path/to/model} {wandb.run_name}
 ```
 
 #### Sample code
 
 ```shell
 # For a cluster with SLURM
-env AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
+AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
 # For a cluster without SLURM
-env AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
+AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
 ```
+
+### About Azure OpenAI API
+
+To conduct an evaluation, you must configure the Azure OpenAI API by setting the endpoint and key for the deployment named `gpt-4`, which uses `gpt-4-0613`. Please contact the administrator to obtain the necessary endpoint and key.

From 347f71ad22e6aa5d0400a3c19a2677d9040aabd5 Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <h.kiyomaru@gmail.com>
Date: Thu, 29 Aug 2024 13:25:10 +0900
Subject: [PATCH 10/20] update documentation

---
 evaluation/installers/g-leaderboard/README.md             | 8 ++++----
 evaluation/installers/g-leaderboard/install.sh            | 5 ++---
 .../installers/g-leaderboard/scripts/run_g-leaderboard.sh | 1 +
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md
index 06f799b7..3c8261f1 100644
--- a/evaluation/installers/g-leaderboard/README.md
+++ b/evaluation/installers/g-leaderboard/README.md
@@ -1,4 +1,4 @@
-# g-leaderboard (GENIAC Official Evaluation) installation and execution script
+# LLM Evaluation using g-leaderboard (GENIAC Official Evaluation)
 
 This repository contains scripts for evaluating LLMs using [g-leaderboard](https://github.com/wandb/llm-leaderboard/tree/g-leaderboard).
 
@@ -14,15 +14,15 @@ cd scripts/evaluation/installers/g-leaderboard
 ```
 
 Then, run the installation script.
-The following command will create a working directory under the specified directory (here, `~/g-leaderboard`).
+The following command will create an installation directory under the specified directory (here, `~/g-leaderboard`).
 `<env-name>` should be the name of the environment (llm-jp, llm-jp-nvlink, sakura, etc).
 The list of available environment names can be found in the `scripts/envs` directory.
 
 ```bash
 # For a cluster with SLURM
-sbatch --partition {partition} install.sh <env-name> ~/g-leaderboard
+sbatch --partition {partition} install.sh {env-name} ~/g-leaderboard
 # For a cluster without SLURM
-bash install.sh <env-name> ~/g-leaderboard > logs/install.out 2> logs/install.err
+bash install.sh {env-name} ~/g-leaderboard > logs/install.out 2> logs/install.err
 ```
 
 After the installation is complete, set up the wandb and huggingface accounts.
diff --git a/evaluation/installers/g-leaderboard/install.sh b/evaluation/installers/g-leaderboard/install.sh
index 00564723..a6e29d46 100644
--- a/evaluation/installers/g-leaderboard/install.sh
+++ b/evaluation/installers/g-leaderboard/install.sh
@@ -3,7 +3,7 @@
 # g-leaderboard installation script
 #
 # This script use CPU on a cluster.
-#  - In a SLURM environment, it is recommend to use CPU nodes.
+#  - In a SLURM environment, it is recommended to use CPU nodes.
 #
 # Usage:
 # On a cluster with SLURM:
@@ -55,8 +55,7 @@ pushd $TARGET_DIR
 # Copy basic scripts for g-leaderboard
 cp ${INSTALLER_DIR}/scripts/run_g-leaderboard.sh .
 mkdir resources
-cp ${INSTALLER_DIR}/resources/config_base.yaml resources/
-cp ${INSTALLER_DIR}/resources/blended_run_config.yaml resources/
+cp ${INSTALLER_DIR}/resources/* resources/
 mkdir logs
 
 ENV_DIR=${TARGET_DIR}/environment
diff --git a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh
index b661b9c0..f7deaf78 100644
--- a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh
+++ b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh
@@ -50,6 +50,7 @@ BLENDED_RUN_CONFIG_DIR=${G_LEADERBOARD_DIR}/blend_run_configs
 cp $BLENDED_RUN_CONFIG ${BLENDED_RUN_CONFIG_DIR}/config.yaml
 
 # Create a temporal project
+# NOTE: This is necessary to avoid using incorrect configurations when running multiple jobs at the same time.
 TMP_G_LEADERBOARD_DIR=$(mktemp -d "${ENV_DIR}/src/g-leaderboard.XXXXXXXX")
 cp -r $G_LEADERBOARD_DIR/* $TMP_G_LEADERBOARD_DIR
 cp $NEW_CONFIG $TMP_G_LEADERBOARD_DIR/configs/config.yaml

From e36abfc0afbff4782088a133d8e69f860a440f95 Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <h.kiyomaru@gmail.com>
Date: Thu, 29 Aug 2024 14:03:11 +0900
Subject: [PATCH 11/20] update documentation

---
 evaluation/installers/g-leaderboard/README.md | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md
index 3c8261f1..d1bb5598 100644
--- a/evaluation/installers/g-leaderboard/README.md
+++ b/evaluation/installers/g-leaderboard/README.md
@@ -55,17 +55,13 @@ The following directory structure will be created after installation.
 
 ### Evaluation
 
-Replace variables as needed in `run_g-leaderboard.sh` and `resources/config_base.yaml`.
- - To edit tokenizer, wandb entity, and/or wandb project: Edit `run_g-leaderboard.sh`.
+The evaluation script takes the model path and wandb run name as arguments.
+For the other settings, edit the configuration file `resources/config_base.yaml` and/or `resources/config_custom.yaml`.
+ - To edit the tokenizer, wandb entity, and/or wandb project: Edit `run_g-leaderboard.sh`.
  - Otherwise: Edit `resources/config_base.yaml` and `run_g-leaderboard.sh`.
 
 ```shell
 cd ~/g-leaderboard
-# (Optional) If you need to change variables
-cp resources/config_base.yaml resources/config_custom.yaml
-cp run_g-leaderboard.sh run_g-leaderboard_custom.sh
-# Set `resources/config_custom.yaml` in run_g-leaderboard_custom.sh
-
 # For a cluster with SLURM
 AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh {path/to/model} {wandb.run_name}
 # For a cluster without SLURM

From 93cc6637d6c78babdbc2baf5a7fdb5d1e110dbf0 Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <h.kiyomaru@gmail.com>
Date: Thu, 29 Aug 2024 14:08:50 +0900
Subject: [PATCH 12/20] update documentation

---
 evaluation/installers/g-leaderboard/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md
index d1bb5598..06fdbf7f 100644
--- a/evaluation/installers/g-leaderboard/README.md
+++ b/evaluation/installers/g-leaderboard/README.md
@@ -79,4 +79,4 @@ AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh llm-jp/
 
 ### About Azure OpenAI API
 
-To conduct an evaluation, you must configure the Azure OpenAI API by setting the endpoint and key for the deployment named `gpt-4`, which uses `gpt-4-0613`. Please contact the administrator to obtain the necessary endpoint and key.
+To conduct an evaluation, you must configure the Azure OpenAI API by setting the endpoint and key for the deployment named `gpt-4`, which corresponds to `gpt-4-0613`. Please contact the administrator to obtain the necessary endpoint and key.

From 44ee980b1efeb74c44c6c615eacccad334ed80df Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <h.kiyomaru@gmail.com>
Date: Thu, 29 Aug 2024 14:22:40 +0900
Subject: [PATCH 13/20] update readme

---
 evaluation/installers/g-leaderboard/README.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md
index 06fdbf7f..347f7582 100644
--- a/evaluation/installers/g-leaderboard/README.md
+++ b/evaluation/installers/g-leaderboard/README.md
@@ -19,8 +19,11 @@ The following command will create an installation directory under the specified
 The list of available environment names can be found in the `scripts/envs` directory.
 
 ```bash
+# NOTE: Using a CPU node is recommended as the installation process doesn't require GPUs
+
 # For a cluster with SLURM
 sbatch --partition {partition} install.sh {env-name} ~/g-leaderboard
+
 # For a cluster without SLURM
 bash install.sh {env-name} ~/g-leaderboard > logs/install.out 2> logs/install.err
 ```
@@ -62,8 +65,10 @@ For the other settings, edit the configuration file `resources/config_base.yaml`
 
 ```shell
 cd ~/g-leaderboard
+
 # For a cluster with SLURM
 AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh {path/to/model} {wandb.run_name}
+
 # For a cluster without SLURM
 CUDA_VISIBLE_DEVICES=<num> AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh {path/to/model} {wandb.run_name}
 ```
@@ -73,6 +78,7 @@ CUDA_VISIBLE_DEVICES=<num> AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash r
 ```shell
 # For a cluster with SLURM
 AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx sbatch --partition {partition} run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
+
 # For a cluster without SLURM
 AZURE_OPENAI_ENDPOINT=xxx AZURE_OPENAI_KEY=xxx bash run_g-leaderboard.sh llm-jp/llm-jp-13b-v2.0 g-leaderboard-$(whoami)
 ```

From 33d382759708afaa480ac610c55921986ceb67f8 Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <h.kiyomaru@gmail.com>
Date: Thu, 29 Aug 2024 14:55:53 +0900
Subject: [PATCH 14/20] fix mtbench.model_id

---
 evaluation/installers/g-leaderboard/resources/config_base.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evaluation/installers/g-leaderboard/resources/config_base.yaml b/evaluation/installers/g-leaderboard/resources/config_base.yaml
index c5eaaa2c..2679945e 100644
--- a/evaluation/installers/g-leaderboard/resources/config_base.yaml
+++ b/evaluation/installers/g-leaderboard/resources/config_base.yaml
@@ -42,7 +42,7 @@ llm_jp_eval:
 
 # for mtbench
 mtbench:
-  model_id: "nii--llama-2-175b-exp2-instruct" # cannot use '<', '>', ':', '"', '/', '\\', '|', '?', '*', '.'  
+  model_id: "<<WANDB_RUN_NAME>>" # cannot use '<', '>', ':', '"', '/', '\\', '|', '?', '*', '.'  
   max_new_token: 1024
   num_gpus_per_model: 8
   num_gpus_total: 8

From 5191baa3c306e462b678b21e26a2a6f3bbc3cb78 Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <13678589+hkiyomaru@users.noreply.github.com>
Date: Thu, 29 Aug 2024 17:34:23 +0900
Subject: [PATCH 15/20] Update
 evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh

Co-authored-by: YumaTsuta <67862948+YumaTsuta@users.noreply.github.com>
---
 .../installers/g-leaderboard/scripts/run_g-leaderboard.sh       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh
index f7deaf78..f08a94a5 100644
--- a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh
+++ b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh
@@ -3,7 +3,7 @@
 #SBATCH --partition=<partition>
 #SBATCH --exclusive
 #SBATCH --nodes=1
-#SBATCH --gpus=1
+#SBATCH --gpus=8
 #SBATCH --ntasks-per-node=8
 #SBATCH --output=logs/%x-%j.out
 #SBATCH --error=logs/%x-%j.err

From f11a2ee3591761b55e158a79ca5d4eb6a7afaac6 Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <13678589+hkiyomaru@users.noreply.github.com>
Date: Thu, 29 Aug 2024 17:34:33 +0900
Subject: [PATCH 16/20] Update
 evaluation/installers/g-leaderboard/scripts/env_common.sh

Co-authored-by: YumaTsuta <67862948+YumaTsuta@users.noreply.github.com>
---
 evaluation/installers/g-leaderboard/scripts/env_common.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/evaluation/installers/g-leaderboard/scripts/env_common.sh b/evaluation/installers/g-leaderboard/scripts/env_common.sh
index ac0467dd..ca7d1235 100644
--- a/evaluation/installers/g-leaderboard/scripts/env_common.sh
+++ b/evaluation/installers/g-leaderboard/scripts/env_common.sh
@@ -1,5 +1,6 @@
 # List of environment variables and module loads for g-leaderboard
 
+export LANG=ja_JP.UTF-8
 export PYTHON_VERSION=3.10.14
 
 export G_LEADERBOARD_TAG=g-leaderboard

From b39adafa76c2335fbe3761abbae64ecab5235a66 Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <h.kiyomaru@gmail.com>
Date: Thu, 29 Aug 2024 17:38:44 +0900
Subject: [PATCH 17/20] hardcode to use g-leaderboard branch

---
 evaluation/installers/g-leaderboard/install.sh            | 2 +-
 evaluation/installers/g-leaderboard/scripts/env_common.sh | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/evaluation/installers/g-leaderboard/install.sh b/evaluation/installers/g-leaderboard/install.sh
index a6e29d46..406d1415 100644
--- a/evaluation/installers/g-leaderboard/install.sh
+++ b/evaluation/installers/g-leaderboard/install.sh
@@ -101,7 +101,7 @@ source venv/bin/activate
 
 # Install g-leaderboard
 pushd src
-git clone https://github.com/wandb/llm-leaderboard g-leaderboard -b ${G_LEADERBOARD_TAG}
+git clone https://github.com/wandb/llm-leaderboard g-leaderboard -b g-leaderboard
 pushd g-leaderboard
 pip install --no-cache-dir -r requirements.txt
 
diff --git a/evaluation/installers/g-leaderboard/scripts/env_common.sh b/evaluation/installers/g-leaderboard/scripts/env_common.sh
index ca7d1235..b031595d 100644
--- a/evaluation/installers/g-leaderboard/scripts/env_common.sh
+++ b/evaluation/installers/g-leaderboard/scripts/env_common.sh
@@ -2,5 +2,3 @@
 
 export LANG=ja_JP.UTF-8
 export PYTHON_VERSION=3.10.14
-
-export G_LEADERBOARD_TAG=g-leaderboard

From 7f1fb584375f3096050510153dfa5917e525cd2e Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <h.kiyomaru@gmail.com>
Date: Thu, 29 Aug 2024 17:42:34 +0900
Subject: [PATCH 18/20] deploy blended run config during installation

---
 evaluation/installers/g-leaderboard/install.sh              | 6 +++++-
 .../installers/g-leaderboard/scripts/run_g-leaderboard.sh   | 5 -----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/evaluation/installers/g-leaderboard/install.sh b/evaluation/installers/g-leaderboard/install.sh
index 406d1415..b63db457 100644
--- a/evaluation/installers/g-leaderboard/install.sh
+++ b/evaluation/installers/g-leaderboard/install.sh
@@ -55,7 +55,7 @@ pushd $TARGET_DIR
 # Copy basic scripts for g-leaderboard
 cp ${INSTALLER_DIR}/scripts/run_g-leaderboard.sh .
 mkdir resources
-cp ${INSTALLER_DIR}/resources/* resources/
+cp ${INSTALLER_DIR}/resources/config_base.yaml resources/
 mkdir logs
 
 ENV_DIR=${TARGET_DIR}/environment
@@ -105,4 +105,8 @@ git clone https://github.com/wandb/llm-leaderboard g-leaderboard -b g-leaderboar
 pushd g-leaderboard
 pip install --no-cache-dir -r requirements.txt
 
+# Deploy blended run config
+BLENDED_RUN_CONFIG=${INSTALLER_DIR}/resources/blended_run_config.yaml
+cp $BLENDED_RUN_CONFIG blend_run_configs/config.yaml
+
 echo "Installation done." | tee >(cat >&2)
diff --git a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh
index f08a94a5..cf2de78b 100644
--- a/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh
+++ b/evaluation/installers/g-leaderboard/scripts/run_g-leaderboard.sh
@@ -44,11 +44,6 @@ for VAR in "${REPLACE_VARS[@]}"; do
   sed -i "s|<<${VAR}>>|${VALUE}|g" $NEW_CONFIG
 done
 
-# Blended run config settings
-BLENDED_RUN_CONFIG=resources/blended_run_config.yaml
-BLENDED_RUN_CONFIG_DIR=${G_LEADERBOARD_DIR}/blend_run_configs
-cp $BLENDED_RUN_CONFIG ${BLENDED_RUN_CONFIG_DIR}/config.yaml
-
 # Create a temporal project
 # NOTE: This is necessary to avoid using incorrect configurations when running multiple jobs at the same time.
 TMP_G_LEADERBOARD_DIR=$(mktemp -d "${ENV_DIR}/src/g-leaderboard.XXXXXXXX")

From a69083bc533458110483d1efa4f6dc999098e0cc Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <h.kiyomaru@gmail.com>
Date: Thu, 29 Aug 2024 17:57:48 +0900
Subject: [PATCH 19/20] update readme

---
 evaluation/installers/g-leaderboard/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md
index 347f7582..5faa23e4 100644
--- a/evaluation/installers/g-leaderboard/README.md
+++ b/evaluation/installers/g-leaderboard/README.md
@@ -59,7 +59,7 @@ The following directory structure will be created after installation.
 ### Evaluation
 
 The evaluation script takes the model path and wandb run name as arguments.
-For the other settings, edit the configuration file `resources/config_base.yaml` and/or `resources/config_custom.yaml`.
+For the other settings, edit `resources/config_base.yaml` and/or `run_g-leaderboard.sh`.
  - To edit the tokenizer, wandb entity, and/or wandb project: Edit `run_g-leaderboard.sh`.
  - Otherwise: Edit `resources/config_base.yaml` and `run_g-leaderboard.sh`.
 

From c8050c1336607b77b4cdf80529ce88acfb03394c Mon Sep 17 00:00:00 2001
From: Hirokazu Kiyomaru <h.kiyomaru@gmail.com>
Date: Fri, 30 Aug 2024 11:27:30 +0900
Subject: [PATCH 20/20] remove env-specific process

---
 evaluation/installers/g-leaderboard/README.md |  6 ++--
 .../installers/g-leaderboard/install.sh       | 33 +++----------------
 .../scripts/{env_common.sh => environment.sh} |  0
 .../scripts/envs/llm-jp-nvlink/environment.sh |  0
 .../scripts/envs/llm-jp/environment.sh        |  0
 .../scripts/envs/sakura/environment.sh        |  6 ----
 6 files changed, 7 insertions(+), 38 deletions(-)
 rename evaluation/installers/g-leaderboard/scripts/{env_common.sh => environment.sh} (100%)
 delete mode 100644 evaluation/installers/g-leaderboard/scripts/envs/llm-jp-nvlink/environment.sh
 delete mode 100644 evaluation/installers/g-leaderboard/scripts/envs/llm-jp/environment.sh
 delete mode 100644 evaluation/installers/g-leaderboard/scripts/envs/sakura/environment.sh

diff --git a/evaluation/installers/g-leaderboard/README.md b/evaluation/installers/g-leaderboard/README.md
index 5faa23e4..12192a3b 100644
--- a/evaluation/installers/g-leaderboard/README.md
+++ b/evaluation/installers/g-leaderboard/README.md
@@ -15,17 +15,15 @@ cd scripts/evaluation/installers/g-leaderboard
 
 Then, run the installation script.
 The following command will create an installation directory under the specified directory (here, `~/g-leaderboard`).
-`<env-name>` should be the name of the environment (llm-jp, llm-jp-nvlink, sakura, etc).
-The list of available environment names can be found in the `scripts/envs` directory.
 
 ```bash
 # NOTE: Using a CPU node is recommended as the installation process doesn't require GPUs
 
 # For a cluster with SLURM
-sbatch --partition {partition} install.sh {env-name} ~/g-leaderboard
+sbatch --partition {partition} install.sh ~/g-leaderboard
 
 # For a cluster without SLURM
-bash install.sh {env-name} ~/g-leaderboard > logs/install.out 2> logs/install.err
+bash install.sh ~/g-leaderboard > logs/install.out 2> logs/install.err
 ```
 
 After the installation is complete, set up the wandb and huggingface accounts.
diff --git a/evaluation/installers/g-leaderboard/install.sh b/evaluation/installers/g-leaderboard/install.sh
index b63db457..9b0747fb 100644
--- a/evaluation/installers/g-leaderboard/install.sh
+++ b/evaluation/installers/g-leaderboard/install.sh
@@ -22,30 +22,18 @@
 
 set -eux -o pipefail
 
-ENV_CHOICES=($(ls scripts/envs))
-TARGET_ENV_MSG="Set TARGET_ENV from (${ENV_CHOICES[@]} ) or add a new configuration in 'scripts/envs'."
-
-if [ $# -ne 2 ]; then
+if [ $# -ne 1 ]; then
   set +x
-  >&2 echo Usage: sbatch \(or bash\)  install.sh TARGET_ENV TARGET_DIR
-  >&2 echo $TARGET_ENV_MSG
+  >&2 echo Usage: sbatch \(or bash\) install.sh TARGET_DIR
   exit 1
 fi
 
 INSTALLER_DIR=$(pwd)
-TARGET_ENV=$1
-TARGET_DIR=$2
+TARGET_DIR=$1
 INSTALLER_COMMON=$INSTALLER_DIR/../../../common/installers.sh
 
-if [[ ! " ${ENV_CHOICES[@]} " =~ " ${TARGET_ENV} " ]]; then
-  set +x
-  >&2 echo $TARGET_ENV_MSG
-  exit 1
-fi
-
 >&2 echo INSTALLER_DIR=$INSTALLER_DIR
 >&2 echo TARGET_DIR=$TARGET_DIR
->&2 echo TARGET_ENV=$TARGET_ENV
 >&2 echo INSTALLER_COMMON=$INSTALLER_COMMON
 source $INSTALLER_COMMON
 
@@ -67,20 +55,9 @@ cp ${INSTALLER_DIR}/install.sh .
 mkdir scripts
 
 # Create environment.sh
-BASE_ENV_SHELL=${INSTALLER_DIR}/scripts/env_common.sh
-EXT_ENV_SHELL=${INSTALLER_DIR}/scripts/envs/${TARGET_ENV}/environment.sh
+BASE_ENV_SHELL=${INSTALLER_DIR}/scripts/environment.sh
 NEW_ENV_SHELL=scripts/environment.sh
-
-print_env_shell() {
-    echo "#!/bin/bash"
-    echo
-    echo "# from $BASE_ENV_SHELL"
-    cat $BASE_ENV_SHELL
-    echo
-    echo "# from $EXT_ENV_SHELL"
-    cat $EXT_ENV_SHELL
-}
-print_env_shell > $NEW_ENV_SHELL
+cp $BASE_ENV_SHELL $NEW_ENV_SHELL
 
 source $NEW_ENV_SHELL
 
diff --git a/evaluation/installers/g-leaderboard/scripts/env_common.sh b/evaluation/installers/g-leaderboard/scripts/environment.sh
similarity index 100%
rename from evaluation/installers/g-leaderboard/scripts/env_common.sh
rename to evaluation/installers/g-leaderboard/scripts/environment.sh
diff --git a/evaluation/installers/g-leaderboard/scripts/envs/llm-jp-nvlink/environment.sh b/evaluation/installers/g-leaderboard/scripts/envs/llm-jp-nvlink/environment.sh
deleted file mode 100644
index e69de29b..00000000
diff --git a/evaluation/installers/g-leaderboard/scripts/envs/llm-jp/environment.sh b/evaluation/installers/g-leaderboard/scripts/envs/llm-jp/environment.sh
deleted file mode 100644
index e69de29b..00000000
diff --git a/evaluation/installers/g-leaderboard/scripts/envs/sakura/environment.sh b/evaluation/installers/g-leaderboard/scripts/envs/sakura/environment.sh
deleted file mode 100644
index ad20ca48..00000000
--- a/evaluation/installers/g-leaderboard/scripts/envs/sakura/environment.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-export CUDA_VERSION_MAJOR=12
-export CUDA_VERSION_MINOR=1
-export CUDA_VERSION=${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}
-export CUDNN_VERSION=8.9.4
-module load cuda/${CUDA_VERSION}
-module load /data/cudnn-tmp-install/modulefiles/${CUDNN_VERSION}