# lmeval_multitask

런타임 유형: L4 또는 A100 GPU로 설정하기

실행 순서:
1. 패키지 설치
2. Google Drive 마운트
3. 설정값(특히 `MODEL_PATH`) 확인
4. (선택) `lm_eval --tasks list`로 태스크 확인
5. 태스크 실행 셀 4개를 순서대로 실행

참고:
- 태스크 실행 셀은 `TASK_NAME`만 다릅니다.
- 결과는 `RUN_DIR/<task_name>` 경로에 저장됩니다.

In [None]:
# 런타임 재시작 후 실행 권장
%pip install -U pip
%pip install -U "lm-eval[vllm]" langdetect

In [None]:
# Mount Google Drive so the model and result paths under /content/drive
# are reachable from this runtime.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

In [None]:
from datetime import datetime, timezone
from pathlib import Path

# --- Model and output locations ---------------------------------------------
MODEL_PATH = "/content/drive/MyDrive/LGAimers/base_model"  # edit if needed
OUTPUT_ROOT = "/content/drive/MyDrive/LGAimers/lm_eval_multitask_results"

# Every execution of this cell produces a new UTC-timestamped run name.
_utc_stamp = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')
RUN_NAME = f"multitask_{_utc_stamp}"

# --- Settings interpolated into the lm_eval command by the task cells --------
TRUST_REMOTE_CODE = True
ENABLE_THINKING = False
TENSOR_PARALLEL_SIZE = 1
GPU_MEMORY_UTILIZATION = 0.85
BATCH_SIZE = "auto"
MAX_GEN_TOKS = 16384
APPLY_CHAT_TEMPLATE = True

# --- Per-run output directory (created up front, parents included) ----------
RUN_DIR = Path(OUTPUT_ROOT).joinpath(RUN_NAME)
RUN_DIR.mkdir(parents=True, exist_ok=True)

print(f"MODEL_PATH: {MODEL_PATH}")
print(f"RUN_DIR: {RUN_DIR}")


In [None]:
TASK_NAME = "ifeval"
!lm_eval \
  --model vllm \
  --model_args pretrained={MODEL_PATH},trust_remote_code={str(TRUST_REMOTE_CODE).lower()},enable_thinking={str(ENABLE_THINKING).lower()},tensor_parallel_size={TENSOR_PARALLEL_SIZE},gpu_memory_utilization={GPU_MEMORY_UTILIZATION} \
  --tasks {TASK_NAME} \
  --batch_size {BATCH_SIZE} \
  --apply_chat_template {APPLY_CHAT_TEMPLATE} \
  --gen_kwargs max_gen_toks={MAX_GEN_TOKS} \
  --output_path {RUN_DIR}/{TASK_NAME}


In [None]:
TASK_NAME = "mmlu_redux_generative"
!lm_eval \
  --model vllm \
  --model_args pretrained={MODEL_PATH},trust_remote_code={str(TRUST_REMOTE_CODE).lower()},enable_thinking={str(ENABLE_THINKING).lower()},tensor_parallel_size={TENSOR_PARALLEL_SIZE},gpu_memory_utilization={GPU_MEMORY_UTILIZATION} \
  --tasks {TASK_NAME} \
  --batch_size {BATCH_SIZE} \
  --apply_chat_template {APPLY_CHAT_TEMPLATE} \
  --gen_kwargs max_gen_toks={MAX_GEN_TOKS} \
  --output_path {RUN_DIR}/{TASK_NAME}


In [None]:
TASK_NAME = "gsm8k"
!lm_eval \
  --model vllm \
  --model_args pretrained={MODEL_PATH},trust_remote_code={str(TRUST_REMOTE_CODE).lower()},enable_thinking={str(ENABLE_THINKING).lower()},tensor_parallel_size={TENSOR_PARALLEL_SIZE},gpu_memory_utilization={GPU_MEMORY_UTILIZATION} \
  --tasks {TASK_NAME} \
  --batch_size {BATCH_SIZE} \
  --apply_chat_template {APPLY_CHAT_TEMPLATE} \
  --gen_kwargs max_gen_toks={MAX_GEN_TOKS} \
  --output_path {RUN_DIR}/{TASK_NAME}


In [None]:
TASK_NAME = "hrm8k_ksm"
!lm_eval \
  --model vllm \
  --model_args pretrained={MODEL_PATH},trust_remote_code={str(TRUST_REMOTE_CODE).lower()},enable_thinking={str(ENABLE_THINKING).lower()},tensor_parallel_size={TENSOR_PARALLEL_SIZE},gpu_memory_utilization={GPU_MEMORY_UTILIZATION} \
  --tasks {TASK_NAME} \
  --batch_size {BATCH_SIZE} \
  --apply_chat_template {APPLY_CHAT_TEMPLATE} \
  --gen_kwargs max_gen_toks={MAX_GEN_TOKS} \
  --output_path {RUN_DIR}/{TASK_NAME}
