-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_code_bloom.slurm
82 lines (67 loc) · 2.51 KB
/
generate_code_bloom.slurm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/bin/bash
# SLURM batch script: runs code_eval.py against the BLOOM checkpoint three
# times per job, once for each sampling temperature (0.2, 0.6, 0.8).
# The script derives its task range from SLURM_ARRAY_TASK_ID, so it has to be
# submitted as a job array (presumably --array=1-164, given NUM_TASKS=164 and
# NUM_TASKS_PER_JOB=1 below -- confirm).
#
# Resource request: one node, one task, 64 physical cores and 8 GPUs on the
# gpu_p5 partition (a100 constraint), for at most 20 hours.
#SBATCH --job-name=bs-code-generation-bloom-176b
#SBATCH --partition=gpu_p5
#SBATCH --constraint=a100
#SBATCH --reservation=hug
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1 # crucial - only 1 task per dist per node!
#SBATCH --cpus-per-task=64 # number of cores per tasks
#SBATCH --hint=nomultithread # we get physical cores not logical
#SBATCH --gres=gpu:8 # number of gpus
#SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS)
# In the log path, %x is replaced by the job name and %j by the job id
# (sbatch filename patterns).
#SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out # output file name
#SBATCH --account=six@a100
# Trace every command into the job log (-x) and abort on the first failing
# command (-e).
set -x -e

# Every path below is rooted at $six_ALL_CCFRWORK. Fail early with an explicit
# message instead of trying to source "/start-py38-pt111" when it is missing.
: "${six_ALL_CCFRWORK:?six_ALL_CCFRWORK must be set before submitting this job}"

# Load the site start script, then the conda environment used for evaluation.
source "${six_ALL_CCFRWORK}/start-py38-pt111"
conda activate thomas_code_evaluation

echo "START TIME: $(date)"

# Checkpoint path handed to code_eval.py through --model_ckpt.
MODEL_CKPT=/gpfsscratch/rech/six/commun/uan68tv-model-conversion/bloom

# Offline switches for the Hugging Face libraries (presumably because the
# compute nodes have no network access -- confirm).
export HF_DATASETS_OFFLINE=1
export TRANSFORMERS_OFFLINE=1

# Point the Hugging Face caches at directories under the shared work area.
export TRANSFORMERS_CACHE="${six_ALL_CCFRWORK}/models"
export HF_DATASETS_CACHE="${six_ALL_CCFRWORK}/datasets_code_eval"
export HF_MODULES_CACHE="${six_ALL_CCFRWORK}/modules"
export HF_METRICS_CACHE="${six_ALL_CCFRWORK}/metrics"
# NOTE(review): NUM_TASKS is not referenced anywhere in the visible script;
# presumably it documents the array size to submit with
# (--array=1-$((NUM_TASKS / NUM_TASKS_PER_JOB))) -- confirm.
NUM_TASKS=164
# Number of consecutive task indices handled by one array job.
NUM_TASKS_PER_JOB=1
STORE_GENERATIONS_FOLDER=/gpfswork/rech/six/commun/code_generations_bloom

#######################################
# Derive the task range and output paths for one array job.
# Globals:
#   NUM_TASKS_PER_JOB, STORE_GENERATIONS_FOLDER (read)
#   TASK_START, TASK_END, OUTPUT_FILE1, OUTPUT_FILE2, OUTPUT_FILE3 (written)
# Arguments:
#   $1 - SLURM array task id; must be a positive integer (ids are 1-based)
# Outputs:
#   An error message on stderr when the id is missing or malformed.
# Returns:
#   0 on success, 1 when the id is invalid (globals are left untouched).
#######################################
set_task_range() {
  local array_id=$1
  local task_dir

  # Without this check an unset id evaluates to TASK_START=-1 / TASK_END=0 and
  # the job would silently generate into ".../task_-1_0".
  if [[ ! "$array_id" =~ ^[1-9][0-9]*$ ]]; then
    printf 'error: SLURM_ARRAY_TASK_ID must be a positive integer (got "%s"); submit with sbatch --array=1-N\n' \
      "$array_id" >&2
    return 1
  fi

  # Array ids start at 1, task indices at 0.
  TASK_START=$(( (array_id - 1) * NUM_TASKS_PER_JOB ))
  TASK_END=$(( TASK_START + NUM_TASKS_PER_JOB ))

  # One directory per task range, one output file per generation run.
  task_dir="${STORE_GENERATIONS_FOLDER}/task_${TASK_START}_${TASK_END}"
  OUTPUT_FILE1="${task_dir}/output_1"
  OUTPUT_FILE2="${task_dir}/output_2"
  OUTPUT_FILE3="${task_dir}/output_3"
}

# A non-zero return here aborts the job because `set -e` is active (enabled
# near the top of the script).
set_task_range "${SLURM_ARRAY_TASK_ID:-}"
echo "using $MODEL_CKPT as model checkpoint, if not done change it to a local repository"

# code_eval.py is invoked by relative path, so run from the evaluation checkout.
# NOTE(review): the path contains "commun/commun" -- confirm this is intended.
WORKDIR=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloom-code-evaluation
pushd "$WORKDIR"

# NOTE(review): CUDA_LAUNCH_BLOCKING=1 is normally a debugging setting that
# makes kernel launches synchronous -- confirm it is still wanted here.
export CUDA_LAUNCH_BLOCKING=1

#######################################
# Run one sampling pass of code_eval.py over the job's task range.
# Globals:
#   MODEL_CKPT, TASK_START, TASK_END (read)
# Arguments:
#   $1 - sampling temperature
#   $2 - file the generations are written to
# Returns:
#   The exit status of code_eval.py.
#######################################
run_generation() {
  local temperature=$1
  local output_file=$2

  python code_eval.py --model_ckpt "$MODEL_CKPT" \
    --batch_size 1 \
    --do_sample True \
    --task_start "$TASK_START" \
    --task_end "$TASK_END" \
    --temperature "$temperature" \
    --top_p 0.95 \
    --n_samples 200 \
    --dtype bfloat16 \
    --output_file "$output_file"
}

# Three passes that differ only in temperature and output file. They run
# sequentially, and a failing pass stops the job because `set -e` is active.
run_generation 0.2 "$OUTPUT_FILE1"
run_generation 0.6 "$OUTPUT_FILE2"
run_generation 0.8 "$OUTPUT_FILE3"