#!/usr/bin/env bash
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
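# Usage: bash run_glue.sh <TASK> <PRETRAINED_MODEL_PATH> <GLUE_DATA_DIR> <OUTPUT_PATH> <N_EPOCH> <WARMUP_RATIO> <BSZ> <LR> <SEED>
# Example invocation (paths and hyperparameter values are illustrative, not recommended settings):
#   bash run_glue.sh RTE /path/to/amos_base.pt /path/to/glue_data /path/to/output 10 16 32 2e-5 1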
# GLUE task name, one of: MNLI, QQP, QNLI, SST-2, CoLA, RTE, MRPC, STS-B
TASK=$1
# Path to the pretrained AMOS checkpoint
PRETRAINED_MODEL_PATH=$2
# Path to the processed GLUE dataset (containing the binarized <TASK>-bin folders), e.g. 'path/to/glue_data'
GLUE_DATA_DIR=$3
# Output path for results and fine-tuned model
OUTPUT_PATH=$4
# Model architecture name
ARCH="amos_base"
# Hyperparameters for the run
N_EPOCH=$5        # number of fine-tuning epochs
WARMUP_RATIO=$6   # inverse warmup fraction: warmup steps = total steps / WARMUP_RATIO
BSZ=$7            # batch size (sentences per update)
LR=$8             # peak learning rate
SEED=$9           # random seed
BINS=64           # number of relative-position buckets (--rel-pos-bins)
MAX_DIST=128      # maximum relative-position distance (--max-rel-pos)
BETAS="(0.9,0.98)"   # Adam betas
CLIP=0.0             # gradient clipping norm (0 disables clipping)
WEIGHT_DECAY=0.01
MAX_TOKENS=2200      # max tokens per batch, calibrated for batch size 16 and rescaled below
if [ ! -e "$PRETRAINED_MODEL_PATH" ]; then
echo "Checkpoint ${PRETRAINED_MODEL_PATH} doesn't exist"
exit 1
fi
TASK_DATA_DIR=$GLUE_DATA_DIR/$TASK-bin
OPTION=""
METRIC=accuracy
N_CLASSES=2
task_type=SMALL
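# Per-task settings. EPOCH_ITER is the approximate number of updates per epoch at
# batch size 32; it is rescaled to the actual batch size after these blocks.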
if [ "$TASK" = "MNLI" ]
then
N_CLASSES=3
OPTION="--valid-subset valid,valid1"
EPOCH_ITER=12452
task_type=LARGE
fi
if [ "$TASK" = "QNLI" ]
then
EPOCH_ITER=3320
task_type=LARGE
fi
if [ "$TASK" = "QQP" ]
then
EPOCH_ITER=11392
task_type=LARGE
fi
if [ "$TASK" = "SST-2" ]
then
EPOCH_ITER=2105
task_type=LARGE
fi
if [ "$TASK" = "MRPC" ]
then
EPOCH_ITER=115
fi
if [ "$TASK" = "RTE" ]
then
EPOCH_ITER=101
fi
if [ "$TASK" = "CoLA" ]
then
METRIC=mcc
EPOCH_ITER=268
fi
if [ "$TASK" = "STS-B" ]
then
METRIC=pearson_spearman
N_CLASSES=1
OPTION="--regression-target"
EPOCH_ITER=180
fi
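# For large tasks, skip certain sweep settings (10 epochs or a warmup ratio of 10);
# presumably these grid points are only used for the smaller tasks.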
if [ "$task_type" = "LARGE" ]
then
if [ "$N_EPOCH" = "10" ]
then
echo 'skip'
exit 0
fi
if [ "$WARMUP_RATIO" = "10" ]
then
echo 'skip'
exit 0
fi
# if [ "$BSZ" = "16" ]
# then
# echo 'skip'
# exit 0
# fi
fi
EPOCH_ITER=$((EPOCH_ITER*2)) # convert to updates per epoch at batch size 16
BSZ_EXPAND=$((BSZ/16))
MAX_TOKENS=$((MAX_TOKENS*BSZ/16)) # scale the max-tokens budget with the actual batch size
EPOCH_ITER=$((EPOCH_ITER/BSZ_EXPAND)) # updates per epoch at the actual batch size
TOTAL_STEPS=$((EPOCH_ITER*N_EPOCH))
WARMUP_STEPS=$((TOTAL_STEPS/WARMUP_RATIO)) # warm up over 1/WARMUP_RATIO of the total steps
VALIDATE_INTERVAL=$((EPOCH_ITER/2)) # validate twice per epoch
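# Illustration with assumed values (not defaults): for RTE with BSZ=32, N_EPOCH=10, WARMUP_RATIO=16:
#   EPOCH_ITER = 101*2/2 = 101, MAX_TOKENS = 4400, TOTAL_STEPS = 1010,
#   WARMUP_STEPS = 1010/16 = 63, VALIDATE_INTERVAL = 50.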
OUTPUT_PATH=$OUTPUT_PATH/$TASK/$N_EPOCH-$WARMUP_RATIO-$BSZ-$LR-$SEED
mkdir -p "$OUTPUT_PATH"
echo "$OUTPUT_PATH"
# Skip runs that already finished (the log shows both a loaded checkpoint and completed training)
if [ -e "$OUTPUT_PATH/train_log.txt" ]; then
if grep -q 'done training' "$OUTPUT_PATH/train_log.txt" && grep -q 'Loaded checkpoint' "$OUTPUT_PATH/train_log.txt"; then
echo "Training log already exists; skipping"
exit 0
fi
fi
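# Fine-tune on the task with the fairseq-style train.py; stdout is also written to train_log.txt via tee.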
python train.py $TASK_DATA_DIR --fp16 --fp16-init-scale 4 --threshold-loss-scale 1 --fp16-scale-window 128 \
--restore-file $PRETRAINED_MODEL_PATH \
--max-positions 512 \
--max-sentences $BSZ \
--max-tokens $MAX_TOKENS \
--update-freq 1 \
--task sentence_prediction \
--reset-optimizer --reset-dataloader --reset-meters \
--required-batch-size-multiple 1 \
--init-token 0 --separator-token 2 \
--arch $ARCH \
--criterion sentence_prediction $OPTION \
--num-classes $N_CLASSES \
--dropout 0.1 --attention-dropout 0.1 --pooler-dropout 0.1 \
--weight-decay $WEIGHT_DECAY --optimizer adam --adam-betas "$BETAS" --adam-eps 1e-06 \
--clip-norm $CLIP \
--lr-scheduler polynomial_decay --lr $LR --total-num-update $TOTAL_STEPS --warmup-updates $WARMUP_STEPS \
--max-update $TOTAL_STEPS --seed $SEED --save-dir $OUTPUT_PATH --no-progress-bar --log-interval 100 --no-epoch-checkpoints --no-last-checkpoints \
--find-unused-parameters --skip-invalid-size-inputs-valid-test --rel-pos 1 --max-rel-pos $MAX_DIST --rel-pos-bins $BINS \
--best-checkpoint-metric $METRIC --maximize-best-checkpoint-metric --validate-interval-updates $VALIDATE_INTERVAL | tee $OUTPUT_PATH/train_log.txt