## COLA

In [146]:
# Slurm batch-script template for the GLUE CoLA fine-tuning/evaluation runs.
# It is filled in with str.format(); the placeholders are:
#   {cased}      - 'cased' or 'uncased'; picks ../models/matbert-base-{cased}/ and names the job/output
#   {bs}         - value passed as --per_device_train_batch_size
#   {epoch}      - value passed as --num_train_epochs
#   {port}       - --master_port handed to torch.distributed.launch
#   {lower_case} - extra flag text inserted right after the model path (may be empty)
# Shell syntax such as $SCRATCH, $OUTPUTDIR and $(date) contains no braces, so format() leaves it alone.
# Every shell line continuation is spelled "\\" so a literal backslash reaches the generated script;
# a lone backslash before a newline would be swallowed by Python and splice the two lines together.
# The start-up echo reports {cased} so that the output of uncased runs is labelled correctly.
template = """#!/bin/bash
#SBATCH -N 1
#SBATCH -A m3624
#SBATCH -C gpu
#SBATCH -J cola-{cased}-{bs}-{epoch}
#SBATCH -G 1
#SBATCH -c 5
#SBATCH --mail-user=haoyan.huo@lbl.gov
#SBATCH -t 0:59:59
#SBATCH --output=eval_cola_{cased}_bs{bs}_epoch{epoch}.out
## DISABLED: SBATCH --mail-type=END,FAIL

module load cuda
source ~/miniconda3/etc/profile.d/conda.sh
conda activate bert

export HF_HOME=$SCRATCH/cache/huggingface
export TOKENIZERS_PARALLELISM=false

export OUTPUTDIR=$SCRATCH/glue_eval/cola_{cased}_bs{bs}_epoch{epoch}
mkdir -p $OUTPUTDIR
echo "Starting to train {cased}, batch size {bs}, epochs {epoch} at $(date)"
time python -m torch.distributed.launch --nnodes 1 --nproc_per_node 1 --master_addr 127.0.0.1 --master_port {port} \\
    evaluate_glue.py --overwrite_output_dir \\
        --no_pad_to_max_length \\
        --dataloader_num_workers 4 \\
        --model_name_or_path ../models/matbert-base-{cased}/ {lower_case}\\
        --per_device_train_batch_size {bs} \\
        --num_train_epochs {epoch} \\
        --output_dir $OUTPUTDIR \\
        --logging_dir $OUTPUTDIR/run \\
        --task_name cola \\
        --do_train --do_eval --do_predict \\
        --load_best_model_at_end \\
        --metric_for_best_model "matthews_correlation" \\
        --seed 42 \\
        --evaluation_strategy 'epoch' > $OUTPUTDIR/file.log 2>&1
echo "Finished at $(date)"
"""

In [147]:
# Render the CoLA template once per (batch size, casing) pair and save each
# result as its own .sh file; the master port is base_port plus the pair's index.
base_port = 17800
epoch = 8
grid = [(bs, cased) for bs in [8, 16, 32, 64] for cased in ['cased', 'uncased']]
for i, (bs, cased) in enumerate(grid):
    # Only the cased model gets the extra tokenizer flag.
    if cased == 'cased':
        lower_case = '--no_do_lower_case '
    else:
        lower_case = ''
    fields = dict(bs=bs, epoch=epoch, cased=cased)
    script = template.format(lower_case=lower_case, port=base_port + i, **fields)

    fn = 'script_cola_{cased}_bs{bs}_epoch{epoch}.sh'.format(**fields)
    with open(fn, 'w') as f:
        f.write(script)

## MNLI

In [148]:
# Slurm batch-script template for the GLUE MNLI fine-tuning/evaluation runs.
# It is filled in with str.format(); the placeholders are:
#   {cased}      - 'cased' or 'uncased'; picks ../models/matbert-base-{cased}/ and names the job/output
#   {bs}         - value passed as --per_device_train_batch_size
#   {epoch}      - value passed as --num_train_epochs
#   {port}       - --master_port handed to torch.distributed.launch
#   {lower_case} - extra flag text inserted right after the model path (may be empty)
# Shell syntax such as $SCRATCH, $OUTPUTDIR and $(date) contains no braces, so format() leaves it alone.
# Every shell line continuation is spelled "\\" so a literal backslash reaches the generated script;
# a lone backslash before a newline would be swallowed by Python and splice the two lines together.
# The start-up echo reports {cased} so that the output of uncased runs is labelled correctly.
template = """#!/bin/bash
#SBATCH -N 1
#SBATCH -A m3624
#SBATCH -C gpu
#SBATCH -J mnli-{cased}-{bs}-{epoch}
#SBATCH -G 4
#SBATCH -c 20
#SBATCH --mail-user=haoyan.huo@lbl.gov
#SBATCH -t 3:59:59
#SBATCH --output=eval_mnli_{cased}_bs{bs}_epoch{epoch}.out
## DISABLED: SBATCH --mail-type=END,FAIL

module load cuda
source ~/miniconda3/etc/profile.d/conda.sh
conda activate bert

export HF_HOME=$SCRATCH/cache/huggingface
export TOKENIZERS_PARALLELISM=false

export OUTPUTDIR=$SCRATCH/glue_eval/mnli_{cased}_bs{bs}_epoch{epoch}
mkdir -p $OUTPUTDIR
echo "Starting to train {cased}, batch size {bs}, epochs {epoch} at $(date)"
time python -m torch.distributed.launch --nnodes 1 --nproc_per_node 4 --master_addr 127.0.0.1 --master_port {port} \\
    evaluate_glue.py --overwrite_output_dir \\
        --no_pad_to_max_length \\
        --dataloader_num_workers 4 \\
        --model_name_or_path ../models/matbert-base-{cased}/ {lower_case}\\
        --per_device_train_batch_size {bs} \\
        --num_train_epochs {epoch} \\
        --output_dir $OUTPUTDIR \\
        --logging_dir $OUTPUTDIR/run \\
        --task_name mnli \\
        --do_train --do_eval --do_predict \\
        --load_best_model_at_end \\
        --metric_for_best_model "accuracy" \\
        --seed 42 \\
        --evaluation_strategy 'epoch' > $OUTPUTDIR/file.log 2>&1
echo "Finished at $(date)"
"""

In [149]:
# Render the MNLI template once per (batch size, casing) pair and save each
# result as its own .sh file; the master port is base_port plus the pair's index.
base_port = 17900
epoch = 5
grid = [(bs, cased) for bs in [8, 16] for cased in ['cased', 'uncased']]
for i, (bs, cased) in enumerate(grid):
    # Only the cased model gets the extra tokenizer flag.
    if cased == 'cased':
        lower_case = '--no_do_lower_case '
    else:
        lower_case = ''
    fields = dict(bs=bs, epoch=epoch, cased=cased)
    script = template.format(lower_case=lower_case, port=base_port + i, **fields)

    fn = 'script_mnli_{cased}_bs{bs}_epoch{epoch}.sh'.format(**fields)
    with open(fn, 'w') as f:
        f.write(script)

## MRPC

In [150]:
# Slurm batch-script template for the GLUE MRPC fine-tuning/evaluation runs.
# It is filled in with str.format(); the placeholders are:
#   {cased}      - 'cased' or 'uncased'; picks ../models/matbert-base-{cased}/ and names the job/output
#   {bs}         - value passed as --per_device_train_batch_size
#   {epoch}      - value passed as --num_train_epochs
#   {port}       - --master_port handed to torch.distributed.launch
#   {lower_case} - extra flag text inserted right after the model path (may be empty)
# Shell syntax such as $SCRATCH, $OUTPUTDIR and $(date) contains no braces, so format() leaves it alone.
# Every shell line continuation is spelled "\\" so a literal backslash reaches the generated script;
# a lone backslash before a newline would be swallowed by Python and splice the two lines together.
# The start-up echo reports {cased} so that the output of uncased runs is labelled correctly.
template = """#!/bin/bash
#SBATCH -N 1
#SBATCH -A m3624
#SBATCH -C gpu
#SBATCH -J mrpc-{cased}-{bs}-{epoch}
#SBATCH -G 4
#SBATCH -c 20
#SBATCH --mail-user=haoyan.huo@lbl.gov
#SBATCH -t 0:59:59
#SBATCH --output=eval_mrpc_{cased}_bs{bs}_epoch{epoch}.out
## DISABLED: SBATCH --mail-type=END,FAIL

module load cuda
source ~/miniconda3/etc/profile.d/conda.sh
conda activate bert

export HF_HOME=$SCRATCH/cache/huggingface
export TOKENIZERS_PARALLELISM=false

export OUTPUTDIR=$SCRATCH/glue_eval/mrpc_{cased}_bs{bs}_epoch{epoch}
mkdir -p $OUTPUTDIR
echo "Starting to train {cased}, batch size {bs}, epochs {epoch} at $(date)"
time python -m torch.distributed.launch --nnodes 1 --nproc_per_node 4 --master_addr 127.0.0.1 --master_port {port} \\
    evaluate_glue.py --overwrite_output_dir \\
        --no_pad_to_max_length \\
        --dataloader_num_workers 4 \\
        --model_name_or_path ../models/matbert-base-{cased}/ {lower_case}\\
        --per_device_train_batch_size {bs} \\
        --num_train_epochs {epoch} \\
        --output_dir $OUTPUTDIR \\
        --logging_dir $OUTPUTDIR/run \\
        --task_name mrpc \\
        --do_train --do_eval --do_predict \\
        --load_best_model_at_end \\
        --metric_for_best_model "f1" \\
        --seed 42 \\
        --evaluation_strategy 'epoch' > $OUTPUTDIR/file.log 2>&1
echo "Finished at $(date)"
"""

In [151]:
# Render the MRPC template once per (batch size, casing) pair and save each
# result as its own .sh file; the master port is base_port plus the pair's index.
base_port = 18000
epoch = 5
grid = [(bs, cased) for bs in [2, 4, 8, 16] for cased in ['cased', 'uncased']]
for i, (bs, cased) in enumerate(grid):
    # Only the cased model gets the extra tokenizer flag.
    if cased == 'cased':
        lower_case = '--no_do_lower_case '
    else:
        lower_case = ''
    fields = dict(bs=bs, epoch=epoch, cased=cased)
    script = template.format(lower_case=lower_case, port=base_port + i, **fields)

    fn = 'script_mrpc_{cased}_bs{bs}_epoch{epoch}.sh'.format(**fields)
    with open(fn, 'w') as f:
        f.write(script)

## QNLI

In [1]:
# Slurm batch-script template for the GLUE QNLI fine-tuning/evaluation runs.
# Unlike the sibling templates it passes --max_seq_length 512 instead of --no_pad_to_max_length.
# It is filled in with str.format(); the placeholders are:
#   {cased}      - 'cased' or 'uncased'; picks ../models/matbert-base-{cased}/ and names the job/output
#   {bs}         - value passed as --per_device_train_batch_size
#   {epoch}      - value passed as --num_train_epochs
#   {port}       - --master_port handed to torch.distributed.launch
#   {lower_case} - extra flag text inserted right after the model path (may be empty)
# Shell syntax such as $SCRATCH, $OUTPUTDIR and $(date) contains no braces, so format() leaves it alone.
# Every shell line continuation is spelled "\\" so a literal backslash reaches the generated script;
# a lone backslash before a newline would be swallowed by Python and splice the two lines together.
# The start-up echo reports {cased} so that the output of uncased runs is labelled correctly.
template = """#!/bin/bash
#SBATCH -N 1
#SBATCH -A m3624
#SBATCH -C gpu
#SBATCH -J qnli-{cased}-{bs}-{epoch}
#SBATCH -G 4
#SBATCH -c 20
#SBATCH --mail-user=haoyan.huo@lbl.gov
#SBATCH -t 3:59:59
#SBATCH --output=eval_qnli_{cased}_bs{bs}_epoch{epoch}.out
## DISABLED: SBATCH --mail-type=END,FAIL

module load cuda
source ~/miniconda3/etc/profile.d/conda.sh
conda activate bert

export HF_HOME=$SCRATCH/cache/huggingface
export TOKENIZERS_PARALLELISM=false

export OUTPUTDIR=$SCRATCH/glue_eval/qnli_{cased}_bs{bs}_epoch{epoch}
mkdir -p $OUTPUTDIR
echo "Starting to train {cased}, batch size {bs}, epochs {epoch} at $(date)"
time python -m torch.distributed.launch --nnodes 1 --nproc_per_node 4 --master_addr 127.0.0.1 --master_port {port} \\
    evaluate_glue.py --overwrite_output_dir \\
        --max_seq_length 512 \\
        --dataloader_num_workers 4 \\
        --model_name_or_path ../models/matbert-base-{cased}/ {lower_case}\\
        --per_device_train_batch_size {bs} \\
        --num_train_epochs {epoch} \\
        --output_dir $OUTPUTDIR \\
        --logging_dir $OUTPUTDIR/run \\
        --task_name qnli \\
        --do_train --do_eval --do_predict \\
        --load_best_model_at_end \\
        --metric_for_best_model "accuracy" \\
        --seed 42 \\
        --evaluation_strategy 'epoch' > $OUTPUTDIR/file.log 2>&1
echo "Finished at $(date)"
"""

In [2]:
# Render the QNLI template once per (batch size, casing) pair and save each
# result as its own .sh file; the master port is base_port plus the pair's index.
base_port = 18100
epoch = 5
grid = [(bs, cased) for bs in [2, 4, 8, 16] for cased in ['cased', 'uncased']]
for i, (bs, cased) in enumerate(grid):
    # Only the cased model gets the extra tokenizer flag.
    if cased == 'cased':
        lower_case = '--no_do_lower_case '
    else:
        lower_case = ''
    fields = dict(bs=bs, epoch=epoch, cased=cased)
    script = template.format(lower_case=lower_case, port=base_port + i, **fields)

    fn = 'script_qnli_{cased}_bs{bs}_epoch{epoch}.sh'.format(**fields)
    with open(fn, 'w') as f:
        f.write(script)

## WNLI

In [155]:
# Slurm batch-script template for the GLUE WNLI fine-tuning/evaluation runs.
# Unlike the sibling templates it passes neither --load_best_model_at_end nor --metric_for_best_model.
# It is filled in with str.format(); the placeholders are:
#   {cased}      - 'cased' or 'uncased'; picks ../models/matbert-base-{cased}/ and names the job/output
#   {bs}         - value passed as --per_device_train_batch_size
#   {epoch}      - value passed as --num_train_epochs
#   {port}       - --master_port handed to torch.distributed.launch
#   {lower_case} - extra flag text inserted right after the model path (may be empty)
# Shell syntax such as $SCRATCH, $OUTPUTDIR and $(date) contains no braces, so format() leaves it alone.
# Every shell line continuation is spelled "\\" so a literal backslash reaches the generated script;
# a lone backslash before a newline would be swallowed by Python and splice the two lines together.
# The start-up echo reports {cased} so that the output of uncased runs is labelled correctly.
template = """#!/bin/bash
#SBATCH -N 1
#SBATCH -A m3624
#SBATCH -C gpu
#SBATCH -J wnli-{cased}-{bs}-{epoch}
#SBATCH -G 1
#SBATCH -c 5
#SBATCH --mail-user=haoyan.huo@lbl.gov
#SBATCH -t 0:59:59
#SBATCH --output=eval_wnli_{cased}_bs{bs}_epoch{epoch}.out
## DISABLED: SBATCH --mail-type=END,FAIL

module load cuda
source ~/miniconda3/etc/profile.d/conda.sh
conda activate bert

export HF_HOME=$SCRATCH/cache/huggingface
export TOKENIZERS_PARALLELISM=false

export OUTPUTDIR=$SCRATCH/glue_eval/wnli_{cased}_bs{bs}_epoch{epoch}
mkdir -p $OUTPUTDIR
echo "Starting to train {cased}, batch size {bs}, epochs {epoch} at $(date)"
time python -m torch.distributed.launch --nnodes 1 --nproc_per_node 1 --master_addr 127.0.0.1 --master_port {port} \\
    evaluate_glue.py --overwrite_output_dir \\
        --no_pad_to_max_length \\
        --dataloader_num_workers 4 \\
        --model_name_or_path ../models/matbert-base-{cased}/ {lower_case}\\
        --per_device_train_batch_size {bs} \\
        --num_train_epochs {epoch} \\
        --output_dir $OUTPUTDIR \\
        --logging_dir $OUTPUTDIR/run \\
        --task_name wnli \\
        --do_train --do_eval --do_predict \\
        --seed 42 \\
        --evaluation_strategy 'epoch' > $OUTPUTDIR/file.log 2>&1
echo "Finished at $(date)"
"""

In [156]:
# Render the WNLI template once per (batch size, casing) pair and save each
# result as its own .sh file; the master port is base_port plus the pair's index.
base_port = 18200
epoch = 8
grid = [(bs, cased) for bs in [8, 16, 32, 64] for cased in ['cased', 'uncased']]
for i, (bs, cased) in enumerate(grid):
    # Only the cased model gets the extra tokenizer flag.
    if cased == 'cased':
        lower_case = '--no_do_lower_case '
    else:
        lower_case = ''
    fields = dict(bs=bs, epoch=epoch, cased=cased)
    script = template.format(lower_case=lower_case, port=base_port + i, **fields)

    fn = 'script_wnli_{cased}_bs{bs}_epoch{epoch}.sh'.format(**fields)
    with open(fn, 'w') as f:
        f.write(script)

## STSB

In [159]:
# Slurm batch-script template for the GLUE STS-B fine-tuning/evaluation runs.
# It is filled in with str.format(); the placeholders are:
#   {cased}      - 'cased' or 'uncased'; picks ../models/matbert-base-{cased}/ and names the job/output
#   {bs}         - value passed as --per_device_train_batch_size
#   {epoch}      - value passed as --num_train_epochs
#   {port}       - --master_port handed to torch.distributed.launch
#   {lower_case} - extra flag text inserted right after the model path (may be empty)
# Shell syntax such as $SCRATCH, $OUTPUTDIR and $(date) contains no braces, so format() leaves it alone.
# Every shell line continuation is spelled "\\" so a literal backslash reaches the generated script;
# a lone backslash before a newline would be swallowed by Python and splice the two lines together.
# The start-up echo reports {cased} so that the output of uncased runs is labelled correctly.
template = """#!/bin/bash
#SBATCH -N 1
#SBATCH -A m3624
#SBATCH -C gpu
#SBATCH -J stsb-{cased}-{bs}-{epoch}
#SBATCH -G 4
#SBATCH -c 20
#SBATCH --mail-user=haoyan.huo@lbl.gov
#SBATCH -t 0:59:59
#SBATCH --output=eval_stsb_{cased}_bs{bs}_epoch{epoch}.out
## DISABLED: SBATCH --mail-type=END,FAIL

module load cuda
source ~/miniconda3/etc/profile.d/conda.sh
conda activate bert

export HF_HOME=$SCRATCH/cache/huggingface
export TOKENIZERS_PARALLELISM=false

export OUTPUTDIR=$SCRATCH/glue_eval/stsb_{cased}_bs{bs}_epoch{epoch}
mkdir -p $OUTPUTDIR
echo "Starting to train {cased}, batch size {bs}, epochs {epoch} at $(date)"
time python -m torch.distributed.launch --nnodes 1 --nproc_per_node 4 --master_addr 127.0.0.1 --master_port {port} \\
    evaluate_glue.py --overwrite_output_dir \\
        --no_pad_to_max_length \\
        --dataloader_num_workers 4 \\
        --model_name_or_path ../models/matbert-base-{cased}/ {lower_case}\\
        --per_device_train_batch_size {bs} \\
        --num_train_epochs {epoch} \\
        --output_dir $OUTPUTDIR \\
        --logging_dir $OUTPUTDIR/run \\
        --task_name stsb \\
        --do_train --do_eval --do_predict \\
        --load_best_model_at_end \\
        --metric_for_best_model "combined_score" \\
        --seed 42 \\
        --evaluation_strategy 'epoch' > $OUTPUTDIR/file.log 2>&1
echo "Finished at $(date)"
"""

In [160]:
# Render the STS-B template once per (batch size, casing) pair and save each
# result as its own .sh file; the master port is base_port plus the pair's index.
base_port = 18300
epoch = 5
grid = [(bs, cased) for bs in [2, 4, 8, 16] for cased in ['cased', 'uncased']]
for i, (bs, cased) in enumerate(grid):
    # Only the cased model gets the extra tokenizer flag.
    if cased == 'cased':
        lower_case = '--no_do_lower_case '
    else:
        lower_case = ''
    fields = dict(bs=bs, epoch=epoch, cased=cased)
    script = template.format(lower_case=lower_case, port=base_port + i, **fields)

    fn = 'script_stsb_{cased}_bs{bs}_epoch{epoch}.sh'.format(**fields)
    with open(fn, 'w') as f:
        f.write(script)

## RTE

In [161]:
# Slurm batch-script template for the GLUE RTE fine-tuning/evaluation runs.
# It is filled in with str.format(); the placeholders are:
#   {cased}      - 'cased' or 'uncased'; picks ../models/matbert-base-{cased}/ and names the job/output
#   {bs}         - value passed as --per_device_train_batch_size
#   {epoch}      - value passed as --num_train_epochs
#   {port}       - --master_port handed to torch.distributed.launch
#   {lower_case} - extra flag text inserted right after the model path (may be empty)
# Shell syntax such as $SCRATCH, $OUTPUTDIR and $(date) contains no braces, so format() leaves it alone.
# Every shell line continuation is spelled "\\" so a literal backslash reaches the generated script;
# a lone backslash before a newline would be swallowed by Python and splice the two lines together.
# The start-up echo reports {cased} so that the output of uncased runs is labelled correctly.
template = """#!/bin/bash
#SBATCH -N 1
#SBATCH -A m3624
#SBATCH -C gpu
#SBATCH -J rte-{cased}-{bs}-{epoch}
#SBATCH -G 4
#SBATCH -c 20
#SBATCH --mail-user=haoyan.huo@lbl.gov
#SBATCH -t 0:59:59
#SBATCH --output=eval_rte_{cased}_bs{bs}_epoch{epoch}.out
## DISABLED: SBATCH --mail-type=END,FAIL

module load cuda
source ~/miniconda3/etc/profile.d/conda.sh
conda activate bert

export HF_HOME=$SCRATCH/cache/huggingface
export TOKENIZERS_PARALLELISM=false

export OUTPUTDIR=$SCRATCH/glue_eval/rte_{cased}_bs{bs}_epoch{epoch}
mkdir -p $OUTPUTDIR
echo "Starting to train {cased}, batch size {bs}, epochs {epoch} at $(date)"
time python -m torch.distributed.launch --nnodes 1 --nproc_per_node 4 --master_addr 127.0.0.1 --master_port {port} \\
    evaluate_glue.py --overwrite_output_dir \\
        --no_pad_to_max_length \\
        --dataloader_num_workers 4 \\
        --model_name_or_path ../models/matbert-base-{cased}/ {lower_case}\\
        --per_device_train_batch_size {bs} \\
        --num_train_epochs {epoch} \\
        --output_dir $OUTPUTDIR \\
        --logging_dir $OUTPUTDIR/run \\
        --task_name rte \\
        --do_train --do_eval --do_predict \\
        --load_best_model_at_end \\
        --metric_for_best_model "accuracy" \\
        --seed 42 \\
        --evaluation_strategy 'epoch' > $OUTPUTDIR/file.log 2>&1
echo "Finished at $(date)"
"""

In [162]:
# Render the RTE template once per (batch size, casing) pair and save each
# result as its own .sh file; the master port is base_port plus the pair's index.
base_port = 18400
epoch = 5
grid = [(bs, cased) for bs in [2, 4, 8, 16] for cased in ['cased', 'uncased']]
for i, (bs, cased) in enumerate(grid):
    # Only the cased model gets the extra tokenizer flag.
    if cased == 'cased':
        lower_case = '--no_do_lower_case '
    else:
        lower_case = ''
    fields = dict(bs=bs, epoch=epoch, cased=cased)
    script = template.format(lower_case=lower_case, port=base_port + i, **fields)

    fn = 'script_rte_{cased}_bs{bs}_epoch{epoch}.sh'.format(**fields)
    with open(fn, 'w') as f:
        f.write(script)

## QQP

In [164]:
# Slurm batch-script template for the GLUE QQP fine-tuning/evaluation runs.
# It is filled in with str.format(); the placeholders are:
#   {cased}      - 'cased' or 'uncased'; picks ../models/matbert-base-{cased}/ and names the job/output
#   {bs}         - value passed as --per_device_train_batch_size
#   {epoch}      - value passed as --num_train_epochs
#   {port}       - --master_port handed to torch.distributed.launch
#   {lower_case} - extra flag text inserted right after the model path (may be empty)
# Shell syntax such as $SCRATCH, $OUTPUTDIR and $(date) contains no braces, so format() leaves it alone.
# Every shell line continuation is spelled "\\" so a literal backslash reaches the generated script;
# a lone backslash before a newline would be swallowed by Python and splice the two lines together.
# The start-up echo reports {cased} so that the output of uncased runs is labelled correctly.
template = """#!/bin/bash
#SBATCH -N 1
#SBATCH -A m3624
#SBATCH -C gpu
#SBATCH -J qqp-{cased}-{bs}-{epoch}
#SBATCH -G 4
#SBATCH -c 20
#SBATCH --mail-user=haoyan.huo@lbl.gov
#SBATCH -t 3:59:59
#SBATCH --output=eval_qqp_{cased}_bs{bs}_epoch{epoch}.out
## DISABLED: SBATCH --mail-type=END,FAIL

module load cuda
source ~/miniconda3/etc/profile.d/conda.sh
conda activate bert

export HF_HOME=$SCRATCH/cache/huggingface
export TOKENIZERS_PARALLELISM=false

export OUTPUTDIR=$SCRATCH/glue_eval/qqp_{cased}_bs{bs}_epoch{epoch}
mkdir -p $OUTPUTDIR
echo "Starting to train {cased}, batch size {bs}, epochs {epoch} at $(date)"
time python -m torch.distributed.launch --nnodes 1 --nproc_per_node 4 --master_addr 127.0.0.1 --master_port {port} \\
    evaluate_glue.py --overwrite_output_dir \\
        --no_pad_to_max_length \\
        --dataloader_num_workers 4 \\
        --model_name_or_path ../models/matbert-base-{cased}/ {lower_case}\\
        --per_device_train_batch_size {bs} \\
        --num_train_epochs {epoch} \\
        --output_dir $OUTPUTDIR \\
        --logging_dir $OUTPUTDIR/run \\
        --task_name qqp \\
        --do_train --do_eval --do_predict \\
        --load_best_model_at_end \\
        --metric_for_best_model "combined_score" \\
        --seed 42 \\
        --evaluation_strategy 'epoch' > $OUTPUTDIR/file.log 2>&1
echo "Finished at $(date)"
"""

In [165]:
# Render the QQP template once per (batch size, casing) pair and save each
# result as its own .sh file; the master port is base_port plus the pair's index.
base_port = 18500
epoch = 5
grid = [(bs, cased) for bs in [8, 16] for cased in ['cased', 'uncased']]
for i, (bs, cased) in enumerate(grid):
    # Only the cased model gets the extra tokenizer flag.
    if cased == 'cased':
        lower_case = '--no_do_lower_case '
    else:
        lower_case = ''
    fields = dict(bs=bs, epoch=epoch, cased=cased)
    script = template.format(lower_case=lower_case, port=base_port + i, **fields)

    fn = 'script_qqp_{cased}_bs{bs}_epoch{epoch}.sh'.format(**fields)
    with open(fn, 'w') as f:
        f.write(script)

## SST2

In [166]:
# Slurm batch-script template for the GLUE SST-2 fine-tuning/evaluation runs.
# It is filled in with str.format(); the placeholders are:
#   {cased}      - 'cased' or 'uncased'; picks ../models/matbert-base-{cased}/ and names the job/output
#   {bs}         - value passed as --per_device_train_batch_size
#   {epoch}      - value passed as --num_train_epochs
#   {port}       - --master_port handed to torch.distributed.launch
#   {lower_case} - extra flag text inserted right after the model path (may be empty)
# Shell syntax such as $SCRATCH, $OUTPUTDIR and $(date) contains no braces, so format() leaves it alone.
# Every shell line continuation is spelled "\\" so a literal backslash reaches the generated script;
# a lone backslash before a newline would be swallowed by Python and splice the two lines together.
# The start-up echo reports {cased} so that the output of uncased runs is labelled correctly.
template = """#!/bin/bash
#SBATCH -N 1
#SBATCH -A m3624
#SBATCH -C gpu
#SBATCH -J sst2-{cased}-{bs}-{epoch}
#SBATCH -G 4
#SBATCH -c 20
#SBATCH --mail-user=haoyan.huo@lbl.gov
#SBATCH -t 3:59:59
#SBATCH --output=eval_sst2_{cased}_bs{bs}_epoch{epoch}.out
## DISABLED: SBATCH --mail-type=END,FAIL

module load cuda
source ~/miniconda3/etc/profile.d/conda.sh
conda activate bert

export HF_HOME=$SCRATCH/cache/huggingface
export TOKENIZERS_PARALLELISM=false

export OUTPUTDIR=$SCRATCH/glue_eval/sst2_{cased}_bs{bs}_epoch{epoch}
mkdir -p $OUTPUTDIR
echo "Starting to train {cased}, batch size {bs}, epochs {epoch} at $(date)"
time python -m torch.distributed.launch --nnodes 1 --nproc_per_node 4 --master_addr 127.0.0.1 --master_port {port} \\
    evaluate_glue.py --overwrite_output_dir \\
        --no_pad_to_max_length \\
        --dataloader_num_workers 4 \\
        --model_name_or_path ../models/matbert-base-{cased}/ {lower_case}\\
        --per_device_train_batch_size {bs} \\
        --num_train_epochs {epoch} \\
        --output_dir $OUTPUTDIR \\
        --logging_dir $OUTPUTDIR/run \\
        --task_name sst2 \\
        --do_train --do_eval --do_predict \\
        --load_best_model_at_end \\
        --metric_for_best_model "accuracy" \\
        --seed 42 \\
        --evaluation_strategy 'epoch' > $OUTPUTDIR/file.log 2>&1
echo "Finished at $(date)"
"""

In [167]:
# Render the SST-2 template once per (batch size, casing) pair and save each
# result as its own .sh file; the master port is base_port plus the pair's index.
base_port = 18600
epoch = 5
grid = [(bs, cased) for bs in [2, 4, 8, 16] for cased in ['cased', 'uncased']]
for i, (bs, cased) in enumerate(grid):
    # Only the cased model gets the extra tokenizer flag.
    if cased == 'cased':
        lower_case = '--no_do_lower_case '
    else:
        lower_case = ''
    fields = dict(bs=bs, epoch=epoch, cased=cased)
    script = template.format(lower_case=lower_case, port=base_port + i, **fields)

    fn = 'script_sst2_{cased}_bs{bs}_epoch{epoch}.sh'.format(**fields)
    with open(fn, 'w') as f:
        f.write(script)