From 4423683d4e554421ebbace4c75557e00e824b741 Mon Sep 17 00:00:00 2001 From: kumapo Date: Fri, 29 Sep 2023 13:28:10 +0900 Subject: [PATCH] evaluate line-corporation large models (#81) --- .../harness.sh | 3 + .../result.json | 71 +++++++++++++++++++ .../harness.sh | 3 + .../result.json | 71 +++++++++++++++++++ .../harness.sh | 3 + .../result.json | 71 +++++++++++++++++++ .../harness.sh | 3 + .../result.json | 71 +++++++++++++++++++ 8 files changed, 296 insertions(+) create mode 100644 models/line-corporation/line-corporation-japanese-large-lm-1.7b-instruction-sft/harness.sh create mode 100644 models/line-corporation/line-corporation-japanese-large-lm-1.7b-instruction-sft/result.json create mode 100644 models/line-corporation/line-corporation-japanese-large-lm-1.7b/harness.sh create mode 100644 models/line-corporation/line-corporation-japanese-large-lm-1.7b/result.json create mode 100644 models/line-corporation/line-corporation-japanese-large-lm-3.6b-instruction-sft/harness.sh create mode 100644 models/line-corporation/line-corporation-japanese-large-lm-3.6b-instruction-sft/result.json create mode 100644 models/line-corporation/line-corporation-japanese-large-lm-3.6b/harness.sh create mode 100644 models/line-corporation/line-corporation-japanese-large-lm-3.6b/result.json diff --git a/models/line-corporation/line-corporation-japanese-large-lm-1.7b-instruction-sft/harness.sh b/models/line-corporation/line-corporation-japanese-large-lm-1.7b-instruction-sft/harness.sh new file mode 100644 index 0000000000..9d5d79f544 --- /dev/null +++ b/models/line-corporation/line-corporation-japanese-large-lm-1.7b-instruction-sft/harness.sh @@ -0,0 +1,3 @@ +MODEL_ARGS="pretrained=line-corporation/japanese-large-lm-1.7b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto" +TASK="jcommonsenseqa-1.1-0.5,jnli-1.1-0.5,marc_ja-1.1-0.5,jsquad-1.1-0.5,jaqket_v2-0.2-0.5,xlsum_ja-1.0-0.5,xwinograd_ja,mgsm-1.0-0.5" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/line-corporation/line-corporation-japanese-large-lm-1.7b-instruction-sft/result.json" diff --git a/models/line-corporation/line-corporation-japanese-large-lm-1.7b-instruction-sft/result.json b/models/line-corporation/line-corporation-japanese-large-lm-1.7b-instruction-sft/result.json new file mode 100644 index 0000000000..220d66b235 --- /dev/null +++ b/models/line-corporation/line-corporation-japanese-large-lm-1.7b-instruction-sft/result.json @@ -0,0 +1,71 @@ +{ + "results": { + "jcommonsenseqa-1.1-0.5": { + "acc": 0.22430741733690795, + "acc_stderr": 0.012475148816050531, + "acc_norm": 0.23681858802502234, + "acc_norm_stderr": 0.01271454677969028 + }, + "jnli-1.1-0.5": { + "acc": 0.34346754313886607, + "acc_stderr": 0.009627197865307401, + "acc_norm": 0.3011503697617091, + "acc_norm_stderr": 0.009300633175085522 + }, + "marc_ja-1.1-0.5": { + "acc": 0.8036788114609126, + "acc_stderr": 0.005283057698929343, + "acc_norm": 0.8036788114609126, + "acc_norm_stderr": 0.005283057698929343 + }, + "xwinograd_ja": { + "acc": 0.6329509906152242, + "acc_stderr": 0.015572714283682185 + }, + "jsquad-1.1-0.5": { + "exact_match": 30.977037370553806, + "f1": 48.12415333506568 + }, + "jaqket_v2-0.2-0.5": { + "exact_match": 25.257731958762886, + "f1": 40.58191140665372 + }, + "xlsum_ja-1.0-0.5": { + "rouge2": 1.0385441084792033 + }, + "mgsm-1.0-0.5": { + "acc": 0.016, + "acc_stderr": 0.007951661188874354 + } + }, + "versions": { + "jcommonsenseqa-1.1-0.5": 1.1, + "jnli-1.1-0.5": 1.1, + "marc_ja-1.1-0.5": 1.1, + "jsquad-1.1-0.5": 1.1, + "jaqket_v2-0.2-0.5": 0.2, + "xlsum_ja-1.0-0.5": 1.0, + "xwinograd_ja": 1.0, + "mgsm-1.0-0.5": 1.0 + }, + "config": { + "model": "hf-causal", + "model_args": "pretrained=line-corporation/japanese-large-lm-1.7b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto", + "num_fewshot": [ + 3, + 3, + 3, + 2, + 1, + 1, + 0, + 5 + ], + "batch_size": null, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/models/line-corporation/line-corporation-japanese-large-lm-1.7b/harness.sh b/models/line-corporation/line-corporation-japanese-large-lm-1.7b/harness.sh new file mode 100644 index 0000000000..37dcae95fb --- /dev/null +++ b/models/line-corporation/line-corporation-japanese-large-lm-1.7b/harness.sh @@ -0,0 +1,3 @@ +MODEL_ARGS="pretrained=line-corporation/japanese-large-lm-1.7b,use_fast=False,device_map=auto,torch_dtype=auto" +TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.2-0.2,xlsum_ja,xwinograd_ja,mgsm" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/line-corporation/line-corporation-japanese-large-lm-1.7b/result.json" \ No newline at end of file diff --git a/models/line-corporation/line-corporation-japanese-large-lm-1.7b/result.json b/models/line-corporation/line-corporation-japanese-large-lm-1.7b/result.json new file mode 100644 index 0000000000..84b5b920d8 --- /dev/null +++ b/models/line-corporation/line-corporation-japanese-large-lm-1.7b/result.json @@ -0,0 +1,71 @@ +{ + "results": { + "jcommonsenseqa-1.1-0.2": { + "acc": 0.30831099195710454, + "acc_stderr": 0.013811124479483034, + "acc_norm": 0.26005361930294907, + "acc_norm_stderr": 0.013119300343161644 + }, + "jnli-1.1-0.2": { + "acc": 0.35949055053410023, + "acc_stderr": 0.009728266419780814, + "acc_norm": 0.300328677074774, + "acc_norm_stderr": 0.00929339473482123 + }, + "marc_ja-1.1-0.2": { + "acc": 0.745136186770428, + "acc_stderr": 0.005796054001130057, + "acc_norm": 0.745136186770428, + "acc_norm_stderr": 0.005796054001130057 + }, + "xwinograd_ja": { + "acc": 0.6611053180396246, + "acc_stderr": 0.015292727421996942 + }, + "jsquad-1.1-0.2": { + "exact_match": 56.55110310670869, + "f1": 69.46989310703984 + }, + "jaqket_v2-0.2-0.2": { + "exact_match": 52.06185567010309, + "f1": 60.433303332787865 + }, + "xlsum_ja": { + "rouge2": 8.408787633129647 + }, + "mgsm": { + "acc": 0.0, + "acc_stderr": 0.0 + } + }, + "versions": { + "jcommonsenseqa-1.1-0.2": 1.1, + "jnli-1.1-0.2": 1.1, + "marc_ja-1.1-0.2": 1.1, + "jsquad-1.1-0.2": 1.1, + "jaqket_v2-0.2-0.2": 0.2, + "xlsum_ja": 1.0, + "xwinograd_ja": 1.0, + "mgsm": 1.0 + }, + "config": { + "model": "hf-causal", + "model_args": "pretrained=line-corporation/japanese-large-lm-1.7b,use_fast=False,device_map=auto,torch_dtype=auto", + "num_fewshot": [ + 3, + 3, + 3, + 2, + 1, + 1, + 0, + 5 + ], + "batch_size": null, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/models/line-corporation/line-corporation-japanese-large-lm-3.6b-instruction-sft/harness.sh b/models/line-corporation/line-corporation-japanese-large-lm-3.6b-instruction-sft/harness.sh new file mode 100644 index 0000000000..d3f2377290 --- /dev/null +++ b/models/line-corporation/line-corporation-japanese-large-lm-3.6b-instruction-sft/harness.sh @@ -0,0 +1,3 @@ +MODEL_ARGS="pretrained=line-corporation/japanese-large-lm-3.6b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto" +TASK="jcommonsenseqa-1.1-0.5,jnli-1.1-0.5,marc_ja-1.1-0.5,jsquad-1.1-0.5,jaqket_v2-0.2-0.5,xlsum_ja-1.0-0.5,xwinograd_ja,mgsm-1.0-0.5" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/line-corporation/line-corporation-japanese-large-lm-3.6b-instruction-sft/result.json" diff --git a/models/line-corporation/line-corporation-japanese-large-lm-3.6b-instruction-sft/result.json b/models/line-corporation/line-corporation-japanese-large-lm-3.6b-instruction-sft/result.json new file mode 100644 index 0000000000..14b7b87c65 --- /dev/null +++ b/models/line-corporation/line-corporation-japanese-large-lm-3.6b-instruction-sft/result.json @@ -0,0 +1,71 @@ +{ + "results": { + "jcommonsenseqa-1.1-0.5": { + "acc": 0.3360142984807864, + "acc_stderr": 0.014126590011265207, + "acc_norm": 0.26720285969615726, + "acc_norm_stderr": 0.013234012242081952 + }, + "jnli-1.1-0.5": { + "acc": 0.4256368118323747, + "acc_stderr": 0.010024017935515625, + "acc_norm": 0.3019720624486442, + "acc_norm_stderr": 0.009307836171755053 + }, + "marc_ja-1.1-0.5": { + "acc": 0.5509373894587902, + "acc_stderr": 0.006615536639080702, + "acc_norm": 0.5509373894587902, + "acc_norm_stderr": 0.006615536639080702 + }, + "xwinograd_ja": { + "acc": 0.6465067778936392, + "acc_stderr": 0.015445228301221386 + }, + "jsquad-1.1-0.5": { + "exact_match": 44.371904547501124, + "f1": 59.516773934435584 + }, + "jaqket_v2-0.2-0.5": { + "exact_match": 39.86254295532646, + "f1": 51.98299576521227 + }, + "xlsum_ja-1.0-0.5": { + "rouge2": 6.577976426409143 + }, + "mgsm-1.0-0.5": { + "acc": 0.024, + "acc_stderr": 0.009699087026964249 + } + }, + "versions": { + "jcommonsenseqa-1.1-0.5": 1.1, + "jnli-1.1-0.5": 1.1, + "marc_ja-1.1-0.5": 1.1, + "jsquad-1.1-0.5": 1.1, + "jaqket_v2-0.2-0.5": 0.2, + "xlsum_ja-1.0-0.5": 1.0, + "xwinograd_ja": 1.0, + "mgsm-1.0-0.5": 1.0 + }, + "config": { + "model": "hf-causal", + "model_args": "pretrained=line-corporation/japanese-large-lm-3.6b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto", + "num_fewshot": [ + 3, + 3, + 3, + 2, + 1, + 1, + 0, + 5 + ], + "batch_size": null, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/models/line-corporation/line-corporation-japanese-large-lm-3.6b/harness.sh b/models/line-corporation/line-corporation-japanese-large-lm-3.6b/harness.sh new file mode 100644 index 0000000000..b63d3761f2 --- /dev/null +++ b/models/line-corporation/line-corporation-japanese-large-lm-3.6b/harness.sh @@ -0,0 +1,3 @@ +MODEL_ARGS="pretrained=line-corporation/japanese-large-lm-3.6b,use_fast=False,device_map=auto,torch_dtype=auto" +TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.2-0.2,xlsum_ja,xwinograd_ja,mgsm" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/line-corporation/line-corporation-japanese-large-lm-3.6b/result.json" diff --git a/models/line-corporation/line-corporation-japanese-large-lm-3.6b/result.json b/models/line-corporation/line-corporation-japanese-large-lm-3.6b/result.json new file mode 100644 index 0000000000..d670e23651 --- /dev/null +++ b/models/line-corporation/line-corporation-japanese-large-lm-3.6b/result.json @@ -0,0 +1,71 @@ +{ + "results": { + "jcommonsenseqa-1.1-0.2": { + "acc": 0.24039320822162646, + "acc_stderr": 0.01278011066769292, + "acc_norm": 0.2421805183199285, + "acc_norm_stderr": 0.0128124322893179 + }, + "jnli-1.1-0.2": { + "acc": 0.29950698438783896, + "acc_stderr": 0.009286120768078254, + "acc_norm": 0.30156121610517667, + "acc_norm_stderr": 0.009304239098715018 + }, + "marc_ja-1.1-0.2": { + "acc": 0.7939511850017686, + "acc_stderr": 0.005379506895071017, + "acc_norm": 0.7939511850017686, + "acc_norm_stderr": 0.005379506895071017 + }, + "xwinograd_ja": { + "acc": 0.7028154327424401, + "acc_stderr": 0.014765597190000436 + }, + "jsquad-1.1-0.2": { + "exact_match": 62.26924808644755, + "f1": 74.52057820837234 + }, + "jaqket_v2-0.2-0.2": { + "exact_match": 67.18213058419244, + "f1": 74.29659878113482 + }, + "xlsum_ja": { + "rouge2": 8.610239752200977 + }, + "mgsm": { + "acc": 0.028, + "acc_stderr": 0.010454721651927288 + } + }, + "versions": { + "jcommonsenseqa-1.1-0.2": 1.1, + "jnli-1.1-0.2": 1.1, + "marc_ja-1.1-0.2": 1.1, + "jsquad-1.1-0.2": 1.1, + "jaqket_v2-0.2-0.2": 0.2, + "xlsum_ja": 1.0, + "xwinograd_ja": 1.0, + "mgsm": 1.0 + }, + "config": { + "model": "hf-causal", + "model_args": "pretrained=line-corporation/japanese-large-lm-3.6b,use_fast=False,device_map=auto,torch_dtype=auto", + "num_fewshot": [ + 3, + 3, + 3, + 2, + 1, + 1, + 0, + 5 + ], + "batch_size": null, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file