Skip to content

Commit

Permalink
evaluate line-corporation large models (Stability-AI#81)
Browse files Browse the repository at this point in the history
  • Loading branch information
kumapo authored and polm committed Oct 11, 2023
1 parent 42491ae commit 4423683
Show file tree
Hide file tree
Showing 8 changed files with 296 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Evaluate line-corporation/japanese-large-lm-1.7b-instruction-sft with the hf-causal backend.
# Comma-separated key=value options forwarded to main.py via --model_args.
MODEL_ARGS="pretrained=line-corporation/japanese-large-lm-1.7b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto"
# Comma-separated list of the eight tasks to run.
TASK="jcommonsenseqa-1.1-0.5,jnli-1.1-0.5,marc_ja-1.1-0.5,jsquad-1.1-0.5,jaqket_v2-0.2-0.5,xlsum_ja-1.0-0.5,xwinograd_ja,mgsm-1.0-0.5"
# Expansions are quoted so each value always reaches main.py as exactly one argument,
# even if it later gains whitespace or glob characters.
# NOTE(review): --num_fewshot has eight entries, presumably one per task in TASK order; confirm against main.py.
python main.py --model hf-causal --model_args "$MODEL_ARGS" --tasks "$TASK" --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/line-corporation/line-corporation-japanese-large-lm-1.7b-instruction-sft/result.json"
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
{
"results": {
"jcommonsenseqa-1.1-0.5": {
"acc": 0.22430741733690795,
"acc_stderr": 0.012475148816050531,
"acc_norm": 0.23681858802502234,
"acc_norm_stderr": 0.01271454677969028
},
"jnli-1.1-0.5": {
"acc": 0.34346754313886607,
"acc_stderr": 0.009627197865307401,
"acc_norm": 0.3011503697617091,
"acc_norm_stderr": 0.009300633175085522
},
"marc_ja-1.1-0.5": {
"acc": 0.8036788114609126,
"acc_stderr": 0.005283057698929343,
"acc_norm": 0.8036788114609126,
"acc_norm_stderr": 0.005283057698929343
},
"xwinograd_ja": {
"acc": 0.6329509906152242,
"acc_stderr": 0.015572714283682185
},
"jsquad-1.1-0.5": {
"exact_match": 30.977037370553806,
"f1": 48.12415333506568
},
"jaqket_v2-0.2-0.5": {
"exact_match": 25.257731958762886,
"f1": 40.58191140665372
},
"xlsum_ja-1.0-0.5": {
"rouge2": 1.0385441084792033
},
"mgsm-1.0-0.5": {
"acc": 0.016,
"acc_stderr": 0.007951661188874354
}
},
"versions": {
"jcommonsenseqa-1.1-0.5": 1.1,
"jnli-1.1-0.5": 1.1,
"marc_ja-1.1-0.5": 1.1,
"jsquad-1.1-0.5": 1.1,
"jaqket_v2-0.2-0.5": 0.2,
"xlsum_ja-1.0-0.5": 1.0,
"xwinograd_ja": 1.0,
"mgsm-1.0-0.5": 1.0
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=line-corporation/japanese-large-lm-1.7b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto",
"num_fewshot": [
3,
3,
3,
2,
1,
1,
0,
5
],
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Evaluate line-corporation/japanese-large-lm-1.7b with the hf-causal backend.
# Comma-separated key=value options forwarded to main.py via --model_args.
MODEL_ARGS="pretrained=line-corporation/japanese-large-lm-1.7b,use_fast=False,device_map=auto,torch_dtype=auto"
# Comma-separated list of the eight tasks to run.
TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.2-0.2,xlsum_ja,xwinograd_ja,mgsm"
# Expansions are quoted so each value always reaches main.py as exactly one argument,
# even if it later gains whitespace or glob characters.
# NOTE(review): --num_fewshot has eight entries, presumably one per task in TASK order; confirm against main.py.
python main.py --model hf-causal --model_args "$MODEL_ARGS" --tasks "$TASK" --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/line-corporation/line-corporation-japanese-large-lm-1.7b/result.json"
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
{
"results": {
"jcommonsenseqa-1.1-0.2": {
"acc": 0.30831099195710454,
"acc_stderr": 0.013811124479483034,
"acc_norm": 0.26005361930294907,
"acc_norm_stderr": 0.013119300343161644
},
"jnli-1.1-0.2": {
"acc": 0.35949055053410023,
"acc_stderr": 0.009728266419780814,
"acc_norm": 0.300328677074774,
"acc_norm_stderr": 0.00929339473482123
},
"marc_ja-1.1-0.2": {
"acc": 0.745136186770428,
"acc_stderr": 0.005796054001130057,
"acc_norm": 0.745136186770428,
"acc_norm_stderr": 0.005796054001130057
},
"xwinograd_ja": {
"acc": 0.6611053180396246,
"acc_stderr": 0.015292727421996942
},
"jsquad-1.1-0.2": {
"exact_match": 56.55110310670869,
"f1": 69.46989310703984
},
"jaqket_v2-0.2-0.2": {
"exact_match": 52.06185567010309,
"f1": 60.433303332787865
},
"xlsum_ja": {
"rouge2": 8.408787633129647
},
"mgsm": {
"acc": 0.0,
"acc_stderr": 0.0
}
},
"versions": {
"jcommonsenseqa-1.1-0.2": 1.1,
"jnli-1.1-0.2": 1.1,
"marc_ja-1.1-0.2": 1.1,
"jsquad-1.1-0.2": 1.1,
"jaqket_v2-0.2-0.2": 0.2,
"xlsum_ja": 1.0,
"xwinograd_ja": 1.0,
"mgsm": 1.0
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=line-corporation/japanese-large-lm-1.7b,use_fast=False,device_map=auto,torch_dtype=auto",
"num_fewshot": [
3,
3,
3,
2,
1,
1,
0,
5
],
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Evaluate line-corporation/japanese-large-lm-3.6b-instruction-sft with the hf-causal backend.
# Comma-separated key=value options forwarded to main.py via --model_args.
MODEL_ARGS="pretrained=line-corporation/japanese-large-lm-3.6b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto"
# Comma-separated list of the eight tasks to run.
TASK="jcommonsenseqa-1.1-0.5,jnli-1.1-0.5,marc_ja-1.1-0.5,jsquad-1.1-0.5,jaqket_v2-0.2-0.5,xlsum_ja-1.0-0.5,xwinograd_ja,mgsm-1.0-0.5"
# Expansions are quoted so each value always reaches main.py as exactly one argument,
# even if it later gains whitespace or glob characters.
# NOTE(review): --num_fewshot has eight entries, presumably one per task in TASK order; confirm against main.py.
python main.py --model hf-causal --model_args "$MODEL_ARGS" --tasks "$TASK" --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/line-corporation/line-corporation-japanese-large-lm-3.6b-instruction-sft/result.json"
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
{
"results": {
"jcommonsenseqa-1.1-0.5": {
"acc": 0.3360142984807864,
"acc_stderr": 0.014126590011265207,
"acc_norm": 0.26720285969615726,
"acc_norm_stderr": 0.013234012242081952
},
"jnli-1.1-0.5": {
"acc": 0.4256368118323747,
"acc_stderr": 0.010024017935515625,
"acc_norm": 0.3019720624486442,
"acc_norm_stderr": 0.009307836171755053
},
"marc_ja-1.1-0.5": {
"acc": 0.5509373894587902,
"acc_stderr": 0.006615536639080702,
"acc_norm": 0.5509373894587902,
"acc_norm_stderr": 0.006615536639080702
},
"xwinograd_ja": {
"acc": 0.6465067778936392,
"acc_stderr": 0.015445228301221386
},
"jsquad-1.1-0.5": {
"exact_match": 44.371904547501124,
"f1": 59.516773934435584
},
"jaqket_v2-0.2-0.5": {
"exact_match": 39.86254295532646,
"f1": 51.98299576521227
},
"xlsum_ja-1.0-0.5": {
"rouge2": 6.577976426409143
},
"mgsm-1.0-0.5": {
"acc": 0.024,
"acc_stderr": 0.009699087026964249
}
},
"versions": {
"jcommonsenseqa-1.1-0.5": 1.1,
"jnli-1.1-0.5": 1.1,
"marc_ja-1.1-0.5": 1.1,
"jsquad-1.1-0.5": 1.1,
"jaqket_v2-0.2-0.5": 0.2,
"xlsum_ja-1.0-0.5": 1.0,
"xwinograd_ja": 1.0,
"mgsm-1.0-0.5": 1.0
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=line-corporation/japanese-large-lm-3.6b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto",
"num_fewshot": [
3,
3,
3,
2,
1,
1,
0,
5
],
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Evaluate line-corporation/japanese-large-lm-3.6b with the hf-causal backend.
# Comma-separated key=value options forwarded to main.py via --model_args.
MODEL_ARGS="pretrained=line-corporation/japanese-large-lm-3.6b,use_fast=False,device_map=auto,torch_dtype=auto"
# Comma-separated list of the eight tasks to run.
TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,jaqket_v2-0.2-0.2,xlsum_ja,xwinograd_ja,mgsm"
# Expansions are quoted so each value always reaches main.py as exactly one argument,
# even if it later gains whitespace or glob characters.
# NOTE(review): --num_fewshot has eight entries, presumably one per task in TASK order; confirm against main.py.
python main.py --model hf-causal --model_args "$MODEL_ARGS" --tasks "$TASK" --num_fewshot "3,3,3,2,1,1,0,5" --device "cuda" --output_path "models/line-corporation/line-corporation-japanese-large-lm-3.6b/result.json"
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
{
"results": {
"jcommonsenseqa-1.1-0.2": {
"acc": 0.24039320822162646,
"acc_stderr": 0.01278011066769292,
"acc_norm": 0.2421805183199285,
"acc_norm_stderr": 0.0128124322893179
},
"jnli-1.1-0.2": {
"acc": 0.29950698438783896,
"acc_stderr": 0.009286120768078254,
"acc_norm": 0.30156121610517667,
"acc_norm_stderr": 0.009304239098715018
},
"marc_ja-1.1-0.2": {
"acc": 0.7939511850017686,
"acc_stderr": 0.005379506895071017,
"acc_norm": 0.7939511850017686,
"acc_norm_stderr": 0.005379506895071017
},
"xwinograd_ja": {
"acc": 0.7028154327424401,
"acc_stderr": 0.014765597190000436
},
"jsquad-1.1-0.2": {
"exact_match": 62.26924808644755,
"f1": 74.52057820837234
},
"jaqket_v2-0.2-0.2": {
"exact_match": 67.18213058419244,
"f1": 74.29659878113482
},
"xlsum_ja": {
"rouge2": 8.610239752200977
},
"mgsm": {
"acc": 0.028,
"acc_stderr": 0.010454721651927288
}
},
"versions": {
"jcommonsenseqa-1.1-0.2": 1.1,
"jnli-1.1-0.2": 1.1,
"marc_ja-1.1-0.2": 1.1,
"jsquad-1.1-0.2": 1.1,
"jaqket_v2-0.2-0.2": 0.2,
"xlsum_ja": 1.0,
"xwinograd_ja": 1.0,
"mgsm": 1.0
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=line-corporation/japanese-large-lm-3.6b,use_fast=False,device_map=auto,torch_dtype=auto",
"num_fewshot": [
3,
3,
3,
2,
1,
1,
0,
5
],
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}

0 comments on commit 4423683

Please sign in to comment.