Skip to content

Commit

Permalink
fix eval/language tags (#66)
Browse files (browse the repository at this point in the history)
* fix eval/language tag for ChatEval paper

* fix eval/language tag for ChatEval paper and u-BLEU

---------

Co-authored-by: XuhuiZhou <zhouxuhui2018@gmail.com>
  • Loading branch information
zhenwu0831 and XuhuiZhou committed May 25, 2024
1 parent 26496cb commit 8725972
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 58 deletions.
100 changes: 50 additions & 50 deletions components/data/chartData.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ export const bar_data = [
agents_with_personas: 24,
human: 92,
not_applicable: 148,
rule_based: 60,
rule_based: 58,
more_than_three_agents: 35,
more_information_asymmetrical: 2,
prompting_and_in_context_learning: 47,
Expand All @@ -25,7 +25,7 @@ export const bar_data = [
agents_with_memory: 16,
more_omniscient: 7,
pretraining: 18,
model_based: 45,
model_based: 47,
simulated_humans: 14,
agent_teams: 6,
health: 18,
Expand Down Expand Up @@ -133,28 +133,28 @@ export const area_data = [
rule_based: 1,
human_agent: 1,
more_than_three_agents: 0,
competition: 0,
finetuning: 0,
more_information_asymmetrical: 0,
more_omniscient: 0,
fully_omniscient: 0,
embodied: 0,
qualitative: 0,
not_applicable: 0,
pretraining: 0,
health: 0,
policy: 0,
two_agents: 0,
text: 0,
implicit_objectives: 0,
agent_teams: 0,
pretraining: 0,
agents_with_personas: 0,
text: 0,
model_based: 0,
simulated_humans: 0,
education: 0,
embodied: 0,
competition: 0,
virtual: 0,
implicit_objectives: 0,
fully_omniscient: 0,
not_applicable: 0,
qualitative: 0,
prompting_and_in_context_learning: 0,
more_information_asymmetrical: 0,
policy: 0,
agents_with_memory: 0,
two_agents: 0,
model_based: 0,
more_omniscient: 0,
finetuning: 0,
education: 0,
},
{
name: '2016',
Expand All @@ -176,18 +176,18 @@ export const area_data = [
more_than_three_agents: 1,
model_based: 1,
education: 1,
more_information_asymmetrical: 0,
more_omniscient: 0,
fully_omniscient: 0,
not_applicable: 0,
pretraining: 0,
health: 0,
policy: 0,
implicit_objectives: 0,
agent_teams: 0,
pretraining: 0,
simulated_humans: 0,
implicit_objectives: 0,
fully_omniscient: 0,
not_applicable: 0,
prompting_and_in_context_learning: 0,
more_information_asymmetrical: 0,
policy: 0,
agents_with_memory: 0,
more_omniscient: 0,
},
{
name: '2017',
Expand All @@ -205,22 +205,22 @@ export const area_data = [
qualitative: 1,
human: 1,
more_than_three_agents: 0,
finetuning: 0,
more_information_asymmetrical: 0,
more_omniscient: 0,
fully_omniscient: 0,
embodied: 0,
pretraining: 0,
health: 0,
policy: 0,
implicit_objectives: 0,
agent_teams: 0,
pretraining: 0,
agents_with_personas: 0,
collaboration: 0,
model_based: 0,
simulated_humans: 0,
education: 0,
embodied: 0,
implicit_objectives: 0,
fully_omniscient: 0,
prompting_and_in_context_learning: 0,
more_information_asymmetrical: 0,
policy: 0,
model_based: 0,
more_omniscient: 0,
finetuning: 0,
collaboration: 0,
education: 0,
},
{
name: '2018',
Expand Down Expand Up @@ -250,10 +250,10 @@ export const area_data = [
model_based: 1,
education: 1,
more_omniscient: 1,
agent_teams: 0,
pretraining: 0,
more_information_asymmetrical: 0,
pretraining: 0,
policy: 0,
agent_teams: 0,
},
{
name: '2019',
Expand All @@ -279,24 +279,24 @@ export const area_data = [
agent_teams: 1,
model_based: 1,
health: 1,
pretraining: 0,
simulated_humans: 0,
education: 0,
fully_omniscient: 0,
more_information_asymmetrical: 0,
more_omniscient: 0,
fully_omniscient: 0,
pretraining: 0,
policy: 0,
simulated_humans: 0,
agents_with_memory: 0,
more_omniscient: 0,
education: 0,
},
{
name: '2020',
text: 18,
mixed_objectives: 9,
more_than_three_agents: 6,
model_based: 10,
model_based: 11,
human_agent: 11,
collaboration: 5,
rule_based: 17,
rule_based: 16,
not_applicable: 25,
finetuning: 6,
agents_with_personas: 1,
Expand All @@ -317,8 +317,8 @@ export const area_data = [
policy: 1,
education: 2,
more_omniscient: 2,
agent_teams: 0,
more_information_asymmetrical: 0,
agent_teams: 0,
agents_with_memory: 0,
},
{
Expand Down Expand Up @@ -349,9 +349,9 @@ export const area_data = [
more_omniscient: 1,
agents_with_personas: 1,
prompting_and_in_context_learning: 1,
pretraining: 0,
fully_omniscient: 0,
more_information_asymmetrical: 0,
fully_omniscient: 0,
pretraining: 0,
agents_with_memory: 0,
},
{
Expand Down Expand Up @@ -384,16 +384,16 @@ export const area_data = [
education: 6,
policy: 2,
more_omniscient: 2,
fully_omniscient: 0,
more_information_asymmetrical: 0,
fully_omniscient: 0,
},
{
name: '2023',
collaboration: 25,
embodied: 25,
prompting_and_in_context_learning: 55,
more_than_three_agents: 21,
rule_based: 69,
rule_based: 68,
not_applicable: 81,
text: 53,
implicit_objectives: 18,
Expand All @@ -413,7 +413,7 @@ export const area_data = [
pretraining: 7,
agent_teams: 10,
agents_with_personas: 8,
model_based: 17,
model_based: 18,
fully_omniscient: 1,
health: 7,
policy: 2,
Expand Down
8 changes: 4 additions & 4 deletions components/papers.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2246,10 +2246,10 @@ export const data: Paper[] = [
date: "07/2020",
environments: "text",
agents: "n/a",
evaluation: "rule_based",
evaluation: "model_based",
other: "human_agent",
url: "https://aclanthology.org/2020.acl-srw.27",
bibtex: "@inproceedings{tsuta-etal-2020-ubleu,\n title = \"u{BLEU}: Uncertainty-Aware Automatic Evaluation Method for Open-Domain Dialogue Systems\",\n author = \"Tsuta, Yuma and\n Yoshinaga, Naoki and\n Toyoda, Masashi\",\n editor = \"Rijhwani, Shruti and\n Liu, Jiangming and\n Wang, Yizhong and\n Dror, Rotem\",\n booktitle = \"Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop\",\n month = jul,\n year = \"2020\",\n address = \"Online\",\n publisher = \"Association for Computational Linguistics\",\n url = \"https://aclanthology.org/2020.acl-srw.27\",\n doi = \"10.18653/v1/2020.acl-srw.27\",\n pages = \"199--206\",\n environments = {text},\n agents = {n/a},\n evaluation = {rule_based},\n other = {human_agent}\n}",
bibtex: "@inproceedings{tsuta-etal-2020-ubleu,\n title = \"u{BLEU}: Uncertainty-Aware Automatic Evaluation Method for Open-Domain Dialogue Systems\",\n author = \"Tsuta, Yuma and\n Yoshinaga, Naoki and\n Toyoda, Masashi\",\n editor = \"Rijhwani, Shruti and\n Liu, Jiangming and\n Wang, Yizhong and\n Dror, Rotem\",\n booktitle = \"Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop\",\n month = jul,\n year = \"2020\",\n address = \"Online\",\n publisher = \"Association for Computational Linguistics\",\n url = \"https://aclanthology.org/2020.acl-srw.27\",\n doi = \"10.18653/v1/2020.acl-srw.27\",\n pages = \"199--206\",\n environments = {text},\n agents = {n/a},\n evaluation = {model_based},\n other = {human_agent}\n}",
authors: "Tsuta et al.",
subsection: "evaluation/language",
},
Expand Down Expand Up @@ -2428,10 +2428,10 @@ export const data: Paper[] = [
date: "08/2023",
environments: "collaboration, text",
agents: "prompting_and_in_context_learning, more_than_three_agents",
evaluation: "rule_based",
evaluation: "model_based",
other: "n/a",
url: "https://arxiv.org/abs/2308.07201",
bibtex: "@misc{chan2023chateval,\n title={ChatEval: Towards Better LLM-based Evaluators through Multi-Agent Debate}, \n author={Chi-Min Chan and Weize Chen and Yusheng Su and Jianxuan Yu and Wei Xue and Shanghang Zhang and Jie Fu and Zhiyuan Liu},\n year={2023},\n month={8},\n eprint={2308.07201},\n archivePrefix={arXiv},\n primaryClass={cs.CL},\n url={https://arxiv.org/abs/2308.07201},\n environments = {collaboration, text},\n agents = {prompting_and_in_context_learning, more_than_three_agents},\n evaluation = {rule_based},\n other = {n/a}\n}",
bibtex: "@misc{chan2023chateval,\n title={ChatEval: Towards Better LLM-based Evaluators through Multi-Agent Debate}, \n author={Chi-Min Chan and Weize Chen and Yusheng Su and Jianxuan Yu and Wei Xue and Shanghang Zhang and Jie Fu and Zhiyuan Liu},\n year={2023},\n month={8},\n eprint={2308.07201},\n archivePrefix={arXiv},\n primaryClass={cs.CL},\n url={https://arxiv.org/abs/2308.07201},\n environments = {collaboration, text},\n agents = {prompting_and_in_context_learning, more_than_three_agents},\n evaluation = {model_based},\n other = {n/a}\n}",
authors: "Chan et al.",
subsection: "evaluation/language",
},
Expand Down
4 changes: 2 additions & 2 deletions docs/paper_table.md
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@
| [AgentCF: Collaborative Learning with Autonomous Language Agents for Recommender Systems](https://arxiv.org/abs/2310.09233) | 10, 2023 | ['mixed_objectives', 'text'] | ['prompting_and_in_context_learning', 'more_than_three_agents', 'agents_with_memory', 'agents_with_personas'] | ['rule_based'] | ['simulated_humans'] |
| [Approximating Online Human Evaluation of Social Chatbots with Prompting](https://aclanthology.org/2023.sigdial-1.25) | 9, 2023 | ['mixed_objectives', 'text'] | ['prompting_and_in_context_learning', 'two_agents'] | ['model_based'] | ['n/a'] |
| [CharacterChat: Learning towards Conversational AI with Personalized Social Support](https://arxiv.org/abs/2308.10278) | 08, 2023 | ['implicit_objectives', 'text'] | ['prompting_and_in_context_learning', 'two_agents', 'agents_with_memory', 'agents_with_personas'] | ['model_based', 'human'] | ['simulated_humans'] |
| [ChatEval: Towards Better LLM-based Evaluators through Multi-Agent Debate](https://arxiv.org/abs/2308.07201) | 08, 2023 | ['collaboration', 'text'] | ['prompting_and_in_context_learning', 'more_than_three_agents'] | ['rule_based'] | ['n/a'] |
| [ChatEval: Towards Better LLM-based Evaluators through Multi-Agent Debate](https://arxiv.org/abs/2308.07201) | 08, 2023 | ['collaboration', 'text'] | ['prompting_and_in_context_learning', 'more_than_three_agents'] | ['model_based'] | ['n/a'] |
| [Don{'}t Forget Your {ABC}{'}s: Evaluating the State-of-the-Art in Chat-Oriented Dialogue Systems](https://aclanthology.org/2023.acl-long.839) | 7, 2023 | ['text'] | ['n/a'] | ['human'] | ['human_agent'] |
| [PersonaLLM: Investigating the Ability of Large Language Models to Express Personality Traits](https://api.semanticscholar.org/CorpusID:268032940) | 5, 2023 | ['text'] | ['prompting_and_in_context_learning'] | ['human', 'model_based'] | ['n/a'] |
| [Psychological Metrics for Dialog System Evaluation](https://arxiv.org/abs/2305.14757) | 05, 2023 | ['text'] | ['two_agents'] | ['human', 'rule_based'] | ['human_agent'] |
Expand All @@ -194,7 +194,7 @@
| [Survey on evaluation methods for dialogue systems](https://link.springer.com/article/10.1007/s10462-020-09866-x) | 1, 2021 | ['text'] | ['n/a'] | ['rule_based', 'model_based'] | ['human_agent'] |
| [{GRADE}: Automatic Graph-Enhanced Coherence Metric for Evaluating Open-Domain Dialogue Systems](https://aclanthology.org/2020.emnlp-main.742) | 11, 2020 | ['text'] | ['n/a'] | ['human', 'model_based'] | ['n/a'] |
| [Towards Unified Dialogue System Evaluation: A Comprehensive Analysis of Current Evaluation Protocols](https://aclanthology.org/2020.sigdial-1.29) | 7, 2020 | ['text'] | ['n/a'] | ['human', 'rule_based', 'model_based'] | ['human_agent'] |
| [u{BLEU}: Uncertainty-Aware Automatic Evaluation Method for Open-Domain Dialogue Systems](https://aclanthology.org/2020.acl-srw.27) | 7, 2020 | ['text'] | ['n/a'] | ['rule_based'] | ['human_agent'] |
| [u{BLEU}: Uncertainty-Aware Automatic Evaluation Method for Open-Domain Dialogue Systems](https://aclanthology.org/2020.acl-srw.27) | 7, 2020 | ['text'] | ['n/a'] | ['model_based'] | ['human_agent'] |
| [{C}onvo{K}it: A Toolkit for the Analysis of Conversations](https://aclanthology.org/2020.sigdial-1.8) | 7, 2020 | ['text'] | ['n/a'] | ['human', 'model_based', 'rule_based'] | ['n/a'] |
| [Unsupervised Evaluation of Interactive Dialog with {D}ialo{GPT}](https://aclanthology.org/2020.sigdial-1.28) | 7, 2020 | ['text'] | ['n/a'] | ['human', 'model_based'] | ['n/a'] |
| [Embodied LLM Agents Learn to Cooperate in Organized Teams](https://arxiv.org/abs/2403.12482) | 03, 2024 | ['collaboration', 'embodied'] | ['prompting_and_in_context_learning', 'more_than_three_agents'] | ['model_based', 'human'] | ['education'] |
Expand Down
Loading

0 comments on commit 8725972

Please sign in to comment.