fix eval/language tags (#66)

* fix eval/language tag for ChatEval paper
* fix eval/language tag for ChatEval paper and u-BLEU

---------

Co-authored-by: XuhuiZhou <zhouxuhui2018@gmail.com>
sotopia-lab · May 25, 2024 · 8725972 · 8725972
1 parent 26496cb
commit 8725972
Show file tree

Hide file tree

Showing 4 changed files with 58 additions and 58 deletions.
diff --git a/components/data/chartData.tsx b/components/data/chartData.tsx
@@ -14,7 +14,7 @@ export const bar_data = [
   agents_with_personas: 24,
   human: 92,
   not_applicable: 148,
-  rule_based: 60,
+  rule_based: 58,
   more_than_three_agents: 35,
   more_information_asymmetrical: 2,
   prompting_and_in_context_learning: 47,
@@ -25,7 +25,7 @@ export const bar_data = [
   agents_with_memory: 16,
   more_omniscient: 7,
   pretraining: 18,
-  model_based: 45,
+  model_based: 47,
   simulated_humans: 14,
   agent_teams: 6,
   health: 18,
@@ -133,28 +133,28 @@ export const area_data = [
   rule_based: 1,
   human_agent: 1,
   more_than_three_agents: 0,
+  competition: 0,
+  finetuning: 0,
+  more_information_asymmetrical: 0,
+  more_omniscient: 0,
+  fully_omniscient: 0,
+  embodied: 0,
+  qualitative: 0,
+  not_applicable: 0,
+  pretraining: 0,
   health: 0,
+  policy: 0,
+  two_agents: 0,
+  text: 0,
+  implicit_objectives: 0,
   agent_teams: 0,
-  pretraining: 0,
   agents_with_personas: 0,
-  text: 0,
+  model_based: 0,
   simulated_humans: 0,
-  education: 0,
-  embodied: 0,
-  competition: 0,
   virtual: 0,
-  implicit_objectives: 0,
-  fully_omniscient: 0,
-  not_applicable: 0,
-  qualitative: 0,
   prompting_and_in_context_learning: 0,
-  more_information_asymmetrical: 0,
-  policy: 0,
   agents_with_memory: 0,
-  two_agents: 0,
-  model_based: 0,
-  more_omniscient: 0,
-  finetuning: 0,
+  education: 0,
 },
 {
   name: '2016',
@@ -176,18 +176,18 @@ export const area_data = [
   more_than_three_agents: 1,
   model_based: 1,
   education: 1,
+  more_information_asymmetrical: 0,
+  more_omniscient: 0,
+  fully_omniscient: 0,
+  not_applicable: 0,
+  pretraining: 0,
   health: 0,
+  policy: 0,
+  implicit_objectives: 0,
   agent_teams: 0,
-  pretraining: 0,
   simulated_humans: 0,
-  implicit_objectives: 0,
-  fully_omniscient: 0,
-  not_applicable: 0,
   prompting_and_in_context_learning: 0,
-  more_information_asymmetrical: 0,
-  policy: 0,
   agents_with_memory: 0,
-  more_omniscient: 0,
 },
 {
   name: '2017',
@@ -205,22 +205,22 @@ export const area_data = [
   qualitative: 1,
   human: 1,
   more_than_three_agents: 0,
+  finetuning: 0,
+  more_information_asymmetrical: 0,
+  more_omniscient: 0,
+  fully_omniscient: 0,
+  embodied: 0,
+  pretraining: 0,
   health: 0,
+  policy: 0,
+  implicit_objectives: 0,
   agent_teams: 0,
-  pretraining: 0,
   agents_with_personas: 0,
-  collaboration: 0,
+  model_based: 0,
   simulated_humans: 0,
-  education: 0,
-  embodied: 0,
-  implicit_objectives: 0,
-  fully_omniscient: 0,
   prompting_and_in_context_learning: 0,
-  more_information_asymmetrical: 0,
-  policy: 0,
-  model_based: 0,
-  more_omniscient: 0,
-  finetuning: 0,
+  collaboration: 0,
+  education: 0,
 },
 {
   name: '2018',
@@ -250,10 +250,10 @@ export const area_data = [
   model_based: 1,
   education: 1,
   more_omniscient: 1,
-  agent_teams: 0,
-  pretraining: 0,
   more_information_asymmetrical: 0,
+  pretraining: 0,
   policy: 0,
+  agent_teams: 0,
 },
 {
   name: '2019',
@@ -279,24 +279,24 @@ export const area_data = [
   agent_teams: 1,
   model_based: 1,
   health: 1,
-  pretraining: 0,
-  simulated_humans: 0,
-  education: 0,
-  fully_omniscient: 0,
   more_information_asymmetrical: 0,
+  more_omniscient: 0,
+  fully_omniscient: 0,
+  pretraining: 0,
   policy: 0,
+  simulated_humans: 0,
   agents_with_memory: 0,
-  more_omniscient: 0,
+  education: 0,
 },
 {
   name: '2020',
   text: 18,
   mixed_objectives: 9,
   more_than_three_agents: 6,
-  model_based: 10,
+  model_based: 11,
   human_agent: 11,
   collaboration: 5,
-  rule_based: 17,
+  rule_based: 16,
   not_applicable: 25,
   finetuning: 6,
   agents_with_personas: 1,
@@ -317,8 +317,8 @@ export const area_data = [
   policy: 1,
   education: 2,
   more_omniscient: 2,
-  agent_teams: 0,
   more_information_asymmetrical: 0,
+  agent_teams: 0,
   agents_with_memory: 0,
 },
 {
@@ -349,9 +349,9 @@ export const area_data = [
   more_omniscient: 1,
   agents_with_personas: 1,
   prompting_and_in_context_learning: 1,
-  pretraining: 0,
-  fully_omniscient: 0,
   more_information_asymmetrical: 0,
+  fully_omniscient: 0,
+  pretraining: 0,
   agents_with_memory: 0,
 },
 {
@@ -384,16 +384,16 @@ export const area_data = [
   education: 6,
   policy: 2,
   more_omniscient: 2,
-  fully_omniscient: 0,
   more_information_asymmetrical: 0,
+  fully_omniscient: 0,
 },
 {
   name: '2023',
   collaboration: 25,
   embodied: 25,
   prompting_and_in_context_learning: 55,
   more_than_three_agents: 21,
-  rule_based: 69,
+  rule_based: 68,
   not_applicable: 81,
   text: 53,
   implicit_objectives: 18,
@@ -413,7 +413,7 @@ export const area_data = [
   pretraining: 7,
   agent_teams: 10,
   agents_with_personas: 8,
-  model_based: 17,
+  model_based: 18,
   fully_omniscient: 1,
   health: 7,
   policy: 2,

diff --git a/components/papers.tsx b/components/papers.tsx
@@ -2246,10 +2246,10 @@ export const data: Paper[] = [
     date: "07/2020",
     environments: "text",
     agents: "n/a",
-    evaluation: "rule_based",
+    evaluation: "model_based",
     other: "human_agent",
     url: "https://aclanthology.org/2020.acl-srw.27",
-    bibtex: "@inproceedings{tsuta-etal-2020-ubleu,\n    title = \"u{BLEU}: Uncertainty-Aware Automatic Evaluation Method for Open-Domain Dialogue Systems\",\n    author = \"Tsuta, Yuma  and\n      Yoshinaga, Naoki  and\n      Toyoda, Masashi\",\n    editor = \"Rijhwani, Shruti  and\n      Liu, Jiangming  and\n      Wang, Yizhong  and\n      Dror, Rotem\",\n    booktitle = \"Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop\",\n    month = jul,\n    year = \"2020\",\n    address = \"Online\",\n    publisher = \"Association for Computational Linguistics\",\n    url = \"https://aclanthology.org/2020.acl-srw.27\",\n    doi = \"10.18653/v1/2020.acl-srw.27\",\n    pages = \"199--206\",\n    environments = {text},\n    agents = {n/a},\n    evaluation = {rule_based},\n    other = {human_agent}\n}",
+    bibtex: "@inproceedings{tsuta-etal-2020-ubleu,\n    title = \"u{BLEU}: Uncertainty-Aware Automatic Evaluation Method for Open-Domain Dialogue Systems\",\n    author = \"Tsuta, Yuma  and\n      Yoshinaga, Naoki  and\n      Toyoda, Masashi\",\n    editor = \"Rijhwani, Shruti  and\n      Liu, Jiangming  and\n      Wang, Yizhong  and\n      Dror, Rotem\",\n    booktitle = \"Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop\",\n    month = jul,\n    year = \"2020\",\n    address = \"Online\",\n    publisher = \"Association for Computational Linguistics\",\n    url = \"https://aclanthology.org/2020.acl-srw.27\",\n    doi = \"10.18653/v1/2020.acl-srw.27\",\n    pages = \"199--206\",\n    environments = {text},\n    agents = {n/a},\n    evaluation = {model_based},\n    other = {human_agent}\n}",
     authors: "Tsuta et al.",
     subsection: "evaluation/language",
 },
@@ -2428,10 +2428,10 @@ export const data: Paper[] = [
     date: "08/2023",
     environments: "collaboration, text",
     agents: "prompting_and_in_context_learning, more_than_three_agents",
-    evaluation: "rule_based",
+    evaluation: "model_based",
     other: "n/a",
     url: "https://arxiv.org/abs/2308.07201",
-    bibtex: "@misc{chan2023chateval,\n      title={ChatEval: Towards Better LLM-based Evaluators through Multi-Agent Debate}, \n      author={Chi-Min Chan and Weize Chen and Yusheng Su and Jianxuan Yu and Wei Xue and Shanghang Zhang and Jie Fu and Zhiyuan Liu},\n      year={2023},\n      month={8},\n      eprint={2308.07201},\n      archivePrefix={arXiv},\n      primaryClass={cs.CL},\n      url={https://arxiv.org/abs/2308.07201},\n      environments = {collaboration, text},\n      agents = {prompting_and_in_context_learning, more_than_three_agents},\n      evaluation = {rule_based},\n      other = {n/a}\n}",
+    bibtex: "@misc{chan2023chateval,\n      title={ChatEval: Towards Better LLM-based Evaluators through Multi-Agent Debate}, \n      author={Chi-Min Chan and Weize Chen and Yusheng Su and Jianxuan Yu and Wei Xue and Shanghang Zhang and Jie Fu and Zhiyuan Liu},\n      year={2023},\n      month={8},\n      eprint={2308.07201},\n      archivePrefix={arXiv},\n      primaryClass={cs.CL},\n      url={https://arxiv.org/abs/2308.07201},\n      environments = {collaboration, text},\n      agents = {prompting_and_in_context_learning, more_than_three_agents},\n      evaluation = {model_based},\n      other = {n/a}\n}",
     authors: "Chan et al.",
     subsection: "evaluation/language",
 },

diff --git a/docs/paper_table.md b/docs/paper_table.md
@@ -182,7 +182,7 @@
 | [AgentCF: Collaborative Learning with Autonomous Language Agents for Recommender Systems](https://arxiv.org/abs/2310.09233)                                                                                                                                                                                                                                                                  | 10, 2023 | ['mixed_objectives', 'text']                                                                    | ['prompting_and_in_context_learning', 'more_than_three_agents', 'agents_with_memory', 'agents_with_personas']     | ['rule_based']                         | ['simulated_humans']                                                |
 | [Approximating Online Human Evaluation of Social Chatbots with Prompting](https://aclanthology.org/2023.sigdial-1.25)                                                                                                                                                                                                                                                                        | 9, 2023  | ['mixed_objectives', 'text']                                                                    | ['prompting_and_in_context_learning', 'two_agents']                                                               | ['model_based']                        | ['n/a']                                                             |
 | [CharacterChat: Learning towards Conversational AI with Personalized Social Support](https://arxiv.org/abs/2308.10278)                                                                                                                                                                                                                                                                       | 08, 2023 | ['implicit_objectives', 'text']                                                                 | ['prompting_and_in_context_learning', 'two_agents', 'agents_with_memory', 'agents_with_personas']                 | ['model_based', 'human']               | ['simulated_humans']                                                |
-| [ChatEval: Towards Better LLM-based Evaluators through Multi-Agent Debate](https://arxiv.org/abs/2308.07201)                                                                                                                                                                                                                                                                                 | 08, 2023 | ['collaboration', 'text']                                                                       | ['prompting_and_in_context_learning', 'more_than_three_agents']                                                   | ['rule_based']                         | ['n/a']                                                             |
+| [ChatEval: Towards Better LLM-based Evaluators through Multi-Agent Debate](https://arxiv.org/abs/2308.07201)                                                                                                                                                                                                                                                                                 | 08, 2023 | ['collaboration', 'text']                                                                       | ['prompting_and_in_context_learning', 'more_than_three_agents']                                                   | ['model_based']                        | ['n/a']                                                             |
 | [Don{'}t Forget Your {ABC}{'}s: Evaluating the State-of-the-Art in Chat-Oriented Dialogue Systems](https://aclanthology.org/2023.acl-long.839)                                                                                                                                                                                                                                               | 7, 2023  | ['text']                                                                                        | ['n/a']                                                                                                           | ['human']                              | ['human_agent']                                                     |
 | [PersonaLLM: Investigating the Ability of Large Language Models to Express Personality Traits](https://api.semanticscholar.org/CorpusID:268032940)                                                                                                                                                                                                                                           | 5, 2023  | ['text']                                                                                        | ['prompting_and_in_context_learning']                                                                             | ['human', 'model_based']               | ['n/a']                                                             |
 | [Psychological Metrics for Dialog System Evaluation](https://arxiv.org/abs/2305.14757)                                                                                                                                                                                                                                                                                                       | 05, 2023 | ['text']                                                                                        | ['two_agents']                                                                                                    | ['human', 'rule_based']                | ['human_agent']                                                     |
@@ -194,7 +194,7 @@
 | [Survey on evaluation methods for dialogue systems](https://link.springer.com/article/10.1007/s10462-020-09866-x)                                                                                                                                                                                                                                                                            | 1, 2021  | ['text']                                                                                        | ['n/a']                                                                                                           | ['rule_based', 'model_based']          | ['human_agent']                                                     |
 | [{GRADE}: Automatic Graph-Enhanced Coherence Metric for Evaluating Open-Domain Dialogue Systems](https://aclanthology.org/2020.emnlp-main.742)                                                                                                                                                                                                                                               | 11, 2020 | ['text']                                                                                        | ['n/a']                                                                                                           | ['human', 'model_based']               | ['n/a']                                                             |
 | [Towards Unified Dialogue System Evaluation: A Comprehensive Analysis of Current Evaluation Protocols](https://aclanthology.org/2020.sigdial-1.29)                                                                                                                                                                                                                                           | 7, 2020  | ['text']                                                                                        | ['n/a']                                                                                                           | ['human', 'rule_based', 'model_based'] | ['human_agent']                                                     |
-| [u{BLEU}: Uncertainty-Aware Automatic Evaluation Method for Open-Domain Dialogue Systems](https://aclanthology.org/2020.acl-srw.27)                                                                                                                                                                                                                                                          | 7, 2020  | ['text']                                                                                        | ['n/a']                                                                                                           | ['rule_based']                         | ['human_agent']                                                     |
+| [u{BLEU}: Uncertainty-Aware Automatic Evaluation Method for Open-Domain Dialogue Systems](https://aclanthology.org/2020.acl-srw.27)                                                                                                                                                                                                                                                          | 7, 2020  | ['text']                                                                                        | ['n/a']                                                                                                           | ['model_based']                        | ['human_agent']                                                     |
 | [{C}onvo{K}it: A Toolkit for the Analysis of Conversations](https://aclanthology.org/2020.sigdial-1.8)                                                                                                                                                                                                                                                                                       | 7, 2020  | ['text']                                                                                        | ['n/a']                                                                                                           | ['human', 'model_based', 'rule_based'] | ['n/a']                                                             |
 | [Unsupervised Evaluation of Interactive Dialog with {D}ialo{GPT}](https://aclanthology.org/2020.sigdial-1.28)                                                                                                                                                                                                                                                                                | 7, 2020  | ['text']                                                                                        | ['n/a']                                                                                                           | ['human', 'model_based']               | ['n/a']                                                             |
 | [Embodied LLM Agents Learn to Cooperate in Organized Teams](https://arxiv.org/abs/2403.12482)                                                                                                                                                                                                                                                                                                | 03, 2024 | ['collaboration', 'embodied']                                                                   | ['prompting_and_in_context_learning', 'more_than_three_agents']                                                   | ['model_based', 'human']               | ['education']                                                       |