@inproceedings{kuribayashi-etal-2020-language,
  title     = {Language Models as an Alternative Evaluator of Word Order Hypotheses: A Case Study in {Japanese}},
  author    = {Kuribayashi, Tatsuki and
               Ito, Takumi and
               Suzuki, Jun and
               Inui, Kentaro},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  month     = jul,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://www.aclweb.org/anthology/2020.acl-main.47},
  doi       = {10.18653/v1/2020.acl-main.47},
  pages     = {488--504},
  abstract  = {We examine a methodology using neural language models (LMs) for analyzing the word order of language. This LM-based method has the potential to overcome the difficulties existing methods face, such as the propagation of preprocessor errors in count-based methods. In this study, we explore whether the LM-based method is valid for analyzing the word order. As a case study, this study focuses on Japanese due to its complex and flexible word order. To validate the LM-based method, we test (i) parallels between LMs and human word order preference, and (ii) consistency of the results obtained using the LM-based method with previous linguistic studies. Through our experiments, we tentatively conclude that LMs display sufficient word order knowledge for usage as an analysis tool. Finally, using the LM-based method, we demonstrate the relationship between the canonical word order and topicalization, which had yet to be analyzed by large-scale experiments.},
}
- conference paper: https://www.aclweb.org/anthology/2020.acl-main.47/
The data used in Experiment 1 is released in this repository. Due to copyright restrictions, the data used in Experiment 2 is not released. The language models used in our experiments can be downloaded from the release tag.