In [46]:
import itertools
import re
import requests
import yaml

sys.path.insert(0, "..")
from create_xml import rule_to_xml
from helpers import open_

In [47]:
topics = [
    "ablist",
    "condescending",
    "gender",
    "lgbtq",
    "press",
    "race",
    "slogans",
    "suicide",
]
responses = {}
for topic in topics:
    responses[topic] = requests.get(
        "https://raw.githubusercontent.com/retextjs/retext-equality/main/data/en/{}.yml".format(
            topic
        )
    ).content.decode("UTF-8")

In [48]:
rules = {}
for topic in topics:
    # data = yaml.safe_load(responses[topic])
    # dump = yaml.dump(data, encoding="utf-8")
    open("retext_equality_raw_{}.yaml".format(topic), "w").write(responses[topic])
    pass

What we could do now is to look for simple rules where it's only about replacing single words, and translate the words automatically. 

Instead, what we have done so far is to create some LanguageTool XML rules from scratch for the topics touched in the data, also for more complicated ones. This is lots of manual effort but results in good quality.

One aspect that we can automate is the replacement of words like "schizophrenic" with phrases like "person with schizophrenia. For this purpose, we collect a list of words for ill people, also including very inconsiderate words, and a list of considerate words for the illnesses. The list below mainly includes the illnesses mentioned in the `retext-equality` disability list. The list could probably be much much longer. Some words for ill people and illnesses are rather rare and not yet in the POS tagger dictionary, so we add them there, see `added.txt`. A better approach in the mid term would be to use a neural tagger, though.

In [49]:
illness_replacements = [
    ["Schizophrener", ["Schizophrenie"]],
    ["Schizophreniker", ["Schizophrenie"]],
    ["Mongoloide", ["Trisomie 21"]],
    ["Mongo", ["Trisomie 21"]],
    ["Psychotiker", ["Psychose"]],
    ["Alkoholiker", ["Alkoholproblem"]],
    ["Dyslexiker", ["Dyslexie"]],
    ["Epileptiker", ["Epilepsie"]],
    ["Paraplegiker", ["Paraplegie"]],
    ["Quadriplegiker", ["Quadriplegie"]],
    ["Spastiker", ["zerebrale Lähmung", "Zerebralparese"]],
    ["Spast", ["zerebrale Lähmung", "Zerebralparese"]],
    ["Wasserkopf", ["Hydrocephalus"]],
    ["Narkoleptiker", ["Narkolepsie"]],
]


def short_message(pattern):
    return "Beziehe dich in erster Linie auf den Menschen, nicht die Krankheit, Behinderung, Sucht etc."


message = "TODO"

rules = []
for ill_person, illnesses in illness_replacements:
    for number in ["sg", "pl"]:
        rules.append(
            rule_to_xml(
                ill_person,
                "sg",
                ["Person mit {}".format(illness) for illness in illnesses],
                message=message,
                short_message=short_message,
            )
        )
    for number in ["both", "unknown"]:
        rules.append(
            rule_to_xml(
                ill_person,
                "both",
                list(
                    itertools.chain(
                        *[
                            [
                                "Person mit {}".format(illness),
                                "Menschen mit {}".format(illness),
                            ]
                            for illness in illnesses
                        ]
                    )
                ),
                message=message,
                short_message=short_message,
            )
        )
print(rules[0])
print(rules[2])


<rule id="Schizophrener_sg" name="Schizophrener">
    <antipattern><token>Person</token><token>mit</token><token>Schizophrenie</token></antipattern>
    <pattern><token inflected="yes" postag=".*:SIN:.*" postag_regexp="yes"><exception postag=".*:PLU:.*" postag_regexp="yes" />Schizophrener</token></pattern>
    <suggestion>Person mit Schizophrenie</suggestion>
    <message>TODO</message>
    <short>Beziehe dich in erster Linie auf den Menschen, nicht die Krankheit, Behinderung, Sucht etc.</short>
    <example correction="Person mit Schizophrenie"><marker>Schizophrener</marker></example>
</rule>


<rule id="Schizophrener_both" name="Schizophrener">
    <antipattern><token>Person</token><token>mit</token><token>Schizophrenie</token></antipattern>
		<antipattern><token>Menschen</token><token>mit</token><token>Schizophrenie</token></antipattern>
    <pattern><and>
	<token inflected="yes" postag=".*:SIN:.*" postag_regexp="yes">Schizophrener</token>
	<token inflected="yes" postag=".*:PLU:.*"

In [50]:
open_("illness_rules.xml", "w").write("\n".join(rules))

44627

Some rules, like "Mongoloidinnen", do not work.