Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds Papiamento support #2499

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -58,7 +58,9 @@ public class LanguageIdentifier {

// ast and gl often prevent the correct detection of Spanish (as they are quite similar
// to Spanish, I assume) so we disable them for now. See LanguageDetectionEval.java:
private static final List<String> ignoreLangCodes = Arrays.asList("ast", "gl");
// pap is currently not supported by language-detector and we still don't have a profile to offer,
// so we add pap here to disable it for now.
private static final List<String> ignoreLangCodes = Arrays.asList("ast", "gl", "pap");

// languages that we offer profiles for as they are not yet supported by language-detector:
private static final List<String> externalLangCodes = Arrays.asList("eo");
Expand Down
Expand Up @@ -382,6 +382,12 @@ pt-MZ = Portuguese (Mozambique preAO)

pt-PT = Portuguese (Portugal posAO)

pap = Papiamento

pap-CW = Papiamento (Phonetic)

pap-AW = Papiamento (Etymological)

guiWarning = Warning

guiDuplicate = Duplicate rule file!
Expand Down
5 changes: 5 additions & 0 deletions languagetool-language-modules/all/pom.xml
Expand Up @@ -107,6 +107,11 @@
<artifactId>language-pt</artifactId>
<version>${languagetool.version}</version>
</dependency>
<dependency>
<groupId>org.languagetool</groupId>
<artifactId>language-pap</artifactId>
<version>${languagetool.version}</version>
</dependency>
<dependency>
<groupId>org.languagetool</groupId>
<artifactId>language-ru</artifactId>
Expand Down
100 changes: 100 additions & 0 deletions languagetool-language-modules/pap/pom.xml
@@ -0,0 +1,100 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven build descriptor for the Papiamento language module (artifact language-pap). -->
<modelVersion>4.0.0</modelVersion>

<!-- Inherits build settings from the LanguageTool parent POM two directories up. -->
<parent>
<groupId>org.languagetool</groupId>
<artifactId>languagetool-parent</artifactId>
<version>4.9-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

<artifactId>language-pap</artifactId>
<url>http://www.languagetool.org</url>
<name>Papiamento module for LanguageTool</name>

<licenses>
<license>
<name>GNU Lesser General Public License</name>
<url>http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt</url>
<distribution>repo</distribution>
<comments>The license refers to the source code, resources may be under different licenses</comments>
</license>
</licenses>

<packaging>jar</packaging>

<developers>
<developer>
<name>Manuel Ortega</name>
<roles><role>Maintainer</role></roles>
</developer>
</developers>

<build>
<resources>
<resource>
<directory>src/main/resources</directory>
<!-- NOTE(review): every exclude below targets org/languagetool/resource/en/, i.e. the English
     resource directory. This looks copied from the English module; confirm whether this module
     ships any files under that path, otherwise these patterns match nothing. -->
<excludes>
<exclude>org/languagetool/resource/en/partlycountable.txt</exclude>
<exclude>org/languagetool/resource/en/uncountable.txt</exclude>
<exclude>org/languagetool/resource/en/filter*.txt</exclude>
<exclude>org/languagetool/resource/en/*.awk</exclude>
<exclude>org/languagetool/resource/en/numbers.txt</exclude>
</excludes>
</resource>
</resources>
<!-- Plugins are listed without configuration; presumably version and settings come from the parent POM. -->
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
</plugin>
</plugins>
</build>

<dependencies>
<!-- Core LanguageTool API that the language classes of this module extend. -->
<dependency>
<groupId>org.languagetool</groupId>
<artifactId>languagetool-core</artifactId>
<version>${languagetool.version}</version>
</dependency>

<!-- NOTE(review): the OpenNLP library and the three English-trained model artifacts below are
     not referenced by any Papiamento class shown in this change. TODO confirm they are needed
     here and were not carried over from the English module. -->
<dependency>
<groupId>org.apache.opennlp</groupId>
<artifactId>opennlp-tools</artifactId>
<version>1.9.1</version>
</dependency>
<dependency>
<groupId>edu.washington.cs.knowitall</groupId>
<artifactId>opennlp-tokenize-models</artifactId>
<version>1.5</version>
</dependency>
<dependency>
<groupId>edu.washington.cs.knowitall</groupId>
<artifactId>opennlp-postag-models</artifactId>
<version>1.5</version>
</dependency>
<dependency>
<groupId>edu.washington.cs.knowitall</groupId>
<artifactId>opennlp-chunk-models</artifactId>
<version>1.5</version>
</dependency>

<!-- Test-scoped dependencies: the shared test classes of languagetool-core plus JUnit. -->
<dependency>
<!-- see http://stackoverflow.com/questions/174560/sharing-test-code-in-maven#174670 -->
<groupId>org.languagetool</groupId>
<artifactId>languagetool-core</artifactId>
<version>${languagetool.version}</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
</dependencies>

</project>
@@ -0,0 +1,54 @@
/* LanguageTool, a natural language style checker
* Copyright (C) 2012 Marcin Miłkowski (http://www.languagetool.org)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/

package org.languagetool.language;

import org.jetbrains.annotations.Nullable;
import org.languagetool.Language;
import org.languagetool.UserConfig;
import org.languagetool.languagemodel.LanguageModel;
import org.languagetool.rules.Rule;
import org.languagetool.language.rules.pap.MorfologikEtymologicalPapiamentoSpellerRule;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.ResourceBundle;

/**
 * Papiamento variant using the etymological spelling, registered for the
 * country code {@code AW}.
 *
 * <p>Extends the generic {@link Papiamento} rule set with a Morfologik-based
 * spell checking rule for this orthography.
 */
public class EtymologicalPapiamento extends Papiamento {

  /** @return the display name of this variant */
  @Override
  public String getName() {
    return "Papiamento (Etymological)";
  }

  /** @return the single country code this variant is registered for */
  @Override
  public String[] getCountries() {
    return new String[] {"AW"};
  }

  /**
   * Returns the rules inherited from {@link Papiamento} followed by the
   * spell checking rule of this variant.
   *
   * @return a new mutable list; the speller rule is always the last element
   * @throws IOException if the parent rule set or the speller rule cannot be created
   */
  @Override
  public List<Rule> getRelevantRules(ResourceBundle messages, UserConfig userConfig, Language motherTongue, List<Language> altLanguages) throws IOException {
    // The parent returns a fixed-size list, so copy it before appending.
    List<Rule> combined = new ArrayList<>(super.getRelevantRules(messages, userConfig, motherTongue, altLanguages));
    combined.add(new MorfologikEtymologicalPapiamentoSpellerRule(messages, this, userConfig, altLanguages));
    return combined;
  }

}
@@ -0,0 +1,120 @@
/* LanguageTool, a natural language style checker
* Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package org.languagetool.language;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.languagetool.Language;
import org.languagetool.LanguageMaintainedState;
import org.languagetool.UserConfig;
import org.languagetool.languagemodel.LanguageModel;
//import org.languagetool.language.rules.ast.MorfologikAsturianSpellerRule;
import org.languagetool.rules.*;
//import org.languagetool.tagging.Tagger;
//import org.languagetool.tagging.ast.AsturianTagger;
//import org.languagetool.tokenizers.SRXSentenceTokenizer;
//import org.languagetool.tokenizers.SentenceTokenizer;


import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;

/**
 * Support for Papiamento. This base class only provides generic punctuation,
 * whitespace and capitalization rules; use the sub classes
 * {@link PhoneticPapiamento} or {@link EtymologicalPapiamento} if you need
 * spell checking.
 *
 * <p>No dedicated sentence tokenizer or tagger is configured here, so the
 * defaults inherited from {@link Language} apply.
 */
public class Papiamento extends Language {

  // Variant handed out by getDefaultLanguageVariant() when only "pap" is requested.
  private static final Language PHONETIC_PAPIAMENTO = new PhoneticPapiamento();

  /**
   * @deprecated use {@link PhoneticPapiamento} or {@link EtymologicalPapiamento} instead -
   * they have rules for spell checking, this class doesn't
   */
  @Deprecated
  public Papiamento() {
  }

  /** @return the phonetic variant, which serves as the default for plain {@code pap} */
  @Override
  public Language getDefaultLanguageVariant() {
    return PHONETIC_PAPIAMENTO;
  }

  /** @return the display name of the language */
  @Override
  public String getName() {
    return "Papiamento";
  }

  /** @return the language code {@code pap} */
  @Override
  public String getShortCode() {
    return "pap";
  }

  /** @return an empty array: the base language is not bound to a country, the variants are */
  @Override
  public String[] getCountries() {
    return new String[]{};
  }

  /** @return the maintainers of this language module */
  @Override
  public Contributor[] getMaintainers() {
    return new Contributor[] { new Contributor("Manuel Ortega") };
  }

  /** @return {@link LanguageMaintainedState#ActivelyMaintained} */
  @Override
  public LanguageMaintainedState getMaintainedState() {
    return LanguageMaintainedState.ActivelyMaintained;
  }

  /**
   * Returns the language-independent rules that are enabled for every
   * Papiamento variant. Spell checking is added by the sub classes.
   *
   * @return a fixed-size list; copy it before adding further rules
   * @throws IOException declared by the overridden method; not thrown by the code visible here
   */
  @Override
  public List<Rule> getRelevantRules(ResourceBundle messages, UserConfig userConfig, Language motherTongue, List<Language> altLanguages) throws IOException {
    return Arrays.asList(
        new CommaWhitespaceRule(messages),
        new DoublePunctuationRule(messages),
        new GenericUnpairedBracketsRule(messages),
        new UppercaseSentenceStartRule(messages, this),
        new MultipleWhitespaceRule(messages, this)
    );
  }

}
@@ -0,0 +1,54 @@
/* LanguageTool, a natural language style checker
* Copyright (C) 2012 Marcin Miłkowski (http://www.languagetool.org)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/

package org.languagetool.language;

import org.jetbrains.annotations.Nullable;
import org.languagetool.Language;
import org.languagetool.UserConfig;
import org.languagetool.languagemodel.LanguageModel;
import org.languagetool.rules.Rule;
import org.languagetool.language.rules.pap.MorfologikPhoneticPapiamentoSpellerRule;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.ResourceBundle;

/**
 * Papiamento variant using the phonetic spelling, registered for the
 * country code {@code CW}. It is the default variant of {@link Papiamento}.
 *
 * <p>Extends the generic {@link Papiamento} rule set with a Morfologik-based
 * spell checking rule for this orthography.
 */
public class PhoneticPapiamento extends Papiamento {

  /**
   * Returns exactly one country so that this variant resolves to
   * {@code pap-CW}, the key used for it in the message bundle.
   *
   * <p>The former second entry {@code "BES"} was removed: it is an
   * ISO 3166-1 alpha-3 code, while country codes here are alpha-2.
   * NOTE(review): LanguageTool appears to append the country to the short
   * code only when a single country is returned - verify against
   * {@code Language#getShortCodeWithCountryAndVariant()}.
   *
   * @return the single country code this variant is registered for
   */
  @Override
  public String[] getCountries() {
    return new String[]{"CW"};
  }

  /** @return the display name of this variant */
  @Override
  public String getName() {
    return "Papiamento (Phonetic)";
  }

  /**
   * Returns the rules inherited from {@link Papiamento} followed by the
   * spell checking rule of this variant.
   *
   * @return a new mutable list; the speller rule is always the last element
   * @throws IOException if the parent rule set or the speller rule cannot be created
   */
  @Override
  public List<Rule> getRelevantRules(ResourceBundle messages, UserConfig userConfig, Language motherTongue, List<Language> altLanguages) throws IOException {
    // The parent returns a fixed-size list, so copy it before appending.
    List<Rule> rules = new ArrayList<>(super.getRelevantRules(messages, userConfig, motherTongue, altLanguages));
    rules.add(new MorfologikPhoneticPapiamentoSpellerRule(messages, this, userConfig, altLanguages));
    return rules;
  }

}