From a883822eb33887ad2b97c2d93860b16010c63101 Mon Sep 17 00:00:00 2001 From: Daniel Naber Date: Sun, 5 Feb 2017 22:05:40 +0100 Subject: [PATCH] add a cache for better performance in the server use case, where sentences often get checked more than once (e.g. due to corrections of other sentences, which cause a re-check of the whole text) --- languagetool-core/pom.xml | 5 + .../java/org/languagetool/InputSentence.java | 72 ++++++++++++ .../java/org/languagetool/JLanguageTool.java | 44 +++++-- .../main/java/org/languagetool/Language.java | 15 +++ .../MultiThreadedJLanguageTool.java | 21 +++- .../java/org/languagetool/ResultCache.java | 71 ++++++++++++ .../org/languagetool/InputSentenceTest.java | 48 ++++++++ .../org/languagetool/server/TextChecker.java | 15 ++- .../server/HTTPServerMultiLangLoadTest.java | 9 +- .../server/HTTPServerMultiLangLoadTest2.java | 107 ++++++++++++++++++ .../languagetool/server/HTTPServerTest.java | 5 +- languagetool-standalone/CHANGES.md | 4 + .../org/languagetool/JLanguageToolTest.java | 22 +++- .../java/org/languagetool/LanguageTest.java | 7 ++ .../rules/patterns/PerformanceTest.java | 10 +- 15 files changed, 429 insertions(+), 26 deletions(-) create mode 100644 languagetool-core/src/main/java/org/languagetool/InputSentence.java create mode 100644 languagetool-core/src/main/java/org/languagetool/ResultCache.java create mode 100644 languagetool-core/src/test/java/org/languagetool/InputSentenceTest.java create mode 100644 languagetool-server/src/test/java/org/languagetool/server/HTTPServerMultiLangLoadTest2.java diff --git a/languagetool-core/pom.xml b/languagetool-core/pom.xml index a31e40f8cce5..f13742fc2f15 100644 --- a/languagetool-core/pom.xml +++ b/languagetool-core/pom.xml @@ -91,6 +91,11 @@ commons-lang3 3.5 + + com.google.guava + guava + 21.0 + net.java.dev.jna jna diff --git a/languagetool-core/src/main/java/org/languagetool/InputSentence.java b/languagetool-core/src/main/java/org/languagetool/InputSentence.java new file mode 
100644 index 000000000000..e17aa628073d --- /dev/null +++ b/languagetool-core/src/main/java/org/languagetool/InputSentence.java @@ -0,0 +1,72 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2017 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package org.languagetool; + +import org.languagetool.rules.CategoryId; + +import java.util.Objects; +import java.util.Set; + +/** + * For internal use only. Used as a key for caching check results. 
+ * @since 3.7 + */ +public class InputSentence { + + private final String text; + private final Language lang; + private final Language motherTongue; + private final Set disabledRules; + private final Set disabledRuleCategories; + private final Set enabledRules; + private final Set enabledRuleCategories; + + public InputSentence(String text, Language lang, Language motherTongue, + Set disabledRules, Set disabledRuleCategories, + Set enabledRules, Set enabledRuleCategories) { + this.text = Objects.requireNonNull(text); + this.lang = Objects.requireNonNull(lang); + this.motherTongue = motherTongue; + this.disabledRules = disabledRules; + this.disabledRuleCategories = disabledRuleCategories; + this.enabledRules = enabledRules; + this.enabledRuleCategories = enabledRuleCategories; + } + + @Override + public boolean equals(Object o) { + if (o == null) return false; + if (o == this) return true; + if (o.getClass() != getClass()) return false; + InputSentence other = (InputSentence) o; + return Objects.equals(text, other.text) && + Objects.equals(lang, other.lang) && + Objects.equals(motherTongue, other.motherTongue) && + Objects.equals(disabledRules, other.disabledRules) && + Objects.equals(disabledRuleCategories, other.disabledRuleCategories) && + Objects.equals(enabledRules, other.enabledRules) && + Objects.equals(enabledRuleCategories, other.enabledRuleCategories); + } + + @Override + public int hashCode() { + return Objects.hash(text, lang, motherTongue, disabledRules, disabledRuleCategories, enabledRules, enabledRuleCategories); + } + +} diff --git a/languagetool-core/src/main/java/org/languagetool/JLanguageTool.java b/languagetool-core/src/main/java/org/languagetool/JLanguageTool.java index 3f1e34abfde6..4e576c775886 100644 --- a/languagetool-core/src/main/java/org/languagetool/JLanguageTool.java +++ b/languagetool-core/src/main/java/org/languagetool/JLanguageTool.java @@ -81,6 +81,8 @@ public class JLanguageTool { /** Name of the message bundle for translations. 
*/ public static final String MESSAGE_BUNDLE = "org.languagetool.MessagesBundle"; + private final ResultCache cache; + /** * Returns the build date or {@code null} if not run from JAR. */ @@ -140,7 +142,19 @@ public enum ParagraphHandling { } private static final List temporaryFiles = new ArrayList<>(); - + + /** + * Create a JLanguageTool and setup the built-in rules for the + * given language and false friend rules for the text language / mother tongue pair. + * + * @param lang the language of the text to be checked + * @param motherTongue the user's mother tongue, used for false friend rules, or null. + * The mother tongue may also be used as a source language for checking bilingual texts. + */ + public JLanguageTool(Language lang, Language motherTongue) { + this(lang, motherTongue, null); + } + /** * Create a JLanguageTool and setup the built-in Java rules for the * given language. @@ -148,7 +162,7 @@ public enum ParagraphHandling { * @param language the language of the text to be checked */ public JLanguageTool(Language language) { - this(language, null); + this(language, null, null); } /** @@ -158,8 +172,12 @@ public JLanguageTool(Language language) { * @param language the language of the text to be checked * @param motherTongue the user's mother tongue, used for false friend rules, or null. * The mother tongue may also be used as a source language for checking bilingual texts. + * @param cache a cache to speed up checking if the same sentences get checked more than once, + * e.g. 
when LT is running as a server and texts are re-checked due to changes + * @since 3.7 */ - public JLanguageTool(Language language, Language motherTongue) { + @Experimental + public JLanguageTool(Language language, Language motherTongue, ResultCache cache) { this.language = Objects.requireNonNull(language, "language cannot be null"); this.motherTongue = motherTongue; ResourceBundle messages = ResourceBundleTools.getMessageBundle(language); @@ -171,6 +189,7 @@ public JLanguageTool(Language language, Language motherTongue) { } catch (Exception e) { throw new RuntimeException("Could not activate rules", e); } + this.cache = cache; } /** @@ -935,10 +954,21 @@ public List call() throws Exception { for (AnalyzedSentence analyzedSentence : analyzedSentences) { String sentence = sentences.get(i++); try { - List sentenceMatches = - checkAnalyzedSentence(paraMode, rules, charCount, lineCount, - columnCount, sentence, analyzedSentence, annotatedText); - + List sentenceMatches = null; + InputSentence cacheKey = null; + if (cache != null) { + cacheKey = new InputSentence(analyzedSentence.getText(), language, motherTongue, + disabledRules, disabledRuleCategories, + enabledRules, enabledRuleCategories); + sentenceMatches = cache.getIfPresent(cacheKey); + } + if (sentenceMatches == null) { + sentenceMatches = checkAnalyzedSentence(paraMode, rules, charCount, lineCount, + columnCount, sentence, analyzedSentence, annotatedText); + } + if (cache != null) { + cache.put(cacheKey, sentenceMatches); + } ruleMatches.addAll(sentenceMatches); charCount += sentence.length(); lineCount += countLineBreaks(sentence); diff --git a/languagetool-core/src/main/java/org/languagetool/Language.java b/languagetool-core/src/main/java/org/languagetool/Language.java index 9530f11b3ffc..95a975583b43 100644 --- a/languagetool-core/src/main/java/org/languagetool/Language.java +++ b/languagetool-core/src/main/java/org/languagetool/Language.java @@ -463,4 +463,19 @@ public int getPriorityForId(String id) { 
return 0; } + /** + * Considers languages as equal if their language code, including the country and variant codes are equal. + */ + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Language other = (Language) o; + return Objects.equals(getShortCodeWithCountryAndVariant(), other.getShortCodeWithCountryAndVariant()); + } + + @Override + public int hashCode() { + return getShortCodeWithCountryAndVariant().hashCode(); + } } diff --git a/languagetool-core/src/main/java/org/languagetool/MultiThreadedJLanguageTool.java b/languagetool-core/src/main/java/org/languagetool/MultiThreadedJLanguageTool.java index 3c2a74371429..820a5ecf6293 100644 --- a/languagetool-core/src/main/java/org/languagetool/MultiThreadedJLanguageTool.java +++ b/languagetool-core/src/main/java/org/languagetool/MultiThreadedJLanguageTool.java @@ -73,7 +73,26 @@ public MultiThreadedJLanguageTool(Language language, Language motherTongue) { * @since 2.9 */ public MultiThreadedJLanguageTool(Language language, Language motherTongue, int threadPoolSize) { - super(language, motherTongue); + this(language, motherTongue, threadPoolSize, null); + } + + /** + * @see #shutdown() + * @since 3.7 + */ + @Experimental + public MultiThreadedJLanguageTool(Language language, Language motherTongue, ResultCache cache) { + this(language, motherTongue, getDefaultThreadCount(), cache); + } + + /** + * @see #shutdown() + * @param threadPoolSize the number of concurrent threads + * @since 3.7 + */ + @Experimental + public MultiThreadedJLanguageTool(Language language, Language motherTongue, int threadPoolSize, ResultCache cache) { + super(language, motherTongue, cache); if (threadPoolSize < 1) { throw new IllegalArgumentException("threadPoolSize must be >= 1: " + threadPoolSize); } diff --git a/languagetool-core/src/main/java/org/languagetool/ResultCache.java b/languagetool-core/src/main/java/org/languagetool/ResultCache.java new file mode 
100644 index 000000000000..70d216a9117b --- /dev/null +++ b/languagetool-core/src/main/java/org/languagetool/ResultCache.java @@ -0,0 +1,71 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2017 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package org.languagetool; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheStats; +import org.languagetool.rules.RuleMatch; + +import java.util.List; +import java.util.concurrent.TimeUnit; + +/** + * A cache to speed up text checking for use cases where sentences are checked more than once. This + * typically happens when using LT as a server and texts get re-checked after corrections have been applied + * for some sentences. Use the same cache object for all {@link JLanguageTool} objects only if + * the JLanguageTool objects all use the same rules. For example, if you call {@code JLanguageTool.addRule()} + * in different ways for the different instances that you use the same cache for, the cache will return invalid results. + * It is okay, however, to use the same cache for {@link JLanguageTool} objects with different languages, as + * cached results are not used for a different language.
+ * @since 3.7 + */ +@Experimental +public class ResultCache { + + private final Cache> cache; + + /** + * Create a cache that expires items 5 minutes after the latest read access. + * @param maxSize maximum cache size in number of sentences + */ + public ResultCache(long maxSize) { + cache = CacheBuilder.newBuilder().maximumSize(maxSize).recordStats().expireAfterAccess(5, TimeUnit.MINUTES).build(); + } + + /** + * @param maxSize maximum cache size in number of sentences + * @param expireAfter time to expire sentences from the cache after last read access + */ + public ResultCache(long maxSize, int expireAfter, TimeUnit timeUnit) { + cache = CacheBuilder.newBuilder().maximumSize(maxSize).recordStats().expireAfterAccess(expireAfter, timeUnit).build(); + } + + public CacheStats stats() { + return cache.stats(); + } + + public List getIfPresent(InputSentence key) { + return cache.getIfPresent(key); + } + + public void put(InputSentence key, List sentenceMatches) { + cache.put(key, sentenceMatches); + } +} diff --git a/languagetool-core/src/test/java/org/languagetool/InputSentenceTest.java b/languagetool-core/src/test/java/org/languagetool/InputSentenceTest.java new file mode 100644 index 000000000000..8360b7c7f901 --- /dev/null +++ b/languagetool-core/src/test/java/org/languagetool/InputSentenceTest.java @@ -0,0 +1,48 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2017 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package org.languagetool; + +import org.junit.Test; +import org.languagetool.rules.CategoryId; + +import java.util.Arrays; +import java.util.HashSet; + +import static org.junit.Assert.*; + +public class InputSentenceTest { + + @Test + public void test() { + Language lang = Languages.getLanguageForShortCode("xx-XX"); + InputSentence inputSentence1a = new InputSentence("foo", lang, lang, + new HashSet<>(Arrays.asList("ID1")), new HashSet<>(Arrays.asList(new CategoryId("C1"))), + new HashSet<>(Arrays.asList("ID2")), new HashSet<>(Arrays.asList(new CategoryId("C2")))); + InputSentence inputSentence1b = new InputSentence("foo", lang, lang, + new HashSet<>(Arrays.asList("ID1")), new HashSet<>(Arrays.asList(new CategoryId("C1"))), + new HashSet<>(Arrays.asList("ID2")), new HashSet<>(Arrays.asList(new CategoryId("C2")))); + assertEquals(inputSentence1a, inputSentence1b); + InputSentence inputSentence2 = new InputSentence("foo", lang, null, + new HashSet<>(Arrays.asList("ID1")), new HashSet<>(Arrays.asList(new CategoryId("C1"))), + new HashSet<>(Arrays.asList("ID2")), new HashSet<>(Arrays.asList(new CategoryId("C2")))); + assertNotEquals(inputSentence1a, inputSentence2); + assertNotEquals(inputSentence1b, inputSentence2); + } + +} \ No newline at end of file diff --git a/languagetool-server/src/main/java/org/languagetool/server/TextChecker.java b/languagetool-server/src/main/java/org/languagetool/server/TextChecker.java index 9aabcfd011fc..713dbe97329f 100644 --- a/languagetool-server/src/main/java/org/languagetool/server/TextChecker.java +++ b/languagetool-server/src/main/java/org/languagetool/server/TextChecker.java @@ -20,9 +20,7 @@ import com.sun.net.httpserver.HttpExchange; import org.jetbrains.annotations.NotNull; -import 
org.languagetool.JLanguageTool; -import org.languagetool.Language; -import org.languagetool.Languages; +import org.languagetool.*; import org.languagetool.gui.Configuration; import org.languagetool.language.LanguageIdentifier; import org.languagetool.rules.CategoryId; @@ -58,16 +56,19 @@ abstract class TextChecker { protected final HTTPServerConfig config; private static final String ENCODING = "UTF-8"; + private static final int CACHE_STATS_PRINT = 500; // print cache stats every n cache requests private final boolean internalServer; private final LanguageIdentifier identifier; private final ExecutorService executorService; + private final ResultCache cache; TextChecker(HTTPServerConfig config, boolean internalServer) { this.config = config; this.internalServer = internalServer; this.identifier = new LanguageIdentifier(); this.executorService = Executors.newCachedThreadPool(); + this.cache = new ResultCache(1000); } void shutdownNow() { @@ -171,6 +172,12 @@ protected void checkParams(Map parameters) { private List getRuleMatches(String text, Map parameters, Language lang, Language motherTongue, QueryParams params) throws Exception { String sourceText = parameters.get("srctext"); + long cacheRequests = cache.stats().requestCount(); + if (cacheRequests > 0 && cacheRequests % CACHE_STATS_PRINT == 0) { + double hitRate = cache.stats().hitRate(); + String hitPercentage = String.format(Locale.ENGLISH, "%.2f", hitRate * 100.0f); + print("Cache stats: " + hitPercentage + "% hit rate, " + cache.stats()); + } if (sourceText == null) { JLanguageTool lt = getLanguageToolInstance(lang, motherTongue, params); return lt.check(text); @@ -242,7 +249,7 @@ Language detectLanguageOfString(String text, String fallbackLanguage, List langCodeToText = new HashMap<>(); - private final Random random = new Random(1234); - private final AtomicInteger counter = new AtomicInteger(); + protected final Map langCodeToText = new HashMap<>(); + protected final Random random = new Random(1234); + 
protected final AtomicInteger counter = new AtomicInteger(); @Test @Override @@ -99,7 +100,7 @@ void runTestsV2() throws IOException, SAXException, ParserConfigurationException + ", Length: " + textSubstring.length() + ", Time: " + (System.currentTimeMillis()-startTime) + "ms"); } - private Language getRandomLanguage() { + protected Language getRandomLanguage() { int randomNumber = random.nextInt(langCodeToText.size()); int i = 0; for (Language lang : langCodeToText.keySet()) { diff --git a/languagetool-server/src/test/java/org/languagetool/server/HTTPServerMultiLangLoadTest2.java b/languagetool-server/src/test/java/org/languagetool/server/HTTPServerMultiLangLoadTest2.java new file mode 100644 index 000000000000..d969cd2f8041 --- /dev/null +++ b/languagetool-server/src/test/java/org/languagetool/server/HTTPServerMultiLangLoadTest2.java @@ -0,0 +1,107 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2014 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package org.languagetool.server; + +import org.junit.Ignore; +import org.junit.Test; +import org.languagetool.Language; +import org.languagetool.Languages; +import org.languagetool.language.German; +import org.languagetool.tools.StringTools; +import org.xml.sax.SAXException; + +import javax.xml.parsers.ParserConfigurationException; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.*; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.Assert.fail; + +/** + * Test HTTP server access from multiple threads with multiple languages. + * Unlike HTTPServerMultiLangLoadTest, this always sends the same text + * but actually checks results (compares multi-thread results to non-multi-thread). + */ +@Ignore("for interactive use; requires local Tatoeba data") +public class HTTPServerMultiLangLoadTest2 extends HTTPServerMultiLangLoadTest { + + private static final String DATA_PATH = "/media/Data/tatoeba/"; + private static final int MIN_TEXT_LENGTH = 500; + private static final int MAX_TEXT_LENGTH = 1_000; + private static final int MAX_SLEEP_MILLIS = 10; + + private final Map textToResult = new HashMap<>(); + + @Test + @Override + public void testHTTPServer() throws Exception { + File dir = new File(DATA_PATH); + List languages = new ArrayList<>(); + //languages.add(new German()); + languages.addAll(Languages.get()); + for (Language language : languages) { + File file = new File(dir, "tatoeba-" + language.getShortCode() + ".txt"); + if (!file.exists()) { + System.err.println("No data found for " + language + ", language will not be tested"); + } else { + String content = StringTools.readerToString(new FileReader(file)); + int fromPos = random.nextInt(content.length()); + int 
toPos = fromPos + random.nextInt(MAX_TEXT_LENGTH) + MIN_TEXT_LENGTH; + String textSubstring = content.substring(fromPos, Math.min(toPos, content.length())); + langCodeToText.put(language, textSubstring); + String response = checkByPOST(language, textSubstring); + textToResult.put(language, response); + System.err.println("Using " + content.length() + " bytes of data for " + language); + } + } + if (langCodeToText.size() == 0) { + throw new RuntimeException("No input data found in " + dir); + } + System.out.println("Testing " + langCodeToText.keySet().size() + " languages and variants"); + //super.testHTTPServer(); // start server in this JVM + super.doTest(); // assume server has been started manually in its own JVM + } + + @Override + void runTestsV2() throws IOException, SAXException, ParserConfigurationException { + Language language = getRandomLanguage(); + String text = langCodeToText.get(language); + long sleepTime = random.nextInt(MAX_SLEEP_MILLIS); + try { + Thread.sleep(sleepTime); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + long startTime = System.currentTimeMillis(); + counter.incrementAndGet(); + String realResult = checkByPOST(language, text); + String expectedResult = textToResult.get(language); + if (!realResult.equals(expectedResult)) { + fail("Real result != expected result for " + language + ", input: " + text + "\n" + + "Real result: " + realResult + "\n" + + "Exp. result: " + expectedResult + ); + } + System.out.println(counter.get() + ". 
Sleep: " + sleepTime + "ms, Lang: " + language.getShortCodeWithCountryAndVariant() + + ", Length: " + text.length() + ", Time: " + (System.currentTimeMillis()-startTime) + "ms"); + } + +} diff --git a/languagetool-server/src/test/java/org/languagetool/server/HTTPServerTest.java b/languagetool-server/src/test/java/org/languagetool/server/HTTPServerTest.java index b70cbe6744ec..5a46f83faaff 100644 --- a/languagetool-server/src/test/java/org/languagetool/server/HTTPServerTest.java +++ b/languagetool-server/src/test/java/org/languagetool/server/HTTPServerTest.java @@ -323,7 +323,7 @@ private String checkWithOptionsV2(Language lang, Language motherTongue, String t * Same as {@link #checkV1(Language, String)} but using HTTP POST method instead of GET */ protected String checkByPOST(Language lang, String text) throws IOException { - String postData = "language=" + lang.getShortCode() + "&text=" + URLEncoder.encode(text, "UTF-8"); // latin1 is not enough for languages like Polish, Romanian, etc + String postData = "language=" + lang.getShortCodeWithCountryAndVariant() + "&text=" + URLEncoder.encode(text, "UTF-8"); // latin1 is not enough for languages like Polish, Romanian, etc URL url = new URL("http://localhost:" + HTTPTools.getDefaultPort() + "/v2/check"); try { return HTTPTools.checkAtUrlByPost(url, postData); @@ -333,7 +333,8 @@ protected String checkByPOST(Language lang, String text) throws IOException { System.err.println("Got expected error on long text (" + text.length() + " chars): " + e.getMessage()); return ""; } else { - System.err.println("Got error from server (" + lang.getShortCodeWithCountryAndVariant() + ", " + text.length() + " chars): " + e.getMessage()); + System.err.println("Got error from " + url + " (" + lang.getShortCodeWithCountryAndVariant() + ", " + + text.length() + " chars): " + e.getMessage() + ", text was: '" + text + "'"); return ""; } } diff --git a/languagetool-standalone/CHANGES.md b/languagetool-standalone/CHANGES.md index 
d58bf0a0418e..9e86e0376a61 100644 --- a/languagetool-standalone/CHANGES.md +++ b/languagetool-standalone/CHANGES.md @@ -40,9 +40,13 @@ the next version, unless users complain and present a valid use case. * The old XML-based API has been removed. The migration to the new JSON-based API is documented at https://languagetool.org/http-api/migration.php + * Speed up with a cache for cases where the same sentences get checked + again (e.g. due to a correction in a text that doesn't affect all sentences + but causes the whole text to be re-checked) #### Java API * Some deprecated methods have been removed. + * A new class `ResultCache` has been added to speed up the LT server #### Internal * OpenNLP has been updated from 1.6.0 to 1.7.1 (only used for English) diff --git a/languagetool-standalone/src/test/java/org/languagetool/JLanguageToolTest.java b/languagetool-standalone/src/test/java/org/languagetool/JLanguageToolTest.java index b163ed87f0b8..315054098cc8 100644 --- a/languagetool-standalone/src/test/java/org/languagetool/JLanguageToolTest.java +++ b/languagetool-standalone/src/test/java/org/languagetool/JLanguageToolTest.java @@ -19,10 +19,7 @@ package org.languagetool; import org.junit.Test; -import org.languagetool.language.AmericanEnglish; -import org.languagetool.language.Demo; -import org.languagetool.language.English; -import org.languagetool.language.German; +import org.languagetool.language.*; import org.languagetool.markup.AnnotatedText; import org.languagetool.markup.AnnotatedTextBuilder; import org.languagetool.rules.CategoryId; @@ -196,4 +193,21 @@ public void testStrangeInput() throws IOException { assertThat(matches.size(), is(0)); } + @Test + public void testCache() throws IOException { + ResultCache cache = new ResultCache(1000); + JLanguageTool ltEnglish = new JLanguageTool(english, null, cache); + assertThat(ltEnglish.check("This is an test").size(), is(1)); + assertThat(cache.stats().hitCount(), is(0L)); + assertThat(ltEnglish.check("This is an 
test").size(), is(1)); + assertThat(cache.stats().hitCount(), is(1L)); + + JLanguageTool ltGerman = new JLanguageTool(new GermanyGerman(), null, cache); + assertTrue(ltGerman.check("This is an test").size() >= 3); + assertThat(cache.stats().hitCount(), is(1L)); + + assertThat(ltEnglish.check("This is an test").size(), is(1)); + assertThat(cache.stats().hitCount(), is(2L)); + } + } diff --git a/languagetool-standalone/src/test/java/org/languagetool/LanguageTest.java b/languagetool-standalone/src/test/java/org/languagetool/LanguageTest.java index 798e6dc0162b..b2cd4711abc2 100644 --- a/languagetool-standalone/src/test/java/org/languagetool/LanguageTest.java +++ b/languagetool-standalone/src/test/java/org/languagetool/LanguageTest.java @@ -50,6 +50,13 @@ public void testGetShortNameWithVariant() { assertEquals("de", new German().getShortCodeWithCountryAndVariant()); } + @Test + public void testEquals() { + assertEquals(new GermanyGerman(), new GermanyGerman()); + assertNotEquals(new AustrianGerman(), new GermanyGerman()); + assertNotEquals(new AustrianGerman(), new German()); + } + @Test public void testEqualsConsiderVariantIfSpecified() { // every language equals itself: diff --git a/languagetool-standalone/src/test/java/org/languagetool/rules/patterns/PerformanceTest.java b/languagetool-standalone/src/test/java/org/languagetool/rules/patterns/PerformanceTest.java index e77e3605582a..7e1a487c19ce 100644 --- a/languagetool-standalone/src/test/java/org/languagetool/rules/patterns/PerformanceTest.java +++ b/languagetool-standalone/src/test/java/org/languagetool/rules/patterns/PerformanceTest.java @@ -18,14 +18,13 @@ */ package org.languagetool.rules.patterns; -import org.languagetool.JLanguageTool; -import org.languagetool.Languages; -import org.languagetool.MultiThreadedJLanguageTool; +import org.languagetool.*; import org.languagetool.tools.StringTools; import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import 
java.util.concurrent.TimeUnit; /** * Check performance per sentence. Not a unit test, for interactive use only. @@ -65,8 +64,11 @@ public static void main(String[] args) throws IOException { PerformanceTest test = new PerformanceTest(); String languageCode = args[0]; File textFile = new File(args[1]); - //JLanguageTool langTool = new JLanguageTool(Languages.getLanguageForShortName(languageCode)); + //ResultCache cache = new ResultCache(1000, 5, TimeUnit.MINUTES); + //JLanguageTool langTool = new JLanguageTool(Languages.getLanguageForShortCode(languageCode)); + //JLanguageTool langTool = new JLanguageTool(Languages.getLanguageForShortCode(languageCode), null, cache); MultiThreadedJLanguageTool langTool = new MultiThreadedJLanguageTool(Languages.getLanguageForShortCode(languageCode)); + //MultiThreadedJLanguageTool langTool = new MultiThreadedJLanguageTool(Languages.getLanguageForShortCode(languageCode), null, cache); test.run(langTool, textFile); langTool.shutdown(); }