diff --git a/languagetool-dev/pom.xml b/languagetool-dev/pom.xml
index 8be7e3385d77..ee55d46e92e4 100644
--- a/languagetool-dev/pom.xml
+++ b/languagetool-dev/pom.xml
@@ -72,6 +72,17 @@
encog-core3.2.0
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-core
+
+ 2.4.0
+
+
+ org.apache.hadoop
+ hadoop-common
+ 2.4.0
+ junit
diff --git a/languagetool-dev/src/main/java/org/languagetool/dev/hadoop/NGramAggregator.java b/languagetool-dev/src/main/java/org/languagetool/dev/hadoop/NGramAggregator.java
new file mode 100644
index 000000000000..60d587148f8e
--- /dev/null
+++ b/languagetool-dev/src/main/java/org/languagetool/dev/hadoop/NGramAggregator.java
@@ -0,0 +1,89 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2015 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package org.languagetool.dev.hadoop;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+
+import java.io.IOException;
+
+/**
+ * Aggregate Google ngram data (http://storage.googleapis.com/books/ngrams/books/datasetsv2.html)
+ * in Hadoop. Based on the Hadoop word counter example.
+ */
+public final class NGramAggregator {
+
+ private static final int MIN_YEAR = 1910;
+
+ private NGramAggregator() { // private and empty: the class cannot be instantiated from outside
+ }
+
+ public static class TokenizerMapper extends Mapper