diff --git a/.gitignore b/.gitignore
index c100436f..24a10fc7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
doc/_build*
target
+build
.DS_Store
.project
.settings
@@ -12,21 +13,13 @@ target
*.obj
*.zip
data/maps/customisations.obj
-data/db/db-en
-data/db/db-fr
-data/db/db-de
-data/db/db-it
-data/db/db-es
-data/db/db-kb
+data/db/db-*
data/db/domains-en.db
data/embeddings
-data/embeddings/en
-data/embeddings/fr
-data/embeddings/de
-data/embeddings/es
-data/embeddings/it
+data/embeddings/*
data/corpus/corpus-long/aida/RawText
data/corpus/corpus-long/aida-train/RawText
data/corpus/corpus-long/aida-testa/RawText
data/corpus/corpus-long/aida-testb/RawText
-data/species/*
\ No newline at end of file
+data/species/*
+.gradle
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 00000000..ca2977d1
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,16 @@
+language: java
+dist: trusty
+sudo: false
+
+jdk:
+ - oraclejdk8
+
+addons:
+ apt:
+ packages:
+ - oracle-java8-installer
+
+# CI only compiles and packages: '-x test' excludes the test task from the build.
+script: ./gradlew clean build -x test
+#after_success:  (NOTE(review): the disabled step below is still a Maven command, but pom.xml is deleted by this change)
+# - mvn clean cobertura:cobertura org.eluder.coveralls:coveralls-maven-plugin:report
\ No newline at end of file
diff --git a/build.gradle b/build.gradle
new file mode 100644
index 00000000..d64eef4e
--- /dev/null
+++ b/build.gradle
@@ -0,0 +1,263 @@
+buildscript { // repositories and plugin jars needed by this build script itself
+    repositories {
+        mavenLocal()
+        mavenCentral()
+        jcenter()
+    }
+    dependencies {
+        classpath 'net.researchgate:gradle-release:2.6.0'
+        classpath group: 'org.kt3k.gradle.plugin', name: 'coveralls-gradle-plugin', version: '2.4.0'
+        classpath group: 'com.jfrog.bintray.gradle', name: 'gradle-bintray-plugin', version: '1.7.3'
+        classpath group: 'com.github.jengelman.gradle.plugins', name: 'shadow', version: '2.0.1'
+    }
+}
+
+
+apply plugin: 'maven'
+apply plugin: 'com.jfrog.bintray' // jar comes from the buildscript classpath
+apply plugin: 'com.github.johnrengelman.shadow' // jar comes from the buildscript classpath
+apply plugin: 'java'
+apply plugin: 'war' // provides the 'war' task configured further down
+apply from: 'https://raw.github.com/gretty-gradle-plugin/gretty/master/pluginScripts/gretty.plugin' // NOTE(review): remote script from gretty's master branch, not pinned to a version
+
+group = 'com.scienceminer.nerd'
+
+description = """entity recognition and disambiguation against Wikidata and Wikipedia in a raw text,
+partially-annotated text segment or PDF"""
+
+sourceCompatibility = JavaVersion.VERSION_1_8 // accept Java 8 source
+targetCompatibility = JavaVersion.VERSION_1_8 // emit Java 8 bytecode
+
+tasks.withType(JavaCompile) { javaCompile ->
+    javaCompile.options.encoding = 'UTF-8' // every Java compile task reads sources as UTF-8
+}
+
+repositories { // where project dependencies are looked up, in the order listed
+    mavenLocal()
+    mavenCentral()
+    jcenter()
+    maven { url 'https://dl.bintray.com/rookies/maven' } // extra Bintray repository
+}
+
+dependencies {
+    // Every module is declared exactly once: a second declaration of the same module
+    // (previously e.g. guava 21.0 next to 25.1-jre) only hides which version is really used.
+
+    //Tests
+    testCompile 'junit:junit:4.12'
+    testCompile 'org.easymock:easymock:3.4'
+    testCompile 'org.hamcrest:hamcrest-all:1.3'
+    testCompile group: 'com.googlecode.json-simple', name: 'json-simple', version: '1.1.1'
+
+    //Logging
+    compile 'org.slf4j:slf4j-log4j12:1.7.25'
+    compile group: 'log4j', name: 'log4j', version: '1.2.17'
+
+    //GROBID
+    compile group: 'org.grobid', name: 'grobid-ner', version: '0.5.1'
+    compile 'org.grobid:grobid-trainer:0.5.2'
+    // slf4j-log4j12 (see Logging) is the SLF4J binding of this project, so slf4j-jdk14 is kept out
+    compile(group: 'org.grobid', name: 'grobid-core', version: '0.5.2') {
+        exclude(module: 'slf4j-jdk14')
+    }
+    compile group: 'directory-naming', name: 'naming-java', version: '0.8'
+    compile group: 'fr.limsi.wapiti', name: 'wapiti', version: '1.5.0'
+    compile group: 'org.wipo.analysers', name: 'wipo-analysers', version: '0.0.1'
+
+    //Apache commons
+    compile 'org.apache.commons:commons-collections4:4.1'
+    compile 'org.apache.commons:commons-lang3:3.6'
+    compile 'commons-logging:commons-logging:1.2'
+    compile 'commons-io:commons-io:2.5'
+    compile 'commons-pool:commons-pool:1.6'
+    compile group: 'org.apache.httpcomponents', name: 'httpclient', version: '4.5.3'
+    compile group: 'org.apache.httpcomponents', name: 'httpmime', version: '4.5.3'
+    compile group: 'org.apache.commons', name: 'commons-text', version: '1.1'
+
+    //Json
+    compile 'com.fasterxml.jackson.core:jackson-core:2.9.5'
+    compile 'com.fasterxml.jackson.core:jackson-annotations:2.9.5'
+    compile 'com.fasterxml.jackson.core:jackson-databind:2.9.5'
+    compile group: 'com.fasterxml.jackson.dataformat', name: 'jackson-dataformat-yaml', version: '2.9.5'
+    compile group: 'net.arnx', name: 'jsonic', version: '1.3.10'
+
+    //Web interface
+    compile group: 'com.sun.jersey', name: 'jersey-client', version: '1.8'
+    compile group: 'com.sun.jersey', name: 'jersey-server', version: '1.8'
+    compile group: 'com.sun.jersey.contribs', name: 'jersey-multipart', version: '1.8'
+    compile 'javax.ws.rs:javax.ws.rs-api:2.1'
+    compile 'javax.servlet:javax.servlet-api:3.1.0'
+
+    //Misc
+    // single guava declaration; 25.1-jre is the newest of the two versions that used to be requested
+    compile group: 'com.google.guava', name: 'guava', version: '25.1-jre'
+    compile group: 'org.jvnet', name: 'mimepull', version: '1.6'
+
+    //Specialised libraries
+    compile group: 'com.cybozu', name: 'language-detection', version: '09-13-2011'
+    compile group: 'com.github.haifengl', name: 'smile-core', version: '1.3.1'
+    compile group: 'com.googlecode.clearnlp', name: 'clearnlp', version: '1.3.1'
+    compile(group: 'it.unimi.dsi', name: 'sux4j', version: '3.1.2') {
+        exclude(module: 'log4j-over-slf4j')
+        exclude(module: 'logback-classic')
+    }
+    compile group: 'it.unimi.dsi', name: 'fastutil', version: '6.5.12'
+    compile(group: 'it.unimi.dsi', name: 'dsiutils', version: '2.1.9') {
+        exclude(module: 'logback-classic')
+    }
+    compile group: 'de.ruedigermoeller', name: 'fst', version: '2.50'
+
+    //Wikipedia
+    compile group: 'org.sweble.wikitext', name: 'swc-parser-lazy', version: '3.1.5'
+    compile group: 'org.sweble.wikitext', name: 'swc-engine', version: '3.1.5'
+
+    //XML
+    compile group: 'com.thoughtworks.xstream', name: 'xstream', version: '1.4.10'
+
+    //LMDB
+    compile group: 'org.deephacks.lmdbjni', name: 'lmdbjni', version: '0.4.6'
+    compile group: 'org.deephacks.lmdbjni', name: 'lmdbjni-linux64', version: '0.4.6'
+    compile group: 'org.deephacks.lmdbjni', name: 'lmdbjni-osx64', version: '0.4.6'
+    compile group: 'org.deephacks.lmdbjni', name: 'lmdbjni-win64', version: '0.4.6'
+
+    //Hadoop
+    compile group: 'org.apache.hadoop', name: 'hadoop-core', version: '1.2.1'
+    compile group: 'org.apache.avro', name: 'avro', version: '1.7.5'
+
+}
+
+configurations {
+    // keep the slf4j-jdk14 binding off the compile configuration, whoever pulls it in
+    compile.exclude(group: 'org.slf4j', module: 'slf4j-jdk14')
+}
+
+configurations.all { conf ->
+    // force xml-apis to 1.4.01 in every configuration
+    conf.resolutionStrategy.force('xml-apis:xml-apis:1.4.01')
+}
+
+test { // settings of the 'test' task
+ exclude '**/**IntegrationTest**' // leave out every test whose path matches *IntegrationTest*
+}
+
+//integration (type: Test){
+// include '**'
+//}
+
+// Gretty configuration
+
+gretty {
+    servletContainer = 'jetty9.4'
+    httpPort = 8090 // the documentation points users to http://localhost:8090
+    contextPath = '/'
+    webInfIncludeJarPattern = ''
+    reloadOnClassChange = false
+}
+
+// Value of project property propName (e.g. passed as -PpropName=value), or defaultVal when it is not set.
+ext.getArg = { propName, defaultVal ->
+    !project.hasProperty(propName) ? defaultVal : project.getProperty(propName)
+}
+
+war {
+    enabled = true
+    classifier = 'war' // appended to the archive name
+//    dependsOn mainWar
+}
+
+
+artifacts { // both archives are published through the 'archives' configuration
+    archives jar
+    archives war
+}
+
+//TODO: we could create a task to download and unpack the lmdb files automatically
+//task copyModels(type: Copy) {
+// from "${rootDir}/resources/models"
+// include "**/*.wapiti"
+// into "${rootDir}/../grobid-home/models/"
+//}
+
+//tasks.withType(JavaCompile) {
+// options.compilerArgs << "-Xlint:deprecation"
+// options.compilerArgs << "-Xlint:unchecked"
+//}
+
+
+wrapper {
+    gradleVersion = '4.6' // same version as distributionUrl in gradle/wrapper/gradle-wrapper.properties
+}
+
+// Evaluation
+
+// Runs NEDCorpusEvaluation on the corpus given as -Pcorpus. Run like: ./gradlew evaluation -Pcorpus=[corpusname]
+task evaluation(type: JavaExec, dependsOn: 'classes', group: 'evaluation') {
+    classpath = sourceSets.main.runtimeClasspath
+    main = 'com.scienceminer.nerd.evaluation.NEDCorpusEvaluation'
+    jvmArgs '-Xms2g', '-Xmx8g'
+    args getArg('corpus', null)
+}
+
+
+// Runs AnnotatedDataGeneration on the corpus given as -Pcorpus. Run like: ./gradlew evaluationDataGeneration -Pcorpus=[corpusname]
+task evaluationDataGeneration(type: JavaExec, dependsOn: 'classes', group: 'training') {
+    classpath = sourceSets.main.runtimeClasspath
+    main = 'com.scienceminer.nerd.evaluation.AnnotatedDataGeneration'
+    jvmArgs '-Xms2g', '-Xmx8g'
+    args getArg('corpus', null)
+}
+
+// Training
+// Runs WikipediaTrainer on data/wikipedia/training/ for language 'en'. Run like: ./gradlew train_annotate_en
+task(train_annotate_en, dependsOn: 'classes', type: JavaExec, group: 'training') {
+    main = 'com.scienceminer.nerd.training.WikipediaTrainer'
+    classpath = sourceSets.main.runtimeClasspath
+    args 'data/wikipedia/training/', 'en'
+    jvmArgs '-XX:+HeapDumpOnOutOfMemoryError', '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Xms2g', '-Xmx8g' // each JVM flag once
+}
+
+// Runs WikipediaTrainer on data/wikipedia/training/ for language 'de'. Run like: ./gradlew train_annotate_de
+task(train_annotate_de, dependsOn: 'classes', type: JavaExec, group: 'training') {
+    main = 'com.scienceminer.nerd.training.WikipediaTrainer'
+    classpath = sourceSets.main.runtimeClasspath
+    args 'data/wikipedia/training/', 'de'
+    jvmArgs '-XX:+HeapDumpOnOutOfMemoryError', '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Xms2g', '-Xmx8g' // each JVM flag once
+}
+
+// Runs WikipediaTrainer on data/wikipedia/training/ for language 'fr'. Run like: ./gradlew train_annotate_fr
+task(train_annotate_fr, dependsOn: 'classes', type: JavaExec, group: 'training') {
+    main = 'com.scienceminer.nerd.training.WikipediaTrainer'
+    classpath = sourceSets.main.runtimeClasspath
+    args 'data/wikipedia/training/', 'fr'
+    jvmArgs '-XX:+HeapDumpOnOutOfMemoryError', '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Xms2g', '-Xmx8g' // each JVM flag once
+}
+
+// Runs WikipediaTrainer on data/wikipedia/training/ for language 'it'. Run like: ./gradlew train_annotate_it
+task(train_annotate_it, dependsOn: 'classes', type: JavaExec, group: 'training') {
+    main = 'com.scienceminer.nerd.training.WikipediaTrainer'
+    classpath = sourceSets.main.runtimeClasspath
+    args 'data/wikipedia/training/', 'it'
+    jvmArgs '-XX:+HeapDumpOnOutOfMemoryError', '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Xms2g', '-Xmx8g' // each JVM flag once
+}
+
+// Runs WikipediaTrainer on data/wikipedia/training/ for language 'es'. Run like: ./gradlew train_annotate_es
+task(train_annotate_es, dependsOn: 'classes', type: JavaExec, group: 'training') {
+    main = 'com.scienceminer.nerd.training.WikipediaTrainer'
+    classpath = sourceSets.main.runtimeClasspath
+    args 'data/wikipedia/training/', 'es'
+    jvmArgs '-XX:+HeapDumpOnOutOfMemoryError', '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Xms2g', '-Xmx8g' // each JVM flag once
+}
+
+// Embeddings (obsolete)
+// Runs EntityDescription on data/wikipedia/embeddings/en/ for 'en'. Run like: ./gradlew generate_entity_description
+task generate_entity_description(type: JavaExec, dependsOn: 'classes', group: 'embeddings') {
+    classpath = sourceSets.main.runtimeClasspath
+    main = 'com.scienceminer.nerd.embeddings.EntityDescription'
+    jvmArgs '-Xms2g', '-Xmx8g'
+    args 'data/wikipedia/embeddings/en/', 'en'
+}
+
+
+
+
+
diff --git a/doc/build.rst b/doc/build.rst
index 4d46a023..036606cb 100644
--- a/doc/build.rst
+++ b/doc/build.rst
@@ -60,14 +60,14 @@ Then install the compiled indexed data:
#. Build the project, under the *entity-fishing* project repository.
::
- $ mvn clean install
+ $ ./gradlew clean build
Some tests will be executed. If all tests are successful, you should be now ready to run the service.
#. Run the service:
::
- $ mvn clean jetty:run
+ $ ./gradlew clean appRun
The test console is available at port ``:8090`` by opening in your browser (preferably *Firefox* or *Chrome*, *Internet Explorer* has not been tested): http://localhost:8090
diff --git a/doc/evaluation.rst b/doc/evaluation.rst
index 3c0cecfd..33b28839 100644
--- a/doc/evaluation.rst
+++ b/doc/evaluation.rst
@@ -37,11 +37,11 @@ Evaluation commands
-Use the following maven command with the above dataset identifier for running an evaluation:
+Use the following gradle command with the above dataset identifier for running an evaluation:
::
- $ mvn compile exec:java -Dexec.mainClass=com.scienceminer.nerd.evaluation.NEDCorpusEvaluation -Dexec.args="aquaint"
+ $ ./gradlew evaluation -Pcorpus=[dataset]
For instance for evaluating against the testb subset of the AIDA-CONLL, use:
::
- $ mvn compile exec:java -Dexec.mainClass=com.scienceminer.nerd.evaluation.NEDCorpusEvaluation -Dexec.args="aida-testb"
+ $ ./gradlew evaluation -Pcorpus=aida-testb
The evaluation process will provide standard metrics (accuracy, precision, recall. f1) for micro- and macro-averages for the entity disambiguation algorithm selected as ranker and for priors (as baseline).
@@ -60,7 +60,7 @@ If there is a directory called ``pdf`` or ``PDF``, the process will extract info
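-Use the following maven command with the above dataset identifier for generating the annotation xml file:
+Use the following gradle command with the above dataset identifier for generating the annotation xml file: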
::
- $ mvn compile exec:java -Dexec.mainClass=com.scienceminer.nerd.evaluation.AnnotatedDataGeneration -Dexec.args="toto"
+ $ ./gradlew evaluationDataGeneration -Pcorpus=[corpusname]
References
**********
diff --git a/doc/train.rst b/doc/train.rst
index b8366f3a..eafb7fdd 100644
--- a/doc/train.rst
+++ b/doc/train.rst
@@ -17,10 +17,10 @@ The following command will build the two models used in *entity-fishing*, the ``
For other languages, replace the ending language code (``en``) by the desired one (``fr``, ``de``, ``it`` and ``es`` are supported), e.g.:
::
- $ mvn compile exec:exec -Ptrain_annotate_de
- $ mvn compile exec:exec -Ptrain_annotate_fr
- $ mvn compile exec:exec -Ptrain_annotate_es
- $ mvn compile exec:exec -Ptrain_annotate_it
+ $ ./gradlew train_annotate_de
+ $ ./gradlew train_annotate_fr
+ $ ./gradlew train_annotate_es
+ $ ./gradlew train_annotate_it
Models will be saved under ``data/models``. ``ARFF`` training data files used to build the model are saved under ``data/wikipedia/training/``.
diff --git a/gradle.properties b/gradle.properties
new file mode 100644
index 00000000..8e0d48df
--- /dev/null
+++ b/gradle.properties
@@ -0,0 +1 @@
+version = 0.0.4
\ No newline at end of file
diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 00000000..f6b961fd
Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ
diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 00000000..2db4af13
--- /dev/null
+++ b/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,6 @@
+#Fri Nov 16 08:49:17 CET 2018
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-4.6-all.zip
diff --git a/gradlew b/gradlew
new file mode 100755
index 00000000..cccdd3d5
--- /dev/null
+++ b/gradlew
@@ -0,0 +1,172 @@
+#!/usr/bin/env sh
+
+##############################################################################
+##
+## Gradle start up script for UN*X
+##
+##############################################################################
+
+# Attempt to set APP_HOME
+# Resolve links: $0 may be a link
+PRG="$0"
+# Need this for relative symlinks.
+while [ -h "$PRG" ] ; do
+ ls=`ls -ld "$PRG"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ PRG="$link"
+ else
+ PRG=`dirname "$PRG"`"/$link"
+ fi
+done
+SAVED="`pwd`"
+cd "`dirname \"$PRG\"`/" >/dev/null
+APP_HOME="`pwd -P`"
+cd "$SAVED" >/dev/null
+
+APP_NAME="Gradle"
+APP_BASE_NAME=`basename "$0"`
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS=""
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD="maximum"
+
+warn () {
+ echo "$*"
+}
+
+die () {
+ echo
+ echo "$*"
+ echo
+ exit 1
+}
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "`uname`" in
+ CYGWIN* )
+ cygwin=true
+ ;;
+ Darwin* )
+ darwin=true
+ ;;
+ MINGW* )
+ msys=true
+ ;;
+ NONSTOP* )
+ nonstop=true
+ ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD="$JAVA_HOME/jre/sh/java"
+ else
+ JAVACMD="$JAVA_HOME/bin/java"
+ fi
+ if [ ! -x "$JAVACMD" ] ; then
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+ fi
+else
+ JAVACMD="java"
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
+ MAX_FD_LIMIT=`ulimit -H -n`
+ if [ $? -eq 0 ] ; then
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+ MAX_FD="$MAX_FD_LIMIT"
+ fi
+ ulimit -n $MAX_FD
+ if [ $? -ne 0 ] ; then
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
+ fi
+ else
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
+ fi
+fi
+
+# For Darwin, add options to specify how the application appears in the dock
+if $darwin; then
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+fi
+
+# For Cygwin, switch paths to Windows format before running java
+if $cygwin ; then
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+ JAVACMD=`cygpath --unix "$JAVACMD"`
+
+ # We build the pattern for arguments to be converted via cygpath
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+ SEP=""
+ for dir in $ROOTDIRSRAW ; do
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
+ SEP="|"
+ done
+ OURCYGPATTERN="(^($ROOTDIRS))"
+ # Add a user-defined pattern to the cygpath arguments
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+ fi
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
+ i=0
+ for arg in "$@" ; do
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
+
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+ else
+ eval `echo args$i`="\"$arg\""
+ fi
+ i=$((i+1))
+ done
+ case $i in
+ (0) set -- ;;
+ (1) set -- "$args0" ;;
+ (2) set -- "$args0" "$args1" ;;
+ (3) set -- "$args0" "$args1" "$args2" ;;
+ (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+ (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+ (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+ (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+ (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+ (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+ esac
+fi
+
+# Escape application args
+save () {
+ for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
+ echo " "
+}
+APP_ARGS=$(save "$@")
+
+# Collect all arguments for the java command, following the shell quoting and substitution rules
+eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
+
+# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
+if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
+ cd "$(dirname "$0")"
+fi
+
+exec "$JAVACMD" "$@"
diff --git a/gradlew.bat b/gradlew.bat
new file mode 100644
index 00000000..f9553162
--- /dev/null
+++ b/gradlew.bat
@@ -0,0 +1,84 @@
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS=
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto init
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto init
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:init
+@rem Get command-line arguments, handling Windows variants
+
+if not "%OS%" == "Windows_NT" goto win9xME_args
+
+:win9xME_args
+@rem Slurp the command line arguments.
+set CMD_LINE_ARGS=
+set _SKIP=2
+
+:win9xME_args_slurp
+if "x%~1" == "x" goto execute
+
+set CMD_LINE_ARGS=%*
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/pom.xml b/pom.xml
deleted file mode 100644
index c8787ff2..00000000
--- a/pom.xml
+++ /dev/null
@@ -1,859 +0,0 @@
-
-
-
- 4.0.0
-
- com.scienceminer.nerd
- nerd
- war
- 0.0.4
-
- entity-fishing
- Entity Recognition and Disambiguation
-
-
- UTF-8
- 1.7.5
- 0.4.6
- 2.9.5
-
-
-
-
- Apache License, Version 2.0
- http://www.apache.org/licenses/LICENSE-2.0
-
-
-
-
-
- Patrice Lopez
-
- Developer
-
-
-
- Luca Foppiano
-
- Developer
-
-
-
-
-
-
-
-
- src/main/resources
- true
-
- **/service.properties
-
-
-
-
-
- org.apache.maven.plugins
- maven-jar-plugin
- 3.0.2
-
-
-
- true
- true
- true
-
-
-
-
-
- make-a-jar
- compile
-
- jar
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-surefire-plugin
- 2.20
-
- -Xms256m -Xmx3072m -Dfile.encoding=UTF-8 -Djdk.xml.totalEntitySizeLimit=2147480000
-
-
- **/*IntegrationTest.java
-
-
- log4j.test.xml
-
-
-
-
-
- org.apache.maven.plugins
- maven-compiler-plugin
- 3.6.1
-
-
- 1.8
- UTF-8
-
-
-
-
- org.apache.maven.plugins
- maven-war-plugin
- 3.1.0
-
-
-
- src/main/webapp/WEB-INF
-
-
-
- WEB-INF
-
-
-
- doc
- doc
-
-
- lib
- WEB-INF/lib
-
-
-
-
-
-
- org.eclipse.jetty
- jetty-maven-plugin
- 9.4.8.v20171121
-
-
- -Xmx4g -Dfile.encoding=UTF-8 -Djdk.xml.totalEntitySizeLimit=2147480000
- 0
-
-
-
-
-
- com.sun.management.jmxremote
-
-
-
- jetty.port
- 8090
-
-
- maxIdleTime
- 60000
-
-
- log4j.configuration
- log4j-jetty.xml
-
-
-
-
-
-
-
-
-
-
-
- train_annotate_en
-
-
- train_annotate_en
-
-
-
-
-
- org.codehaus.mojo
- exec-maven-plugin
-
- java
- compile
-
- -XX:+HeapDumpOnOutOfMemoryError
- -Djdk.xml.totalEntitySizeLimit=2147480000
- -classpath
-
- -Xms2g
- -Xmx10g
- com.scienceminer.nerd.training.WikipediaTrainer
- data/wikipedia/training/
- en
-
-
-
-
-
-
-
-
-
- train_annotate_de
-
-
- train_annotate_de
-
-
-
-
-
- org.codehaus.mojo
- exec-maven-plugin
-
- java
- compile
-
- -XX:+HeapDumpOnOutOfMemoryError
- -Djdk.xml.totalEntitySizeLimit=2147480000
- -classpath
-
- -Xms2g
- -Xmx10g
- com.scienceminer.nerd.training.WikipediaTrainer
- data/wikipedia/training/
- de
-
-
-
-
-
-
-
-
-
- train_annotate_fr
-
-
- train_annotate_fr
-
-
-
-
-
- org.codehaus.mojo
- exec-maven-plugin
-
- java
- compile
-
- -XX:+HeapDumpOnOutOfMemoryError
- -Djdk.xml.totalEntitySizeLimit=2147480000
- -classpath
-
- -Xms2g
- -Xmx10g
- com.scienceminer.nerd.training.WikipediaTrainer
- data/wikipedia/training/
- fr
-
-
-
-
-
-
-
-
-
- train_annotate_it
-
-
- train_annotate_it
-
-
-
-
-
- org.codehaus.mojo
- exec-maven-plugin
-
- java
- compile
-
- -XX:+HeapDumpOnOutOfMemoryError
- -Djdk.xml.totalEntitySizeLimit=2147480000
-
- -classpath
-
- -Xms2g
- -Xmx8g
- com.scienceminer.nerd.training.WikipediaTrainer
- data/wikipedia/training/
- it
-
-
-
-
-
-
-
-
-
- train_annotate_es
-
-
- train_annotate_es
-
-
-
-
-
- org.codehaus.mojo
- exec-maven-plugin
-
- java
- compile
-
- -XX:+HeapDumpOnOutOfMemoryError
- -Djdk.xml.totalEntitySizeLimit=2147480000
- -classpath
-
- -Xms2g
- -Xmx8g
- com.scienceminer.nerd.training.WikipediaTrainer
- data/wikipedia/training/
- es
-
-
-
-
-
-
-
-
-
- generate_entity_description
-
-
- generate_entity_description
-
-
-
-
-
- org.codehaus.mojo
- exec-maven-plugin
-
- java
- compile
-
- -classpath
-
- -Xms2g
- -Xmx8g
- com.scienceminer.nerd.embeddings.EntityDescription
- data/wikipedia/embeddings/en/
- en
-
-
-
-
-
-
-
-
- tomcat
-
-
- tomcat
-
- false
-
-
-
-
- org.apache.maven.plugins
- maven-resources-plugin
- 3.0.2
-
-
- copy-resources-source
- package
-
- copy-resources
-
-
- ${basedir}/target/classes
-
-
- ${basedir}/src/main/resources
-
- jetty.xml
- log4j-jetty.xml
- log4j.xml
- log4j.test.xml
-
-
- log4j-tomcat.xml
- *.properties
-
- true
-
-
-
-
-
- copy-resources-test
- package
-
- testResources
-
-
- ${basedir}/target/test-classes
-
-
- ${basedir}/src/test/resources
-
- query.json
-
-
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-war-plugin
- 3.1.0
-
- **/**jetty*.xml, **/log4j.xml
-
-
- ${basedir}/src/main/webapp/WEB-INF/
-
- web.xml
-
- true
- WEB-INF
-
-
-
-
-
- **/db-de/**
- **/db-en/**
- **/db-kb/**
- **/db-fr/**
- **/db-it/**
- **/db-es/**
-
- data
- data
-
-
- doc
- doc
-
-
-
-
-
- org.apache.maven.plugins
- maven-assembly-plugin
- 2.6
-
-
-
- ${project.artifactId}_${project.version}-data
-
- src/main/assembly/resources.xml
-
-
-
-
-
-
-
-
-
-
-
- 3rd-party-local-repo
- This is a local repository included in the project, to access 3rd party libs.
- file:///${basedir}/lib/
- default
-
-
- maven2-repository.dev.java.net
- Java.net Repository for Maven
- http://download.java.net/maven/2/
- default
-
-
- sonatype-oss-public
- https://oss.sonatype.org/content/groups/public/
-
- true
-
-
- true
-
-
-
-
- false
-
- bintray-rookies-maven
- bintray
- https://dl.bintray.com/rookies/maven
-
-
-
-
-
-
- org.apache.maven
- maven-model
- 3.5.3
-
-
-
-
- commons-io
- commons-io
- 2.5
-
-
- org.apache.httpcomponents
- httpclient
- 4.5.3
-
-
- org.apache.httpcomponents
- httpmime
- 4.5.3
-
-
- org.apache.commons
- commons-lang3
- 3.6
-
-
- org.apache.commons
- commons-collections4
- 4.1
-
-
- org.apache.commons
- commons-text
- 1.1
-
-
-
-
- org.slf4j
- slf4j-log4j12
- 1.7.25
-
-
- log4j
- log4j
- 1.2.17
-
-
-
-
- junit
- junit
- 4.12
- test
-
-
- org.hamcrest
- hamcrest-all
- 1.3
- test
-
-
-
-
- com.sun.jersey
- jersey-client
- 1.8
- compile
-
-
-
- com.sun.jersey
- jersey-server
- 1.8
- compile
-
-
-
- com.sun.jersey.contribs
- jersey-multipart
- 1.8
- compile
-
-
-
-
- com.cybozu
- language-detection
- 09-13-2011
-
-
-
-
- com.github.haifengl
- smile-core
- 1.3.1
-
-
-
-
- it.unimi.dsi
- sux4j
- 3.1.2
-
-
- org.slf4j
- log4j-over-slf4j
-
-
- ch.qos.logback
- logback-classic
-
-
-
-
- it.unimi.dsi
- fastutil
- 6.5.12
-
-
- it.unimi.dsi
- dsiutils
- 2.1.9
-
-
- ch.qos.logback
- logback-classic
-
-
-
-
-
-
- org.sweble.wikitext
- swc-parser-lazy
- 3.1.5
-
-
- org.sweble.wikitext
- swc-engine
- 3.1.5
-
-
-
-
- com.thoughtworks.xstream
- xstream
- 1.4.10
-
-
- de.ruedigermoeller
- fst
- 2.50
-
-
- com.fasterxml.jackson.core
- jackson-core
- ${jackson.version}
-
-
- com.fasterxml.jackson.core
- jackson-databind
- ${jackson.version}
-
-
- com.fasterxml.jackson.core
- jackson-annotations
- ${jackson.version}
-
-
- com.fasterxml.jackson.dataformat
- jackson-dataformat-yaml
- ${jackson.version}
-
-
-
- com.googlecode.json-simple
- json-simple
- 1.1.1
- test
-
-
-
-
- org.apache.avro
- avro
- ${avro.version}
-
-
-
-
- org.grobid
- grobid-core
- 0.5.1
-
-
- org.slf4j
- slf4j-jdk14
-
-
-
-
- org.grobid
- grobid-ner
- 0.5.1
-
-
- org.grobid
- grobid-trainer
- 0.5.1
-
-
- directory-naming
- naming-java
- 0.8
-
-
- fr.limsi.wapiti
- wapiti
- 1.5.0
-
-
- org.wipo.analysers
- wipo-analysers
- 0.0.1
-
-
-
-
-
-
- net.arnx
- jsonic
- 1.3.10
-
-
- com.google.guava
- guava
- 25.1-jre
-
-
-
-
- org.deephacks.lmdbjni
- lmdbjni
- ${lmdbjni.version}
-
-
- org.deephacks.lmdbjni
- lmdbjni-linux64
- ${lmdbjni.version}
-
-
- org.deephacks.lmdbjni
- lmdbjni-osx64
- ${lmdbjni.version}
-
-
- org.deephacks.lmdbjni
- lmdbjni-win64
- ${lmdbjni.version}
-
-
-
-
- com.googlecode.clearnlp
- clearnlp
- 1.3.1
-
-
-
-
- org.apache.hadoop
- hadoop-core
- 1.2.1
-
-
-
-
-
diff --git a/settings.gradle b/settings.gradle
new file mode 100644
index 00000000..1ada4e81
--- /dev/null
+++ b/settings.gradle
@@ -0,0 +1 @@
+rootProject.name = 'entity-fishing'
diff --git a/src/main/webapp/WEB-INF/web.xml b/src/main/webapp/WEB-INF/web.xml
index ff6ae6a1..08e5a594 100755
--- a/src/main/webapp/WEB-INF/web.xml
+++ b/src/main/webapp/WEB-INF/web.xml
@@ -1,7 +1,4 @@
-
-
+
NERD service - a RESTful service for the (Named) Entity Recognition and Disambiguation