diff --git a/.gitignore b/.gitignore index c100436f..24a10fc7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ doc/_build* target +build .DS_Store .project .settings @@ -12,21 +13,13 @@ target *.obj *.zip data/maps/customisations.obj -data/db/db-en -data/db/db-fr -data/db/db-de -data/db/db-it -data/db/db-es -data/db/db-kb +data/db/db-* data/db/domains-en.db data/embeddings -data/embeddings/en -data/embeddings/fr -data/embeddings/de -data/embeddings/es -data/embeddings/it +data/embeddings/* data/corpus/corpus-long/aida/RawText data/corpus/corpus-long/aida-train/RawText data/corpus/corpus-long/aida-testa/RawText data/corpus/corpus-long/aida-testb/RawText -data/species/* \ No newline at end of file +data/species/* +.gradle \ No newline at end of file diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..ca2977d1 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,16 @@ +language: java +dist: trusty +sudo: false + +jdk: + - oraclejdk8 + +addons: + apt: + packages: + - oracle-java8-installer + + +script: ./gradlew clean build -x test +#after_success: +# - mvn clean cobertura:cobertura org.eluder.coveralls:coveralls-maven-plugin:report \ No newline at end of file diff --git a/build.gradle b/build.gradle new file mode 100644 index 00000000..d64eef4e --- /dev/null +++ b/build.gradle @@ -0,0 +1,263 @@ +buildscript { + repositories { + mavenLocal() + mavenCentral() + jcenter() + } + dependencies { + classpath group: 'net.researchgate', name: 'gradle-release', version: '2.6.0' + classpath 'org.kt3k.gradle.plugin:coveralls-gradle-plugin:2.4.0' + classpath 'com.jfrog.bintray.gradle:gradle-bintray-plugin:1.7.3' + classpath 'com.github.jengelman.gradle.plugins:shadow:2.0.1' + } +} + + +apply plugin: 'maven' +apply plugin: 'com.jfrog.bintray' +apply plugin: 'com.github.johnrengelman.shadow' +apply plugin: 'java' +apply plugin: 'war' +apply from: 'https://raw.github.com/gretty-gradle-plugin/gretty/master/pluginScripts/gretty.plugin' + +group = 
"com.scienceminer.nerd"
+
+description = """entity recognition and disambiguation against Wikidata and Wikipedia in a raw text,
+partially-annotated text segment or PDF"""
+
+sourceCompatibility = 1.8
+targetCompatibility = 1.8
+
+tasks.withType(JavaCompile) {
+    options.encoding = 'UTF-8'
+}
+
+repositories {
+    mavenLocal()
+    mavenCentral()
+    jcenter()
+    maven { url "https://dl.bintray.com/rookies/maven" }
+}
+
+dependencies {
+    //Tests
+    testCompile 'junit:junit:4.12'
+    testCompile 'org.easymock:easymock:3.4'
+    testCompile 'org.hamcrest:hamcrest-all:1.3'
+    testCompile group: 'com.googlecode.json-simple', name: 'json-simple', version: '1.1.1'
+
+    //Logging
+    compile 'org.slf4j:slf4j-log4j12:1.7.25'
+    compile group: 'log4j', name: 'log4j', version: '1.2.17'
+
+    //GROBID
+    // NOTE(review): grobid-ner is pinned to 0.5.1 while grobid-core/-trainer are 0.5.2 - confirm this is intended
+    compile group: 'org.grobid', name: 'grobid-ner', version: '0.5.1'
+    compile 'org.grobid:grobid-trainer:0.5.2'
+    compile(group: 'org.grobid', name: 'grobid-core', version: '0.5.2') {
+        exclude(module: 'slf4j-jdk14')
+    }
+    compile group: 'directory-naming', name: 'naming-java', version: '0.8'
+    compile group: 'fr.limsi.wapiti', name: 'wapiti', version: '1.5.0'
+    compile group: 'org.wipo.analysers', name: 'wipo-analysers', version: '0.0.1'
+
+    //Apache commons
+    compile 'org.apache.commons:commons-collections4:4.1'
+    compile 'org.apache.commons:commons-lang3:3.6'
+    compile 'commons-logging:commons-logging:1.2'
+    compile 'commons-io:commons-io:2.5'
+    compile 'commons-pool:commons-pool:1.6'
+    compile group: 'org.apache.httpcomponents', name: 'httpclient', version: '4.5.3'
+    compile group: 'org.apache.httpcomponents', name: 'httpmime', version: '4.5.3'
+    compile group: 'org.apache.commons', name: 'commons-text', version: '1.1'
+    compile group: 'com.google.guava', name: 'guava', version: '25.1-jre'
+
+    //Json
+    compile 'com.fasterxml.jackson.core:jackson-core:2.9.5'
+    compile 'com.fasterxml.jackson.core:jackson-annotations:2.9.5'
+    
compile 'com.fasterxml.jackson.core:jackson-databind:2.9.5'
+    compile group: 'com.fasterxml.jackson.dataformat', name: 'jackson-dataformat-yaml', version: '2.9.5'
+    compile group: 'net.arnx', name: 'jsonic', version: '1.3.10'
+
+    //Web interface
+    compile group: 'com.sun.jersey', name: 'jersey-client', version: '1.8'
+    compile group: 'com.sun.jersey', name: 'jersey-server', version: '1.8'
+    compile group: 'com.sun.jersey.contribs', name: 'jersey-multipart', version: '1.8'
+    compile 'javax.ws.rs:javax.ws.rs-api:2.1'
+    compile 'javax.servlet:javax.servlet-api:3.1.0'
+
+    //Misc
+    // Every library is declared exactly once in this block: clearnlp lives under
+    // "Specialised libraries", jsonic under "Json", and naming-java and guava in the
+    // sections above. A second guava entry pinned to 21.0 would be dead anyway, since
+    // Gradle's default conflict resolution keeps the newest requested version (25.1-jre).
+    compile group: 'org.jvnet', name: 'mimepull', version: '1.6'
+
+    //Specialised libraries
+    compile group: 'com.cybozu', name: 'language-detection', version: '09-13-2011'
+    compile group: 'com.github.haifengl', name: 'smile-core', version: '1.3.1'
+    compile group: 'com.googlecode.clearnlp', name: 'clearnlp', version: '1.3.1'
+    compile(group: 'it.unimi.dsi', name: 'sux4j', version: '3.1.2') {
+        exclude(module: 'log4j-over-slf4j')
+        exclude(module: 'logback-classic')
+    }
+    compile group: 'it.unimi.dsi', name: 'fastutil', version: '6.5.12'
+    compile(group: 'it.unimi.dsi', name: 'dsiutils', version: '2.1.9') {
+        exclude(module: 'logback-classic')
+    }
+    compile group: 'de.ruedigermoeller', name: 'fst', version: '2.50'
+
+    //Wikipedia
+    compile group: 'org.sweble.wikitext', name: 'swc-parser-lazy', version: '3.1.5'
+    compile group: 'org.sweble.wikitext', name: 'swc-engine', version: '3.1.5'
+
+    //XML
+    compile group: 'com.thoughtworks.xstream', name: 'xstream', version: '1.4.10'
+
+    //LMDB
+    compile group: 'org.deephacks.lmdbjni', name: 'lmdbjni', version: '0.4.6'
+    compile group: 'org.deephacks.lmdbjni', name: 
'lmdbjni-linux64', version: '0.4.6' + compile group: 'org.deephacks.lmdbjni', name: 'lmdbjni-osx64', version: '0.4.6' + compile group: 'org.deephacks.lmdbjni', name: 'lmdbjni-win64', version: '0.4.6' + + //Hadoop + compile group: 'org.apache.hadoop', name: 'hadoop-core', version: '1.2.1' + compile group: 'org.apache.avro', name: 'avro', version: '1.7.5' + +} + +configurations { + compile.exclude group: "org.slf4j", module: "slf4j-jdk14" +} + +configurations.all { + resolutionStrategy { + force 'xml-apis:xml-apis:1.4.01' + } +} + +test { + exclude '**/**IntegrationTest**' +} + +//integration (type: Test){ +// include '**' +//} + +// Gretty configuration + +gretty { + httpPort = 8090 + contextPath = '/' + servletContainer = "jetty9.4" + webInfIncludeJarPattern = '' + reloadOnClassChange = false +} + +// return the default value if the property has not been specified in command line +ext.getArg = { propName, defaultVal -> + return project.hasProperty(propName) ? project.getProperty(propName) : defaultVal; +} + +war { + classifier = 'war' +// dependsOn mainWar + enabled true +} + + +artifacts { + archives jar + archives war +} + +//TODO: we could create a task to download and unpack the lmdb files automatically +//task copyModels(type: Copy) { +// from "${rootDir}/resources/models" +// include "**/*.wapiti" +// into "${rootDir}/../grobid-home/models/" +//} + +//tasks.withType(JavaCompile) { +// options.compilerArgs << "-Xlint:deprecation" +// options.compilerArgs << "-Xlint:unchecked" +//} + + +wrapper { + gradleVersion "4.6" +} + +// Evaluation + +// Run like: ./gradlew evaluation -Pcorpus=[corpusname] +task(evaluation, dependsOn: 'classes', type: JavaExec, group: 'evaluation') { + main = 'com.scienceminer.nerd.evaluation.NEDCorpusEvaluation' + classpath = sourceSets.main.runtimeClasspath + args getArg('corpus', null) + jvmArgs '-Xms2g', '-Xmx8g' +} + + +// Run like: ./gradlew evaluationDataGeneration -Pcorpus=[corpusname] +task(evaluationDataGeneration, dependsOn: 
'classes', type: JavaExec, group: 'training') {
+    main = 'com.scienceminer.nerd.evaluation.AnnotatedDataGeneration'
+    classpath = sourceSets.main.runtimeClasspath
+    args getArg('corpus', null)
+    jvmArgs '-Xms2g', '-Xmx8g'
+}
+
+// Training - JVM flags mirror the former Maven exec profiles (heap dump on OOM, raised XML entity-size limit)
+// Run like: ./gradlew train_annotate_en
+task(train_annotate_en, dependsOn: 'classes', type: JavaExec, group: 'training') {
+    main = 'com.scienceminer.nerd.training.WikipediaTrainer'
+    classpath = sourceSets.main.runtimeClasspath
+    args 'data/wikipedia/training/', 'en'
+    jvmArgs '-XX:+HeapDumpOnOutOfMemoryError', '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Xms2g', '-Xmx8g'
+}
+
+// Run like: ./gradlew train_annotate_de
+task(train_annotate_de, dependsOn: 'classes', type: JavaExec, group: 'training') {
+    main = 'com.scienceminer.nerd.training.WikipediaTrainer'
+    classpath = sourceSets.main.runtimeClasspath
+    args 'data/wikipedia/training/', 'de'
+    jvmArgs '-XX:+HeapDumpOnOutOfMemoryError', '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Xms2g', '-Xmx8g'
+}
+
+// Run like: ./gradlew train_annotate_fr
+task(train_annotate_fr, dependsOn: 'classes', type: JavaExec, group: 'training') {
+    main = 'com.scienceminer.nerd.training.WikipediaTrainer'
+    classpath = sourceSets.main.runtimeClasspath
+    args 'data/wikipedia/training/', 'fr'
+    jvmArgs '-XX:+HeapDumpOnOutOfMemoryError', '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Xms2g', '-Xmx8g'
+}
+
+// Run like: ./gradlew train_annotate_it
+task(train_annotate_it, dependsOn: 'classes', type: JavaExec, group: 'training') {
+    main = 'com.scienceminer.nerd.training.WikipediaTrainer'
+    classpath = sourceSets.main.runtimeClasspath
+    args 'data/wikipedia/training/', 'it'
+    jvmArgs '-XX:+HeapDumpOnOutOfMemoryError', '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Xms2g', '-Xmx8g'
+}
+
+// Run like: ./gradlew train_annotate_es
+task(train_annotate_es, dependsOn: 'classes', type: JavaExec, group: 'training') {
+    main = 
'com.scienceminer.nerd.training.WikipediaTrainer'
+    classpath = sourceSets.main.runtimeClasspath
+    args 'data/wikipedia/training/', 'es'
+    jvmArgs '-XX:+HeapDumpOnOutOfMemoryError', '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Xms2g', '-Xmx8g' // heap dump on OOM, as in the former Maven profile
+}
+
+// Embeddings (obsolete)
+// Run like: ./gradlew generate_entity_description
+task(generate_entity_description, dependsOn: 'classes', type: JavaExec, group: 'embeddings') {
+    main = 'com.scienceminer.nerd.embeddings.EntityDescription'
+    classpath = sourceSets.main.runtimeClasspath
+    args 'data/wikipedia/embeddings/en/', 'en'
+    jvmArgs '-Xms2g', '-Xmx8g'
+}
+
+
+
+
+
diff --git a/doc/build.rst b/doc/build.rst index 4d46a023..036606cb 100644 --- a/doc/build.rst +++ b/doc/build.rst @@ -60,14 +60,14 @@ Then install the compiled indexed data: #. Build the project, under the *entity-fishing* project repository. :: - $ mvn clean install + $ ./gradlew clean build Some tests will be executed. If all tests are successful, you should be now ready to run the service. #. 
Run the service: :: - $ mvn clean jetty:run + $ ./gradlew clean appRun The test console is available at port ``:8090`` by opening in your browser (preferably *Firefox* or *Chrome*, *Internet Explorer* has not been tested): http://localhost:8090 diff --git a/doc/evaluation.rst b/doc/evaluation.rst index 3c0cecfd..33b28839 100644 --- a/doc/evaluation.rst +++ b/doc/evaluation.rst @@ -37,11 +37,11 @@ Evaluation commands Use the following maven command with the above dataset identifier for running an evaluation: :: - $ mvn compile exec:java -Dexec.mainClass=com.scienceminer.nerd.evaluation.NEDCorpusEvaluation -Dexec.args="aquaint" + $ ./gradlew evaluation -Pcorpus=[dataset] For instance for evaluating against the testb subset of the AIDA-CONLL, use: :: - $ mvn compile exec:java -Dexec.mainClass=com.scienceminer.nerd.evaluation.NEDCorpusEvaluation -Dexec.args="aida-testb" + $ ./gradlew evaluation -Pcorpus=aida-testb The evaluation process will provide standard metrics (accuracy, precision, recall. f1) for micro- and macro-averages for the entity disambiguation algorithm selected as ranker and for priors (as baseline). 
@@ -60,7 +60,7 @@ If there is a directory called ``pdf`` or ``PDF``, the process will extract info Use the following maven command with the above dataset identifier for generating the annotation xml file: :: - $ mvn compile exec:java -Dexec.mainClass=com.scienceminer.nerd.evaluation.AnnotatedDataGeneration -Dexec.args="toto" + $ ./gradlew evaluationDataGeneration -Pcorpus=[corpusname] References ********** diff --git a/doc/train.rst b/doc/train.rst index b8366f3a..eafb7fdd 100644 --- a/doc/train.rst +++ b/doc/train.rst @@ -17,10 +17,10 @@ The following command will build the two models used in *entity-fishing*, the `` For other languages, replace the ending language code (``en``) by the desired one (``fr``, ``de``, ``it`` and ``es`` are supported), e.g.: :: - $ mvn compile exec:exec -Ptrain_annotate_de - $ mvn compile exec:exec -Ptrain_annotate_fr - $ mvn compile exec:exec -Ptrain_annotate_es - $ mvn compile exec:exec -Ptrain_annotate_it + $ ./gradlew train_annotate_de + $ ./gradlew train_annotate_fr + $ ./gradlew train_annotate_es + $ ./gradlew train_annotate_it Models will be saved under ``data/models``. ``ARFF`` training data files used to build the model are saved under ``data/wikipedia/training/``. 
diff --git a/gradle.properties b/gradle.properties new file mode 100644 index 00000000..8e0d48df --- /dev/null +++ b/gradle.properties @@ -0,0 +1 @@ +version = 0.0.4 \ No newline at end of file diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 00000000..f6b961fd Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 00000000..2db4af13 --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,6 @@ +#Fri Nov 16 08:49:17 CET 2018 +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-4.6-all.zip diff --git a/gradlew b/gradlew new file mode 100755 index 00000000..cccdd3d5 --- /dev/null +++ b/gradlew @@ -0,0 +1,172 @@ +#!/usr/bin/env sh + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS="" + +# Use the maximum available, or set MAX_FD != -1 to use that value. 
+MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? -eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? 
-ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin, switch paths to Windows format before running java +if $cygwin ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=$((i+1)) + done + case $i in + (0) set -- ;; + (1) set -- "$args0" ;; + (2) set -- "$args0" "$args1" ;; + (3) set -- "$args0" "$args1" "$args2" ;; + (4) set -- "$args0" "$args1" "$args2" "$args3" ;; + (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 
+ esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=$(save "$@") + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong +if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then + cd "$(dirname "$0")" +fi + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 00000000..f9553162 --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,84 @@ +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS= + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto init + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto init + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. 
+echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:init +@rem Get command-line arguments, handling Windows variants + +if not "%OS%" == "Windows_NT" goto win9xME_args + +:win9xME_args +@rem Slurp the command line arguments. +set CMD_LINE_ARGS= +set _SKIP=2 + +:win9xME_args_slurp +if "x%~1" == "x" goto execute + +set CMD_LINE_ARGS=%* + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/pom.xml b/pom.xml deleted file mode 100644 index c8787ff2..00000000 --- a/pom.xml +++ /dev/null @@ -1,859 +0,0 @@ - - - - 4.0.0 - - com.scienceminer.nerd - nerd - war - 0.0.4 - - entity-fishing - Entity Recognition and Disambiguation - - - UTF-8 - 1.7.5 - 0.4.6 - 2.9.5 - - - - - Apache License, Version 2.0 - http://www.apache.org/licenses/LICENSE-2.0 - - - - - - Patrice Lopez - - Developer - - - - Luca Foppiano - - Developer - - - - - - - - - src/main/resources - true - - **/service.properties - - - - - - org.apache.maven.plugins - maven-jar-plugin - 3.0.2 - - - - true - true - true - - - - - - make-a-jar - compile - - jar - - - - - - - org.apache.maven.plugins - maven-surefire-plugin - 2.20 - - -Xms256m -Xmx3072m -Dfile.encoding=UTF-8 -Djdk.xml.totalEntitySizeLimit=2147480000 - - - **/*IntegrationTest.java - - - log4j.test.xml - - - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.6.1 - - 1.8 - 
1.8 - UTF-8 - - - - - org.apache.maven.plugins - maven-war-plugin - 3.1.0 - - - - src/main/webapp/WEB-INF - - - - WEB-INF - - - - doc - doc - - - lib - WEB-INF/lib - - - - - - - org.eclipse.jetty - jetty-maven-plugin - 9.4.8.v20171121 - - - -Xmx4g -Dfile.encoding=UTF-8 -Djdk.xml.totalEntitySizeLimit=2147480000 - 0 - - - - - - com.sun.management.jmxremote - - - - jetty.port - 8090 - - - maxIdleTime - 60000 - - - log4j.configuration - log4j-jetty.xml - - - - - - - - - - - - train_annotate_en - - - train_annotate_en - - - - - - org.codehaus.mojo - exec-maven-plugin - - java - compile - - -XX:+HeapDumpOnOutOfMemoryError - -Djdk.xml.totalEntitySizeLimit=2147480000 - -classpath - - -Xms2g - -Xmx10g - com.scienceminer.nerd.training.WikipediaTrainer - data/wikipedia/training/ - en - - - - - - - - - - train_annotate_de - - - train_annotate_de - - - - - - org.codehaus.mojo - exec-maven-plugin - - java - compile - - -XX:+HeapDumpOnOutOfMemoryError - -Djdk.xml.totalEntitySizeLimit=2147480000 - -classpath - - -Xms2g - -Xmx10g - com.scienceminer.nerd.training.WikipediaTrainer - data/wikipedia/training/ - de - - - - - - - - - - train_annotate_fr - - - train_annotate_fr - - - - - - org.codehaus.mojo - exec-maven-plugin - - java - compile - - -XX:+HeapDumpOnOutOfMemoryError - -Djdk.xml.totalEntitySizeLimit=2147480000 - -classpath - - -Xms2g - -Xmx10g - com.scienceminer.nerd.training.WikipediaTrainer - data/wikipedia/training/ - fr - - - - - - - - - - train_annotate_it - - - train_annotate_it - - - - - - org.codehaus.mojo - exec-maven-plugin - - java - compile - - -XX:+HeapDumpOnOutOfMemoryError - -Djdk.xml.totalEntitySizeLimit=2147480000 - - -classpath - - -Xms2g - -Xmx8g - com.scienceminer.nerd.training.WikipediaTrainer - data/wikipedia/training/ - it - - - - - - - - - - train_annotate_es - - - train_annotate_es - - - - - - org.codehaus.mojo - exec-maven-plugin - - java - compile - - -XX:+HeapDumpOnOutOfMemoryError - -Djdk.xml.totalEntitySizeLimit=2147480000 - -classpath - - 
-Xms2g - -Xmx8g - com.scienceminer.nerd.training.WikipediaTrainer - data/wikipedia/training/ - es - - - - - - - - - - generate_entity_description - - - generate_entity_description - - - - - - org.codehaus.mojo - exec-maven-plugin - - java - compile - - -classpath - - -Xms2g - -Xmx8g - com.scienceminer.nerd.embeddings.EntityDescription - data/wikipedia/embeddings/en/ - en - - - - - - - - - tomcat - - - tomcat - - false - - - - - org.apache.maven.plugins - maven-resources-plugin - 3.0.2 - - - copy-resources-source - package - - copy-resources - - - ${basedir}/target/classes - - - ${basedir}/src/main/resources - - jetty.xml - log4j-jetty.xml - log4j.xml - log4j.test.xml - - - log4j-tomcat.xml - *.properties - - true - - - - - - copy-resources-test - package - - testResources - - - ${basedir}/target/test-classes - - - ${basedir}/src/test/resources - - query.json - - - - - - - - - org.apache.maven.plugins - maven-war-plugin - 3.1.0 - - **/**jetty*.xml, **/log4j.xml - - - ${basedir}/src/main/webapp/WEB-INF/ - - web.xml - - true - WEB-INF - - - - - - **/db-de/** - **/db-en/** - **/db-kb/** - **/db-fr/** - **/db-it/** - **/db-es/** - - data - data - - - doc - doc - - - - - - org.apache.maven.plugins - maven-assembly-plugin - 2.6 - - - - ${project.artifactId}_${project.version}-data - - src/main/assembly/resources.xml - - - - - - - - - - - - 3rd-party-local-repo - This is a local repository included in the project, to access 3rd party libs. 
- file:///${basedir}/lib/ - default - - - maven2-repository.dev.java.net - Java.net Repository for Maven - http://download.java.net/maven/2/ - default - - - sonatype-oss-public - https://oss.sonatype.org/content/groups/public/ - - true - - - true - - - - - false - - bintray-rookies-maven - bintray - https://dl.bintray.com/rookies/maven - - - - - - - org.apache.maven - maven-model - 3.5.3 - - - - - commons-io - commons-io - 2.5 - - - org.apache.httpcomponents - httpclient - 4.5.3 - - - org.apache.httpcomponents - httpmime - 4.5.3 - - - org.apache.commons - commons-lang3 - 3.6 - - - org.apache.commons - commons-collections4 - 4.1 - - - org.apache.commons - commons-text - 1.1 - - - - - org.slf4j - slf4j-log4j12 - 1.7.25 - - - log4j - log4j - 1.2.17 - - - - - junit - junit - 4.12 - test - - - org.hamcrest - hamcrest-all - 1.3 - test - - - - - com.sun.jersey - jersey-client - 1.8 - compile - - - - com.sun.jersey - jersey-server - 1.8 - compile - - - - com.sun.jersey.contribs - jersey-multipart - 1.8 - compile - - - - - com.cybozu - language-detection - 09-13-2011 - - - - - com.github.haifengl - smile-core - 1.3.1 - - - - - it.unimi.dsi - sux4j - 3.1.2 - - - org.slf4j - log4j-over-slf4j - - - ch.qos.logback - logback-classic - - - - - it.unimi.dsi - fastutil - 6.5.12 - - - it.unimi.dsi - dsiutils - 2.1.9 - - - ch.qos.logback - logback-classic - - - - - - - org.sweble.wikitext - swc-parser-lazy - 3.1.5 - - - org.sweble.wikitext - swc-engine - 3.1.5 - - - - - com.thoughtworks.xstream - xstream - 1.4.10 - - - de.ruedigermoeller - fst - 2.50 - - - com.fasterxml.jackson.core - jackson-core - ${jackson.version} - - - com.fasterxml.jackson.core - jackson-databind - ${jackson.version} - - - com.fasterxml.jackson.core - jackson-annotations - ${jackson.version} - - - com.fasterxml.jackson.dataformat - jackson-dataformat-yaml - ${jackson.version} - - - - com.googlecode.json-simple - json-simple - 1.1.1 - test - - - - - org.apache.avro - avro - ${avro.version} - - - - - org.grobid - 
grobid-core - 0.5.1 - - - org.slf4j - slf4j-jdk14 - - - - - org.grobid - grobid-ner - 0.5.1 - - - org.grobid - grobid-trainer - 0.5.1 - - - directory-naming - naming-java - 0.8 - - - fr.limsi.wapiti - wapiti - 1.5.0 - - - org.wipo.analysers - wipo-analysers - 0.0.1 - - - - - - - net.arnx - jsonic - 1.3.10 - - - com.google.guava - guava - 25.1-jre - - - - - org.deephacks.lmdbjni - lmdbjni - ${lmdbjni.version} - - - org.deephacks.lmdbjni - lmdbjni-linux64 - ${lmdbjni.version} - - - org.deephacks.lmdbjni - lmdbjni-osx64 - ${lmdbjni.version} - - - org.deephacks.lmdbjni - lmdbjni-win64 - ${lmdbjni.version} - - - - - com.googlecode.clearnlp - clearnlp - 1.3.1 - - - - - org.apache.hadoop - hadoop-core - 1.2.1 - - - - - diff --git a/settings.gradle b/settings.gradle new file mode 100644 index 00000000..1ada4e81 --- /dev/null +++ b/settings.gradle @@ -0,0 +1 @@ +rootProject.name = 'entity-fishing' diff --git a/src/main/webapp/WEB-INF/web.xml b/src/main/webapp/WEB-INF/web.xml index ff6ae6a1..08e5a594 100755 --- a/src/main/webapp/WEB-INF/web.xml +++ b/src/main/webapp/WEB-INF/web.xml @@ -1,7 +1,4 @@ - - + NERD service - a RESTful service for the (Named) Entity Recognition and Disambiguation