Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
migration to gradle #33
 - build
 - documentation
  • Loading branch information
lfoppiano committed Nov 16, 2018
1 parent 3a31cfa commit 82bc35b
Show file tree
Hide file tree
Showing 14 changed files with 558 additions and 884 deletions.
17 changes: 5 additions & 12 deletions .gitignore
@@ -1,5 +1,6 @@
doc/_build*
target
build
.DS_Store
.project
.settings
Expand All @@ -12,21 +13,13 @@ target
*.obj
*.zip
data/maps/customisations.obj
data/db/db-en
data/db/db-fr
data/db/db-de
data/db/db-it
data/db/db-es
data/db/db-kb
data/db/db-*
data/db/domains-en.db
data/embeddings
data/embeddings/en
data/embeddings/fr
data/embeddings/de
data/embeddings/es
data/embeddings/it
data/embeddings/*
data/corpus/corpus-long/aida/RawText
data/corpus/corpus-long/aida-train/RawText
data/corpus/corpus-long/aida-testa/RawText
data/corpus/corpus-long/aida-testb/RawText
data/species/*
data/species/*
.gradle
16 changes: 16 additions & 0 deletions .travis.yml
@@ -0,0 +1,16 @@
# Travis CI configuration: build the project with the Gradle wrapper on Oracle JDK 8.
language: java
dist: trusty
sudo: false

jdk:
  - oraclejdk8

# The package list must be nested under addons.apt for Travis to pick it up.
addons:
  apt:
    packages:
      - oracle-java8-installer


# Tests are skipped on CI (-x test).
script: ./gradlew clean build -x test
#after_success:
# - mvn clean cobertura:cobertura org.eluder.coveralls:coveralls-maven-plugin:report
263 changes: 263 additions & 0 deletions build.gradle
@@ -0,0 +1,263 @@
// Classpath of the build script itself: the plugins below must be resolvable
// before the project is configured.
buildscript {
    repositories {
        mavenLocal()
        mavenCentral()
        jcenter()
    }
    dependencies {
        // NOTE(review): gradle-release and coveralls are put on the classpath but no
        // matching `apply plugin` is visible in this script -- confirm they are needed.
        classpath(group: 'net.researchgate', name: 'gradle-release', version: '2.6.0')
        classpath('org.kt3k.gradle.plugin:coveralls-gradle-plugin:2.4.0')
        classpath('com.jfrog.bintray.gradle:gradle-bintray-plugin:1.7.3')
        classpath('com.github.jengelman.gradle.plugins:shadow:2.0.1')
    }
}


// Plugins applied to the project, in the original order.
apply(plugin: 'maven')
apply(plugin: 'com.jfrog.bintray')
apply(plugin: 'com.github.johnrengelman.shadow')
apply(plugin: 'java')
apply(plugin: 'war')
// NOTE(review): this script is fetched from the gretty master branch at build time,
// so the build is not pinned to a gretty version and needs network access.
apply(from: 'https://raw.github.com/gretty-gradle-plugin/gretty/master/pluginScripts/gretty.plugin')

// Project coordinates and human-readable description.
group = "com.scienceminer.nerd"

description = """entity recognition and disambiguation against Wikidata and Wikipedia in a raw text,
partially-annotated text segment or PDF"""

// Compile for Java 8, both source and bytecode level.
sourceCompatibility = 1.8
targetCompatibility = 1.8

// Every Java compilation task reads sources as UTF-8.
tasks.withType(JavaCompile) { compileTask ->
    compileTask.options.encoding = 'UTF-8'
}

// Repositories used to resolve project dependencies, consulted in declaration order.
repositories {
    mavenLocal()
    mavenCentral()
    jcenter()
    maven {
        url "https://dl.bintray.com/rookies/maven"
    }
}

// Project dependencies. Each artifact is declared exactly once: the original list
// repeated easymock, naming-java, jsonic and clearnlp, and declared guava twice with
// different versions (25.1-jre and 21.0). Gradle's default conflict resolution selects
// the newest requested version, so only guava 25.1-jre is kept.
dependencies {
    //Tests
    testCompile 'junit:junit:4.12'
    testCompile 'org.easymock:easymock:3.4'
    testCompile 'org.hamcrest:hamcrest-all:1.3'
    testCompile group: 'com.googlecode.json-simple', name: 'json-simple', version: '1.1.1'

    //Logging
    compile 'org.slf4j:slf4j-log4j12:1.7.25'
    compile group: 'log4j', name: 'log4j', version: '1.2.17'

    //GROBID
    compile group: 'org.grobid', name: 'grobid-ner', version: '0.5.1'
    compile 'org.grobid:grobid-trainer:0.5.2'
    compile(group: 'org.grobid', name: 'grobid-core', version: '0.5.2') {
        exclude(module: 'slf4j-jdk14')
    }
    compile group: 'directory-naming', name: 'naming-java', version: '0.8'
    compile group: 'fr.limsi.wapiti', name: 'wapiti', version: '1.5.0'
    compile group: 'org.wipo.analysers', name: 'wipo-analysers', version: '0.0.1'

    //Apache commons
    compile 'org.apache.commons:commons-collections4:4.1'
    compile 'org.apache.commons:commons-lang3:3.6'
    compile 'commons-logging:commons-logging:1.2'
    compile 'commons-io:commons-io:2.5'
    compile 'commons-pool:commons-pool:1.6'
    compile group: 'org.apache.httpcomponents', name: 'httpclient', version: '4.5.3'
    compile group: 'org.apache.httpcomponents', name: 'httpmime', version: '4.5.3'
    compile group: 'org.apache.commons', name: 'commons-text', version: '1.1'
    compile group: 'com.google.guava', name: 'guava', version: '25.1-jre'

    //Json
    compile 'com.fasterxml.jackson.core:jackson-core:2.9.5'
    compile 'com.fasterxml.jackson.core:jackson-annotations:2.9.5'
    compile 'com.fasterxml.jackson.core:jackson-databind:2.9.5'
    compile group: 'com.fasterxml.jackson.dataformat', name: 'jackson-dataformat-yaml', version: '2.9.5'
    compile group: 'net.arnx', name: 'jsonic', version: '1.3.10'

    //Web interface
    compile group: 'com.sun.jersey', name: 'jersey-client', version: '1.8'
    compile group: 'com.sun.jersey', name: 'jersey-server', version: '1.8'
    compile group: 'com.sun.jersey.contribs', name: 'jersey-multipart', version: '1.8'
    compile 'javax.ws.rs:javax.ws.rs-api:2.1'
    compile 'javax.servlet:javax.servlet-api:3.1.0'

    //Misc
    compile group: 'com.googlecode.clearnlp', name: 'clearnlp', version: '1.3.1'
    compile group: 'org.jvnet', name: 'mimepull', version: '1.6'

    //Specialised libraries
    compile group: 'com.cybozu', name: 'language-detection', version: '09-13-2011'
    compile group: 'com.github.haifengl', name: 'smile-core', version: '1.3.1'
    compile(group: 'it.unimi.dsi', name: 'sux4j', version: '3.1.2') {
        exclude(module: 'log4j-over-slf4j')
        exclude(module: 'logback-classic')
    }
    compile group: 'it.unimi.dsi', name: 'fastutil', version: '6.5.12'
    compile(group: 'it.unimi.dsi', name: 'dsiutils', version: '2.1.9') {
        exclude(module: 'logback-classic')
    }
    compile group: 'de.ruedigermoeller', name: 'fst', version: '2.50'

    //Wikipedia
    compile group: 'org.sweble.wikitext', name: 'swc-parser-lazy', version: '3.1.5'
    compile group: 'org.sweble.wikitext', name: 'swc-engine', version: '3.1.5'

    //XML
    compile group: 'com.thoughtworks.xstream', name: 'xstream', version: '1.4.10'

    //LMDB (JNI binding plus the native artifacts for each supported platform)
    compile group: 'org.deephacks.lmdbjni', name: 'lmdbjni', version: '0.4.6'
    compile group: 'org.deephacks.lmdbjni', name: 'lmdbjni-linux64', version: '0.4.6'
    compile group: 'org.deephacks.lmdbjni', name: 'lmdbjni-osx64', version: '0.4.6'
    compile group: 'org.deephacks.lmdbjni', name: 'lmdbjni-win64', version: '0.4.6'

    //Hadoop
    compile group: 'org.apache.hadoop', name: 'hadoop-core', version: '1.2.1'
    compile group: 'org.apache.avro', name: 'avro', version: '1.7.5'
}

// Keep the slf4j-jdk14 binding off the compile classpath, whichever dependency pulls it in.
configurations.compile.exclude(group: "org.slf4j", module: "slf4j-jdk14")

// Force a single xml-apis version across every configuration.
configurations.all {
    resolutionStrategy.force('xml-apis:xml-apis:1.4.01')
}

// The default test task skips every class matching the integration-test pattern.
test {
    exclude('**/**IntegrationTest**')
}

// Placeholder for a dedicated integration-test task (not enabled):
//integration (type: Test){
// include '**'
//}

// Gretty configuration: embedded servlet container used to run the web application.
gretty {
    servletContainer = "jetty9.4"
    // Serve the application at the root context on port 8090.
    httpPort = 8090
    contextPath = '/'
    reloadOnClassChange = false
    // NOTE(review): empty pattern presumably disables scanning of WEB-INF jars -- confirm.
    webInfIncludeJarPattern = ''
}

// Look up a project property (e.g. one given on the command line with -Pname=value);
// return defaultVal when the project does not define it.
ext.getArg = { propName, defaultVal ->
    if (project.hasProperty(propName)) {
        return project.getProperty(propName)
    }
    return defaultVal
}

// Build the WAR explicitly and give it the 'war' classifier.
war {
    // dependsOn mainWar
    enabled = true
    classifier = 'war'
}


// Both the jar and the war are registered as artifacts of the 'archives' configuration.
artifacts {
    archives(jar)
    archives(war)
}

//TODO: we could create a task to download and unpack the lmdb files automatically
//task copyModels(type: Copy) {
// from "${rootDir}/resources/models"
// include "**/*.wapiti"
// into "${rootDir}/../grobid-home/models/"
//}

//tasks.withType(JavaCompile) {
// options.compilerArgs << "-Xlint:deprecation"
// options.compilerArgs << "-Xlint:unchecked"
//}


// Gradle version used when the wrapper files are (re)generated.
wrapper {
    gradleVersion("4.6")
}

// Evaluation

// Run like: ./gradlew evaluation -Pcorpus=[corpusname]
// Runs NEDCorpusEvaluation with the corpus name as its single program argument.
task(evaluation, dependsOn: 'classes', type: JavaExec, group: 'evaluation') {
    main = 'com.scienceminer.nerd.evaluation.NEDCorpusEvaluation'
    classpath = sourceSets.main.runtimeClasspath
    // This block is evaluated at configuration time for every build, so only pass the
    // argument when -Pcorpus was given instead of handing a null value to JavaExec.
    def corpusName = getArg('corpus', null)
    if (corpusName != null) {
        args corpusName
    }
    jvmArgs '-Xms2g', '-Xmx8g'
}


// Run like: ./gradlew evaluationDataGeneration -Pcorpus=[corpusname]
// Runs AnnotatedDataGeneration with the corpus name as its single program argument.
task(evaluationDataGeneration, dependsOn: 'classes', type: JavaExec, group: 'training') {
    main = 'com.scienceminer.nerd.evaluation.AnnotatedDataGeneration'
    classpath = sourceSets.main.runtimeClasspath
    // This block is evaluated at configuration time for every build, so only pass the
    // argument when -Pcorpus was given instead of handing a null value to JavaExec.
    def corpusName = getArg('corpus', null)
    if (corpusName != null) {
        args corpusName
    }
    jvmArgs '-Xms2g', '-Xmx8g'
}

// Training
// One task per supported language, all running WikipediaTrainer on the shared
// training directory with the language code as second argument.
// Run like: ./gradlew train_annotate_en
//           (likewise train_annotate_de, train_annotate_fr, train_annotate_it, train_annotate_es)
['en', 'de', 'fr', 'it', 'es'].each { lang ->
    task('train_annotate_' + lang, dependsOn: 'classes', type: JavaExec, group: 'training') {
        main = 'com.scienceminer.nerd.training.WikipediaTrainer'
        classpath = sourceSets.main.runtimeClasspath
        args 'data/wikipedia/training/', lang
        // The entity size limit was previously passed twice with the same value; once is enough.
        jvmArgs '-Djdk.xml.totalEntitySizeLimit=2147480000', '-Xms2g', '-Xmx8g'
    }
}

// Embeddings (obsolete)
// Run like: ./gradlew generate_entity_description
// Runs EntityDescription for English on the embeddings data directory.
task(generate_entity_description, dependsOn: 'classes', type: JavaExec, group: 'embeddings') {
    classpath = sourceSets.main.runtimeClasspath
    main = 'com.scienceminer.nerd.embeddings.EntityDescription'
    jvmArgs('-Xms2g', '-Xmx8g')
    args('data/wikipedia/embeddings/en/', 'en')
}





4 changes: 2 additions & 2 deletions doc/build.rst
Expand Up @@ -60,14 +60,14 @@ Then install the compiled indexed data:

#. Build the project, under the *entity-fishing* project repository.
::
$ mvn clean install
$ ./gradlew clean build

Some tests will be executed. If all tests are successful, you should now be ready to run the service.


#. Run the service:
::
$ mvn clean jetty:run
$ ./gradlew clean appRun

The test console is available at port ``:8090`` by opening in your browser (preferably *Firefox* or *Chrome*, *Internet Explorer* has not been tested): http://localhost:8090

Expand Down
6 changes: 3 additions & 3 deletions doc/evaluation.rst
Expand Up @@ -37,11 +37,11 @@ Evaluation commands

Use the following Gradle command with the above dataset identifier for running an evaluation:
::
$ mvn compile exec:java -Dexec.mainClass=com.scienceminer.nerd.evaluation.NEDCorpusEvaluation -Dexec.args="aquaint"
$ ./gradlew evaluation -Pcorpus=[dataset]

For instance for evaluating against the testb subset of the AIDA-CONLL, use:
::
$ mvn compile exec:java -Dexec.mainClass=com.scienceminer.nerd.evaluation.NEDCorpusEvaluation -Dexec.args="aida-testb"
$ ./gradlew evaluation -Pcorpus=aida-testb

The evaluation process will provide standard metrics (accuracy, precision, recall, f1) for micro- and macro-averages for the entity disambiguation algorithm selected as ranker and for priors (as baseline).

Expand All @@ -60,7 +60,7 @@ If there is a directory called ``pdf`` or ``PDF``, the process will extract info

Use the following Gradle command with the above dataset identifier for generating the annotation xml file:
::
$ mvn compile exec:java -Dexec.mainClass=com.scienceminer.nerd.evaluation.AnnotatedDataGeneration -Dexec.args="toto"
$ ./gradlew evaluationDataGeneration -Pcorpus=[corpusname]

References
**********
Expand Down
8 changes: 4 additions & 4 deletions doc/train.rst
Expand Up @@ -17,10 +17,10 @@ The following command will build the two models used in *entity-fishing*, the ``

For other languages, replace the ending language code (``en``) by the desired one (``fr``, ``de``, ``it`` and ``es`` are supported), e.g.:
::
$ mvn compile exec:exec -Ptrain_annotate_de
$ mvn compile exec:exec -Ptrain_annotate_fr
$ mvn compile exec:exec -Ptrain_annotate_es
$ mvn compile exec:exec -Ptrain_annotate_it
$ ./gradlew train_annotate_de
$ ./gradlew train_annotate_fr
$ ./gradlew train_annotate_es
$ ./gradlew train_annotate_it


Models will be saved under ``data/models``. ``ARFF`` training data files used to build the model are saved under ``data/wikipedia/training/``.
Expand Down
1 change: 1 addition & 0 deletions gradle.properties
@@ -0,0 +1 @@
version = 0.0.4
Binary file added gradle/wrapper/gradle-wrapper.jar
Binary file not shown.
6 changes: 6 additions & 0 deletions gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,6 @@
#Fri Nov 16 08:49:17 CET 2018
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-4.6-all.zip

0 comments on commit 82bc35b

Please sign in to comment.