Skip to content

Commit

Permalink
Improved the makefile a bit for fetching OpenNLP dependencies. Update…
Browse files Browse the repository at this point in the history
…d the setup guide accordingly.

Improved output from dependency checker.
  • Loading branch information
Mark Granroth-Wilding committed Mar 18, 2016
1 parent 67bd78e commit 7a0880f
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 195 deletions.
4 changes: 1 addition & 3 deletions examples/setup_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,10 @@ Beautiful Soup.
Simple as that.

OpenNLP is a little trickier. To make things simple, we just get all the OpenNLP tools at once. There are also a
couple of other libraries required to run the OpenNLP wrappers and we get them in the same way.
couple of other libraries required to run the OpenNLP wrappers. The `opennlp` make target gets all of these at once.

cd ~/myproject/pimlico/lib/java
make opennlp
make py4j
make argparse4j.jar

At the moment, it's also necessary to build the Java wrappers around OpenNLP that are provided as part of Pimlico. For
this, you'll need a Java compiler installed on your system.
Expand Down
13 changes: 12 additions & 1 deletion lib/java/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,13 @@ else
endif


########## Meta targets ##########
# These are collections of targets for whole modules, including all the dependencies for the module
opennlp : opennlp-all py4j argparse4j guava


########## OPEN NLP ##############
opennlp : opennlp-maxent-3.0.3.jar opennlp-tools-1.5.3.jar opennlp-uima-1.5.3.jar jwnl-1.3.3.jar
opennlp-all : opennlp-maxent-3.0.3.jar opennlp-tools-1.5.3.jar opennlp-uima-1.5.3.jar jwnl-1.3.3.jar
# Clear up downloaded archive
rm -f apache-opennlp-1.5.3-bin.tar.gz
$(MAKE) argparse4j.jar
Expand Down Expand Up @@ -47,6 +52,10 @@ apache-opennlp-1.5.3-bin.tar.gz :
# Fetch OpenNlp archive
$(FETCH) http://apache.mesi.com.ar//opennlp/opennlp-1.5.3/apache-opennlp-1.5.3-bin.tar.gz

##############################

argparse4j : argparse4j.jar

argparse4j.jar :
$(FETCH_STDOUT) http://sourceforge.net/projects/argparse4j/files/latest/download?source=files >argparse4j.jar

Expand Down Expand Up @@ -81,6 +90,8 @@ stanford-postagger-2014-01-04.zip :

####################################

guava : guava.jar

guava.jar :
@echo "Fetching Guava"
$(FETCH_STDOUT) http://search.maven.org/remotecontent?filepath=com/google/guava/guava/15.0/guava-15.0.jar >guava.jar
Expand Down
89 changes: 0 additions & 89 deletions src/java/pimlico/opennlp/PosTag.java

This file was deleted.

94 changes: 0 additions & 94 deletions src/java/pimlico/opennlp/Tokenize.java

This file was deleted.

9 changes: 5 additions & 4 deletions src/python/pimlico/cli/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from pimlico.core.config import check_for_cycles, PipelineStructureError
from pimlico.core.modules.base import ModuleInfoLoadError
from pimlico.utils.format import multiline_tablate


def check_cmd(pipeline, opts):
Expand Down Expand Up @@ -46,8 +47,8 @@ def check_cmd(pipeline, opts):
missing_dependencies.extend(pipeline[module_name].check_runtime_dependencies())

if len(missing_dependencies):
print "Runtime dependencies not satisfied:\n%s" % (
"\n".join("- %s for '%s' (%s)" % (name, module, desc) for (name, module, desc) in missing_dependencies)
)
print "\nRuntime dependencies not satisfied:\n%s" % \
multiline_tablate(missing_dependencies, [30, 30, 150],
tablefmt="orgtbl", headers=["Dependency", "Module", "Description"])
else:
print "Runtime dependencies all satisfied"
print "\nRuntime dependencies all satisfied"
8 changes: 4 additions & 4 deletions src/python/pimlico/core/external/java.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
import Queue
from collections import deque
import os
import time
from subprocess import Popen, PIPE, check_output, STDOUT, CalledProcessError

from pimlico import JAVA_LIB_DIR, JAVA_BUILD_DIR
from pimlico.core.modules.base import DependencyError
from py4j.java_gateway import JavaGateway, GatewayParameters, OutputConsumer, ProcessConsumer

CLASSPATH = ":".join(["%s/*" % JAVA_LIB_DIR, JAVA_BUILD_DIR])

Expand Down Expand Up @@ -83,6 +79,8 @@ def start(self):
Likewise with python_port and a --python-port option.
"""
from py4j.java_gateway import JavaGateway, GatewayParameters

args = list(self.gateway_args)
gateway_kwargs = {}

Expand Down Expand Up @@ -117,6 +115,8 @@ def launch_gateway(gateway_class="py4j.GatewayServer", args=[],
"""
Our own more flexible version of Py4J's launch_gateway.
"""
from py4j.java_gateway import OutputConsumer, ProcessConsumer

# Launch the server in a subprocess.
command = ["java", "-classpath", CLASSPATH] + javaopts + [gateway_class] + args
proc = Popen(command, stdout=PIPE, stdin=PIPE, stderr=PIPE)
Expand Down
14 changes: 14 additions & 0 deletions src/python/pimlico/utils/format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from tabulate import tabulate
from textwrap import wrap

def multiline_tablate(table, widths, **kwargs):
    """
    Render a table with tabulate, wrapping long cells over several lines.

    Each cell is wrapped to the width given for its column. A logical row
    whose cells wrap to several lines is expanded into that many physical
    rows, with shorter cells padded by empty strings, and a blank physical
    row is inserted between consecutive logical rows so they stay visually
    separated.

    :param table: iterable of rows, each row an iterable of strings
    :param widths: maximum width of each column, in characters. Columns are
        paired with widths using zip, so cells beyond the last width are
        dropped
    :param kwargs: passed through unchanged to tabulate (e.g. tablefmt,
        headers)
    :return: whatever tabulate returns for the expanded rows
    """
    table_split = []
    for row in table:
        # textwrap.wrap() returns [] for empty or whitespace-only text, so
        # fall back to a single empty line: every cell, and therefore every
        # logical row, is then at least one line tall
        cells = [wrap(cell, width=width) or [""] for (cell, width) in zip(row, widths)]
        if not cells:
            # Nothing to show for this row (empty row or no widths given)
            continue
        if table_split:
            # Blank line between this logical row and the previous one
            table_split.append([""] * len(cells))
        subrows = max(len(cell) for cell in cells)
        # Pad shorter cells so that every column has the same number of lines
        padded = [cell + [""] * (subrows - len(cell)) for cell in cells]
        # Transpose: one physical row per wrapped line
        table_split.extend(zip(*padded))
    return tabulate(table_split, **kwargs)

0 comments on commit 7a0880f

Please sign in to comment.