Skip to content

Commit

Permalink
Update python runtime, make file, setup.py script
Browse files Browse the repository at this point in the history
  • Loading branch information
Yoshiki Shibukawa committed Feb 14, 2013
1 parent 20d329f commit 8863276
Show file tree
Hide file tree
Showing 9 changed files with 611 additions and 24 deletions.
55 changes: 31 additions & 24 deletions snowball/GNUmakefile
Expand Up @@ -3,9 +3,11 @@
c_src_dir = src_c
java_src_main_dir = java/org/tartarus/snowball
java_src_dir = $(java_src_main_dir)/ext
python_src_dir = snowballstemmer
jsx_src_dir = lib
jsx_runtime_dir = jsx
# Directory that receives the generated Python stemmers (%_stemmer.py).
python_output_dir = python_out
# Sub-directory (below src/ in the dist tree) that receives the generated
# Python stemmers together with the Python runtime sources.
python_runtime_dir = snowballstemmer
# Sub-directory (below src/ in the dist tree) that receives the Python samples.
python_sample_dir = sample
# Directory that receives the generated JSX stemmers (%-stemmer.jsx).
jsx_output_dir = jsx_out
# Sub-directory of the JSX dist tree that receives the generated JSX stemmers
# and is listed in its MANIFEST.
jsx_runtime_dir = lib
# Sub-directory of the JSX dist tree that receives the JSX samples.
jsx_sample_dir = sample

libstemmer_algorithms = danish dutch english finnish french german hungarian \
Expand All @@ -31,12 +33,14 @@ COMPILER_SOURCES = compiler/space.c \
compiler/generator_python.c \
compiler/generator_jsx.c \
compiler/driver.c

COMPILER_HEADERS = compiler/header.h \
compiler/syswords.h \
compiler/syswords2.h

RUNTIME_SOURCES = runtime/api.c \
runtime/utilities.c

RUNTIME_HEADERS = runtime/api.h \
runtime/header.h

Expand All @@ -45,11 +49,10 @@ JAVARUNTIME_SOURCES = java/org/tartarus/snowball/Among.java \
java/org/tartarus/snowball/SnowballStemmer.java \
java/org/tartarus/snowball/TestApp.java

PYTHON_RUNTIME_SOURCES = python_runtime/dummy.py

JSX_RUNTIME_SOURCES = jsx/among.jsx \
jsx/base-stemmer.jsx \
jsx/stemmer.jsx

JSX_SAMPLE_SOURCES = jsx/testapp.jsx \
jsx/stemwords.jsx

Expand All @@ -60,6 +63,9 @@ PYTHON_RUNTIME_SOURCES = python/snowballstemmer/__init__.py \
PYTHON_SAMPLE_SOURCES = python/testapp.py \
python/stemwords.py

# Packaging files copied to the top of the Python dist tree, where
# dist_libstemmer_python then runs `python setup.py sdist`.
PYTHON_PACKAGE_FILES = python/MANIFEST.in \
python/setup.py

LIBSTEMMER_SOURCES = libstemmer/libstemmer.c
LIBSTEMMER_UTF8_SOURCES = libstemmer/libstemmer_utf8.c
LIBSTEMMER_HEADERS = include/libstemmer.h libstemmer/modules.h libstemmer/modules_utf8.h
Expand All @@ -81,8 +87,8 @@ C_LIB_HEADERS = $(libstemmer_algorithms:%=$(c_src_dir)/stem_UTF_8_%.h) \
C_OTHER_SOURCES = $(other_algorithms:%=$(c_src_dir)/stem_UTF_8_%.c)
C_OTHER_HEADERS = $(other_algorithms:%=$(c_src_dir)/stem_UTF_8_%.h)
JAVA_SOURCES = $(libstemmer_algorithms:%=$(java_src_dir)/%Stemmer.java)
PYTHON_SOURCES = $(libstemmer_algorithms:%=$(python_src_dir)/%_stemmer.py)
JSX_SOURCES = $(libstemmer_algorithms:%=$(jsx_src_dir)/%-stemmer.jsx)
# Generated stemmer files, one per entry in libstemmer_algorithms:
#   <python_output_dir>/<algorithm>_stemmer.py
#   <jsx_output_dir>/<algorithm>-stemmer.jsx
PYTHON_SOURCES = $(addprefix $(python_output_dir)/,$(addsuffix _stemmer.py,$(libstemmer_algorithms)))
JSX_SOURCES = $(addprefix $(jsx_output_dir)/,$(addsuffix -stemmer.jsx,$(libstemmer_algorithms)))

COMPILER_OBJECTS=$(COMPILER_SOURCES:.c=.o)
RUNTIME_OBJECTS=$(RUNTIME_SOURCES:.c=.o)
Expand Down Expand Up @@ -185,17 +191,17 @@ $(java_src_dir)/%Stemmer.java: algorithms/%/stem_Unicode.sbl snowball
echo "./snowball $< -j -o $${o} -p \"org.tartarus.snowball.SnowballStemmer\" -eprefix $${l}_ -r ../runtime -n $${l}Stemmer"; \
./snowball $< -j -o $${o} -p "org.tartarus.snowball.SnowballStemmer" -eprefix $${l}_ -r ../runtime -n $${l}Stemmer

$(python_src_dir)/%_stemmer.py: algorithms/%/stem_Unicode.sbl snowball
@mkdir -p $(python_src_dir)
# Generate the Python stemmer for one algorithm from its stem_Unicode.sbl
# description by running the snowball compiler with -py.
#   l = algorithm name (the directory that holds the .sbl file)
#   o = output path without extension, passed to -o
#   n = generated class name: title-cased algorithm name + "Stemmer"
# The echoed command line is built from the same values as the executed one,
# so the log shows exactly what ran (including -p "BaseStemmer").
$(python_output_dir)/%_stemmer.py: algorithms/%/stem_Unicode.sbl snowball
	@mkdir -p $(python_output_dir)
	@l=`echo "$<" | sed 's!\(.*\)/stem_Unicode.sbl$$!\1!;s!^.*/!!'`; \
	o="$(python_output_dir)/$${l}_stemmer"; \
	n=`python -c "print('$${l}'.title())"`Stemmer; \
	echo "./snowball $< -py -o $${o} -p \"BaseStemmer\" -eprefix $${l}_ -r ../runtime -n $${n}"; \
	./snowball $< -py -o $${o} -p "BaseStemmer" -eprefix $${l}_ -r ../runtime -n $${n}

$(jsx_src_dir)/%-stemmer.jsx: algorithms/%/stem_Unicode.sbl snowball
@mkdir -p $(jsx_src_dir)
# Generate the JSX stemmer for one algorithm from its stem_Unicode.sbl
# description by running the snowball compiler with -jsx.
#   l = algorithm name (the directory that holds the .sbl file)
#   o = output path without extension, passed to -o
#   n = generated class name: title-cased algorithm name + "Stemmer"
# The echoed command line is built from the same values as the executed one,
# so the log shows exactly what ran (including -p "BaseStemmer").
$(jsx_output_dir)/%-stemmer.jsx: algorithms/%/stem_Unicode.sbl snowball
	@mkdir -p $(jsx_output_dir)
	@l=`echo "$<" | sed 's!\(.*\)/stem_Unicode.sbl$$!\1!;s!^.*/!!'`; \
	o="$(jsx_output_dir)/$${l}-stemmer"; \
	n=`python -c "print('$${l}'.title())"`Stemmer; \
	echo "./snowball $< -jsx -o $${o} -p \"BaseStemmer\" -eprefix $${l}_ -r ../runtime -n $${n}"; \
	./snowball $< -jsx -o $${o} -p "BaseStemmer" -eprefix $${l}_ -r ../runtime -n $${n}

Expand Down Expand Up @@ -300,16 +306,17 @@ dist_libstemmer_python: $(PYTHON_SOURCES)
dest=dist/$${destname}; \
rm -rf $${dest} && \
rm -f $${dest}.tgz && \
echo "a1" && \
mkdir -p $${dest} && \
mkdir -p $${dest}/$(python_src_dir) && \
cp -a $(PYTHON_SOURCES) $${dest}/$(python_src_dir) && \
cp -a $(PYTHON_SAMPLE_SOURCES) $${dest}/$(python_sample_dir) && \
cp -a $(PYTHON_RUNTIME_SOURCES) $${dest}/$(python_src_dir) && \
(cd $${dest} && \
echo "README" >> MANIFEST && \
ls $(python_src_dir)/*.py >> MANIFEST && \
ls $(python_sample_dir)/*.py >> MANIFEST) && \
(cd dist && tar zcf $${destname}.tgz $${destname})
mkdir -p $${dest}/src/$(python_runtime_dir) && \
mkdir -p $${dest}/src/$(python_sample_dir) && \
cp doc/libstemmer_python_README.rst $${dest}/README.rst && \
cp doc/libstemmer_python_LICENSE.rst $${dest}/LICENSE.rst && \
cp -a $(PYTHON_SOURCES) $${dest}/src/$(python_runtime_dir) && \
cp -a $(PYTHON_SAMPLE_SOURCES) $${dest}/src/$(python_sample_dir) && \
cp -a $(PYTHON_RUNTIME_SOURCES) $${dest}/src/$(python_runtime_dir) && \
cp -a $(PYTHON_PACKAGE_FILES) $${dest} && \
(cd $${dest} && python setup.py sdist && cp dist/*.tar.gz ..)

dist_libstemmer_jsx: $(JSX_SOURCES)
destname=jsxstemmer; \
Expand All @@ -323,11 +330,11 @@ dist_libstemmer_jsx: $(JSX_SOURCES)
cp -a doc/libstemmer_jsx_LICENSE.rst $${dest}/LICENSE.rst && \
cp -a $(JSX_RUNTIME_SOURCES) $${dest}/$(jsx_src_dir) && \
cp -a $(JSX_SAMPLE_SOURCES) $${dest}/$(jsx_sample_dir) && \
cp -a $(JSX_SOURCES) $${dest}/$(jsx_src_dir) && \
cp -a $(JSX_SOURCES) $${dest}/$(jsx_runtime_dir) && \
(cd $${dest} && \
echo "README.rst" >> MANIFEST && \
echo "LICENSE.rst" >> MANIFEST && \
ls $(jsx_src_dir)/*.jsx >> MANIFEST && \
ls $(jsx_runtime_dir)/*.jsx >> MANIFEST && \
ls $(jsx_sample_dir)/*.jsx >> MANIFEST) && \
(cd dist && tar zcf $${destname}.tgz $${destname}) && \
rm -rf $${dest}
Expand Down
24 changes: 24 additions & 0 deletions snowball/doc/libstemmer_python_LICENSE.rst
@@ -0,0 +1,24 @@
License
-------

Copyright (c) 2013, Yoshiki Shibukawa

All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided
that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this list of conditions and
the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions
and the following disclaimer in the documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

94 changes: 94 additions & 0 deletions snowball/doc/libstemmer_python_README.rst
@@ -0,0 +1,94 @@
Snowball stemming library collection for Python
===============================================

This document pertains to the Python version of the stemmer library distribution,
available for download from:

* https://github.com/shibukawa/snowball_jsx/

The original program is maintained at the following location:

* http://snowball.tartarus.org/

The original Snowball product was created by Dr Martin Porter and Richard Boulton (Java port) and is
distributed under the BSD license.

How to use library
------------------

You can use each stemming module from a Python program.

.. code-block:: python

   import snowballstemmer

   stemmer = snowballstemmer.EnglishStemmer();
   print(stemmer.stem("We are the world"));

The following modules are common modules. Don't forget to bundle them with your program:

* ``snowballstemmer/__init__.py``
* ``snowballstemmer/among.py``
* ``snowballstemmer/basestemmer.py``

The following modules are optional. Select the language modules you need:

* ``danish_stemmer.py``
* ``dutch_stemmer.py``
* ``english_stemmer.py``
* ``finnish_stemmer.py``
* ``french_stemmer.py``
* ``german_stemmer.py``
* ``hungarian_stemmer.py``
* ``italian_stemmer.py``
* ``norwegian_stemmer.py``
* ``porter_stemmer.py``
* ``portuguese_stemmer.py``
* ``romanian_stemmer.py``
* ``russian_stemmer.py``
* ``spanish_stemmer.py``
* ``swedish_stemmer.py``
* ``turkish_stemmer.py``

The TestApp example
-------------------

The :file:`testapp.py` example program allows you to run any of the stemmers
on a sample vocabulary.

Usage::

    testapp.py <algorithm> "sentences ... "

.. code-block:: bash

   $ python testapp.py English "sentences... "

License
-------

It is a BSD licensed library.

-----------------------------

Copyright (c) 2013, Yoshiki Shibukawa

All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided
that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this list of conditions and
the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions
and the following disclaimer in the documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

4 changes: 4 additions & 0 deletions snowball/python/MANIFEST.in
@@ -0,0 +1,4 @@
include *.rst
include setup.*
recursive-include src *.py
include MANIFEST.in
55 changes: 55 additions & 0 deletions snowball/python/setup.py
@@ -0,0 +1,55 @@
#!/usr/bin/env python
"""Packaging script for the ``snowballstemmer`` distribution.

The package sources are expected under ``src/snowballstemmer`` relative to
this file (see ``package_dir`` below).
"""

from distutils.core import setup

setup(
    name='snowballstemmer',
    version='0.1.0',
    description='This package provides 16 stemmer algorithms (15 + Porter English stemmer) generated from Snowball algorithms.',
    long_description='''
It includes following language algorithms:
* Danish
* Dutch
* English (Standard, Porter)
* Finnish
* French
* German
* Hungarian
* Italian
* Norwegian
* Portuguese
* Romanian
* Russian
* Spanish
* Swedish
* Turkish
''',
    author='Yoshiki Shibukawa',
    author_email='yoshiki at shibu.jp',
    url='https://github.com/shibukawa/snowball_py',
    keywords="stemmer",
    license="BSD",
    # package_dir only says WHERE a package lives; the package itself must
    # also be listed in `packages`, otherwise no module is built or installed.
    packages=["snowballstemmer"],
    package_dir={"snowballstemmer": "src/snowballstemmer"},
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: BSD License',
        'Programming Language :: Python',
        'Natural Language :: Danish',
        'Natural Language :: Dutch',
        'Natural Language :: English',
        'Natural Language :: Finnish',
        'Natural Language :: French',
        'Natural Language :: German',
        'Natural Language :: Hungarian',
        'Natural Language :: Italian',
        'Natural Language :: Norwegian',
        'Natural Language :: Portuguese',
        'Natural Language :: Romanian',
        'Natural Language :: Russian',
        'Natural Language :: Spanish',
        'Natural Language :: Swedish',
        'Natural Language :: Turkish',
        'Operating System :: OS Independent'
    ]
)
Empty file.
15 changes: 15 additions & 0 deletions snowball/python/snowballstemmer/among.py
@@ -0,0 +1,15 @@

class Among(object):
    """One entry of a stemmer's search table: a search string plus lookup data."""

    def __init__(self, s, substring_i, result, method=None):
        """
        Store the entry's fields unchanged; only ``s_size`` is derived.

        @ivar s_size search string size (``len(s)``)
        @ivar s search string
        @ivar substring_i index to longest matching substring
        @ivar result of the lookup
        @ivar method method to use if substring matches (None when not given)
        """
        self.s_size = len(s)
        self.s = s
        self.substring_i = substring_i
        self.result = result
        self.method = method

0 comments on commit 8863276

Please sign in to comment.