diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index f657b6c..65c02c5 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -1,6 +1,3 @@ -# This workflows will upload a Python Package using Twine when a release is created -# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries - name: Upload Python Package on: @@ -10,7 +7,6 @@ on: jobs: deploy: runs-on: ubuntu-latest - steps: - uses: actions/checkout@v2 - name: Set up Python @@ -20,11 +16,12 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install setuptools wheel twine - - name: Build and publish - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} + pip install setuptools wheel twine build + - name: Build Dists run: | - python setup.py sdist bdist_wheel - twine upload dist/* + python -m build + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.github/workflows/pypi-test-publish.yml b/.github/workflows/pypi-test-publish.yml new file mode 100644 index 0000000..05b16a5 --- /dev/null +++ b/.github/workflows/pypi-test-publish.yml @@ -0,0 +1,29 @@ +name: Test Upload Python Package + +on: + push: + tags: + - "v*" + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: "3.x" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine build + - name: Build Dists + run: | + python -m build + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.TEST_PYPI_API_TOKEN }} + repository_url: https://test.pypi.org/legacy/ diff --git 
a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index b72876b..7b15abc 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -6,12 +6,23 @@ name: Python CI on: [push, pull_request] jobs: - build: + lint: + runs-on: ubuntu-latest + steps: + - name: Check Out + uses: actions/checkout@v2 + - name: Black + uses: psf/black@stable + - name: Pylint + uses: cclauss/GitHub-Action-for-pylint@0.7.0 + with: + args: pip install .; pylint src/**/*.py + + test: runs-on: ubuntu-latest strategy: matrix: python-version: [2.7, 3.6, 3.7, 3.8, 3.9] - steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} @@ -24,14 +35,25 @@ jobs: - name: Install module run: | pip install -e . - - name: Lint with flake8 - run: | - pip install flake8 - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - # flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest run: | pip install pytest pytest + + check_build: + runs-on: ubuntu-latest + steps: + - name: Check Out + uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: "3.x" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine build + - name: Build + run: python -m build + - name: Check + run: twine check dist/* diff --git a/.markdownlint.yaml b/.markdownlint.yaml new file mode 100644 index 0000000..ad9cabf --- /dev/null +++ b/.markdownlint.yaml @@ -0,0 +1,2 @@ +MD034: false # no-bare-urls: Bare URL used +MD024: false # no-duplicate-heading/no-duplicate-header: Multiple headings with the same content diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 0000000..05a170c --- /dev/null +++ b/.prettierrc @@ -0,0 +1,4 @@ +{ + "proseWrap": "always", + "trailingComma": "all" +} diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..509f95e --- /dev/null +++ b/.pylintrc @@ -0,0 +1,596 @@ +[MASTER] + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-whitelist= + +# Specify a score threshold to be exceeded before program exits with error. +fail-under=10 + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=CVS + +# Add files or directories matching the regex patterns to the blacklist. The +# regex matches against base names, not paths. +ignore-patterns= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use. 
+jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. +confidence= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". 
+disable=print-statement, + parameter-unpacking, + unpacking-in-except, + old-raise-syntax, + backtick, + long-suffix, + old-ne-operator, + old-octal-literal, + import-star-module-level, + non-ascii-bytes-literal, + raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + apply-builtin, + basestring-builtin, + buffer-builtin, + cmp-builtin, + coerce-builtin, + execfile-builtin, + file-builtin, + long-builtin, + raw_input-builtin, + reduce-builtin, + standarderror-builtin, + unicode-builtin, + xrange-builtin, + coerce-method, + delslice-method, + getslice-method, + setslice-method, + no-absolute-import, + old-division, + dict-iter-method, + dict-view-method, + next-method-called, + metaclass-assignment, + indexing-exception, + raising-string, + reload-builtin, + oct-method, + hex-method, + nonzero-method, + cmp-method, + input-builtin, + round-builtin, + intern-builtin, + unichr-builtin, + map-builtin-not-iterating, + zip-builtin-not-iterating, + range-builtin-not-iterating, + filter-builtin-not-iterating, + using-cmp-argument, + eq-without-hash, + div-method, + idiv-method, + rdiv-method, + exception-message-attribute, + invalid-str-codec, + sys-max-int, + bad-python3-import, + deprecated-string-function, + deprecated-str-translate-call, + deprecated-itertools-function, + deprecated-types-field, + next-method-defined, + dict-items-not-iterating, + dict-keys-not-iterating, + dict-values-not-iterating, + deprecated-operator-function, + deprecated-urllib-function, + xreadlines-attribute, + deprecated-sys-function, + exception-escape, + comprehension-escape, + + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). 
See also the "--disable" option for examples. +enable=c-extension-no-member + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'error', 'warning', 'refactor', and 'convention' +# which contain the number of messages in each category, as well as 'statement' +# which is the total number of statements analyzed. This score is used by the +# global evaluation report (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +#msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. To make it work, +# install the python-enchant package. 
+spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + +# Regular expression of note tags to take in consideration. +#notes-rgx= + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. 
+ignored-classes=optparse.Values,thread._local,_thread._local + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# List of decorators that change the signature of a decorated function. +signature-mutators= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. 
+redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=100 + +# Maximum number of lines in a module. +max-module-lines=1000 + +# List of optional constructs for which whitespace checking is disabled. `dict- +# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. +# `trailing-comma` allows a space between comma and closing bracket: (a, ). +# `empty-line` allows space-only lines. +no-space-check=trailing-comma, + dict-separator + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[SIMILARITIES] + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. 
+bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. +#class-attribute-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + ex, + Run, + _ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. 
+#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. +#variable-rgx= + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. +check-str-concat-over-line-jumps=no + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules=optparse,tkinter.tix + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled). +ext-import-graph= + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled). 
+import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. +preferred-modules= + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=cls + + +[DESIGN] + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "BaseException, Exception". 
+overgeneral-exceptions=BaseException, + Exception diff --git a/MANIFEST.in b/MANIFEST.in index 8ab6370..42f7cc6 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,7 +1,7 @@ include MANIFEST.in -include README.rst +include README.md include LICENSE -graft pointofview +graft src graft tests global-exclude .git global-exclude __pycache__ diff --git a/README.md b/README.md new file mode 100644 index 0000000..0f9d78e --- /dev/null +++ b/README.md @@ -0,0 +1,66 @@ +# pointofview + +[![Latest PyPI version](https://img.shields.io/pypi/v/pointofview.svg)](https://pypi.python.org/pypi/pointofview) +[![GitHub Workflow Status](https://github.com/prosegrinder/python-pointofview/workflows/Python%20CI/badge.svg?branch=main)](https://github.com/prosegrinder/python-pointofview/actions?query=workflow%3A%22Python+CI%22+branch%3Amain) + +A Python package for determining a piece of text's point of view (first, second, +third, or unknown). + +## Installation + +`pointofview` is available on PyPI. Simply install it with `pip`: + +```bash +pip install pointofview +``` + +You can also install it from source: + +```bash +$ git clone https://github.com/prosegrinder/python-pointofview.git +Cloning into 'python-pointofview'... +... + +$ cd python-pointofview +$ python setup.py install +... +``` + +## Usage + +`pointofview` guesses a text's point of view by counting point of view pronouns. +The main function `get_text_pov()` will return 'first', 'second', 'third', or +null (Python's `None` object): + +```python +>>> import pointofview +>>> text = "I'm a piece of text written in first person! What are you?" +>>> pointofview.get_text_pov(text) +'first' +``` + +There are two other helper functions as well. 
+ +`get_word_pov()` returns the point of view of a single word: + +```python +>>> pointofview.get_word_pov("I") +'first' +>>> pointofview.get_word_pov("nope") +None +``` + +`parse_pov_words()` returns a dict containing all first-, second-, and +third-person point-of-view words: + + + +```python +>>> text = """ +... When I try to analyze my own cravings, motives, actions and so forth, I surrender to a sort of retrospective imagination which feeds the analytic faculty with boundless alternatives and which causes each visualized route to fork and re-fork without end in the maddeningly complex prospect of my past. +... """ +>>> pointofview.parse_pov_words(text) +{'first': ['i', 'i'], 'second': [], 'third': []} +``` + + diff --git a/README.rst b/README.rst deleted file mode 100644 index 784066d..0000000 --- a/README.rst +++ /dev/null @@ -1,65 +0,0 @@ -pointofview -=========== - -.. image:: https://img.shields.io/pypi/v/pointofview.svg - :target: https://pypi.python.org/pypi/pointofview - :alt: Latest PyPI version - -.. image:: https://github.com/prosegrinder/python-pointofview/workflows/Python%20CI/badge.svg?branch=main - :target: https://github.com/prosegrinder/python-pointofview/actions?query=workflow%3A%22Python+CI%22+branch%3Amain - :alt: GitHub Workflow Status - -.. image:: https://api.codacy.com/project/badge/Grade/df0afcc70ffc4a86a8777588567820c0 - :target: https://www.codacy.com/app/ProseGrinder/python-pointofview?utm_source=github.com&utm_medium=referral&utm_content=prosegrinder/python-pointofview&utm_campaign=Badge_Grade - :alt: Latest Codacy Coverage Report - -A Python package for determining a piece of text's point of view (first, second, third, or unknown). - -Installation ------------- - -``pointofview`` is available on PyPI. Simply install it with ``pip``:: - - $ pip install pointofview - -You can also install it from source:: - - $ git clone https://github.com/prosegrinder/python-pointofview.git - Cloning into 'python-pointofview'... - ... 
- - $ cd python-pointofview - $ python setup.py install - ... - -Usage ------ - -``pointofview`` guesses a text's point of view by counting point of view pronouns. The main function ``get_text_pov()`` will return 'first', 'second', 'third', or null (Python's ``None`` object):: - - >>> import pointofview - >>> text = "I'm a piece of text written in first person! What are you?" - >>> pointofview.get_text_pov(text) - 'first' - -There are two other helper functions as well. - -``get_word_pov()`` returns the point of view of a single word:: - - >>> pointofview.get_word_pov("I") - 'first' - >>> pointofview.get_word_pov("nope") - None - -``parse_pov_words`` returns a dict containing all first, second, and third person pov words:: - - >>> text = """ - ... When I try to analyze my own cravings, motives, actions and so forth, I surrender to a sort of retrospective imagination which feeds the analytic faculty with boundless alternatives and which causes each visualized route to fork and re-fork without end in the maddeningly complex prospect of my past. - ... """ - >>> pointofview.parse_pov_words(text) - {'first': ['i', 'i'], 'second': [], 'third': []} - -Authors -------- - -`pointofview` was written by `David L. Day `_. 
diff --git a/pointofview/VERSION b/pointofview/VERSION deleted file mode 100644 index 3eefcb9..0000000 --- a/pointofview/VERSION +++ /dev/null @@ -1 +0,0 @@ -1.0.0 diff --git a/pointofview/__init__.py b/pointofview/__init__.py deleted file mode 100644 index b3f6447..0000000 --- a/pointofview/__init__.py +++ /dev/null @@ -1,68 +0,0 @@ -# -*- coding: utf-8 -*- - -"""pointofview - A Python package for determining a piece of text's point of view (first, second, third, or unknown).""" - -import re -from collections import OrderedDict - -import pkg_resources - -__version__ = pkg_resources.resource_string( - 'pointofview', 'VERSION').decode('utf-8').strip() - -# Constants for use in comparisons -FIRST = 'first' -SECOND = 'second' -THIRD = 'third' -NONE = None - -# NOTE: -# Words are expected to be in lower case. -# -# Point of view is in order of precedence. -# First person PoV can also contain second and third person words. -# Second person PoV can also contain third person words. -# Third person PoV can only contain third person words. 
-POV_WORDS = OrderedDict([ - (FIRST, - ["i", "i'm", "i'll", "i'd", "i've", "me", "mine", "myself", "we", - "we're", "we'll", "we'd", "we've", "us", "ours", "ourselves"]), - (SECOND, - ["you", "you're", "you'll", "you'd", "you've", - "your", "yours", "yourself", "yourselves"]), - (THIRD, - ["he", "he's", "he'll", "he'd", "him", "his", "himself", "she", "she's", - "she'll", "she'd", "her", "hers", "herself", "it", "it's", "it'll", - "it'd", "itself", "they", "they're", "they'll", "they'd", "they've", - "them", "their", "theirs", "themselves"]) -]) - -RE_WORDS = re.compile(r"[^\w’']+") - - -def get_word_pov(word, pov_words=POV_WORDS): - for pov in pov_words: - if word.lower().replace("’", "'") in ( - pov_word.lower() for pov_word in pov_words[pov]): - return pov - return None - - -def parse_pov_words(text, pov_words=POV_WORDS): - text_pov_words = {} - words = re.split(RE_WORDS, text.strip().lower()) - for pov in pov_words: - text_pov_words[pov] = [] - for word in words: - word_pov = get_word_pov(word, pov_words) - if word_pov != None: - text_pov_words[word_pov].append(word) - return text_pov_words - - -def get_text_pov(text, pov_words=POV_WORDS): - text_pov_words = parse_pov_words(text, pov_words) - for pov in POV_WORDS: - if len(text_pov_words[pov]) > 0: - return pov - return None diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..374b58c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,6 @@ +[build-system] +requires = [ + "setuptools>=42", + "wheel" +] +build-backend = "setuptools.build_meta" diff --git a/setup.cfg b/setup.cfg index ed8a958..aa99afe 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,45 @@ -[bdist_wheel] -universal = 1 - [metadata] +name = pointofview +version = file: src/pointofview/VERSION +author = David L. Day +author_email = dday376@gmail.com +description = "A Python package for determining a piece of text's point of view (first, second, third, or unknown)." 
+long_description = file: README.md +long_description_content_type = text/markdown license_file = LICENSE +url = https://github.com/prosegrinder/python-pointofview +project_urls = + Bug Tracker = https://github.com/prosegrinder/python-pointofview/issues +classifiers = + Intended Audience :: Developers + License :: OSI Approved :: GNU General Public License v3 (GPLv3) + Natural Language :: English + Programming Language :: Python :: 2 + Programming Language :: Python :: 2.7 + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + +[options] +python_requires = >= 2.7 +package_dir = + = src +packages = find: +include_package_data = True +setup_requires = + setuptools +build_requires = + build +tests_require = + pytest + +[options.packages.find] +where = src + +[bdist_wheel] +universal = true + +[sdist] +formats = zip, gztar diff --git a/setup.py b/setup.py index 0dc5162..6068493 100644 --- a/setup.py +++ b/setup.py @@ -1,48 +1,3 @@ -# -*- coding: utf-8 -*- - -from os import path - from setuptools import setup -# Version -with open(path.join(path.dirname(__file__), 'pointofview', 'VERSION')) as version_file: - VERSION = version_file.read().strip() -# Long Description -with open(path.join(path.dirname(__file__), 'README.rst')) as readme_file: - LONG_DESCRIPTION = readme_file.read() - - -setup( - name="pointofview", - version=VERSION, - url="https://github.com/prosegrinder/python-pointofview", - - author="David L. 
Day", - author_email="dday376@gmail.com", - - description="A Python package for determining a piece of text's point of view (first, second, third, or unknown).", - long_description=LONG_DESCRIPTION, - - packages=[ - 'pointofview' - ], - package_dir={'pointofview': 'pointofview'}, - package_data={ - '': ['LICENSE', '*.rst', 'MANIFEST.in'], - }, - include_package_data=True, - - classifiers=[ - 'Development Status :: 4 - Beta', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', - 'Natural Language :: English', - 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - ], -) +setup() diff --git a/src/pointofview/VERSION b/src/pointofview/VERSION new file mode 100644 index 0000000..7dea76e --- /dev/null +++ b/src/pointofview/VERSION @@ -0,0 +1 @@ +1.0.1 diff --git a/src/pointofview/__init__.py b/src/pointofview/__init__.py new file mode 100644 index 0000000..bdc2bc7 --- /dev/null +++ b/src/pointofview/__init__.py @@ -0,0 +1,175 @@ +# -*- coding: utf-8 -*- + +"""pointofview + +A Python package for determining a piece of text's +point of view (first, second, third, or unknown). +""" + +import re +from collections import OrderedDict + +import pkg_resources + +__version__ = ( + pkg_resources.resource_string("pointofview", "VERSION").decode("utf-8").strip() +) + +# Constants for use in comparisons +FIRST = "first" +SECOND = "second" +THIRD = "third" +NONE = None + +# NOTE: +# Words are expected to be in lower case. +# +# Point of view is in order of precedence. +# First person PoV can also contain second and third person words. +# Second person PoV can also contain third person words. +# Third person PoV can only contain third person words. 
+POV_WORDS = OrderedDict( + [ + ( + FIRST, + [ + "i", + "i'm", + "i'll", + "i'd", + "i've", + "me", + "mine", + "myself", + "we", + "we're", + "we'll", + "we'd", + "we've", + "us", + "ours", + "ourselves", + ], + ), + ( + SECOND, + [ + "you", + "you're", + "you'll", + "you'd", + "you've", + "your", + "yours", + "yourself", + "yourselves", + ], + ), + ( + THIRD, + [ + "he", + "he's", + "he'll", + "he'd", + "him", + "his", + "himself", + "she", + "she's", + "she'll", + "she'd", + "her", + "hers", + "herself", + "it", + "it's", + "it'll", + "it'd", + "itself", + "they", + "they're", + "they'll", + "they'd", + "they've", + "them", + "their", + "theirs", + "themselves", + ], + ), + ] +) + +RE_WORDS = re.compile(r"[^\w’']+") + + +def get_word_pov(word, pov_words=None): + """Get the point-of-view indicated by the word + + Parameters: + ---------- + word : str + The English-language word to find the point-of-view for + + Returns: + ------- + str + the point-of-view indicated by the word (first, second, third) + returns None if no point-of-view indicated + """ + if pov_words is None: + pov_words = POV_WORDS + for pov in pov_words: + if word.lower().replace("’", "'") in ( + pov_word.lower() for pov_word in pov_words[pov] + ): + return pov + return None + + +def parse_pov_words(text, pov_words=None): + """Parse out all the point-of-view indicator words in text + + Parameters: + ---------- + text : str + a block of english language text + + Returns: + ------- + dict + the point-of-view indicator words found, keyed by point-of-view + """ + if pov_words is None: + pov_words = POV_WORDS + text_pov_words = {} + words = re.split(RE_WORDS, text.strip().lower()) + for pov in pov_words: + text_pov_words[pov] = [] + for word in words: + word_pov = get_word_pov(word, pov_words) + if word_pov is not None: + text_pov_words[word_pov].append(word) + return text_pov_words + + +def get_text_pov(text, pov_words=None): + """Get the point-of-view of a piece of text + + Parameters: + ---------- + text : str + a
block of english language text + + Returns: + ------- + str + the point-of-view of the text (first, second, third) + returns None if no point-of-view words found + """ + text_pov_words = parse_pov_words(text, pov_words) + for pov in (POV_WORDS if pov_words is None else pov_words): # honor custom table + if len(text_pov_words[pov]) > 0: + return pov + return None diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_pointofview.py b/tests/test_pointofview.py index 98d2b41..30ceb10 100644 --- a/tests/test_pointofview.py +++ b/tests/test_pointofview.py @@ -2,44 +2,48 @@ import pointofview -POV_FIRST_SINGULAR=""" +POV_FIRST_SINGULAR = """ When I try to analyze my own cravings, motives, actions and so forth, I surrender to a sort of retrospective imagination which feeds the analytic faculty with boundless alternatives and which causes each visualized route to fork and re-fork without end in the maddeningly complex prospect of my past. """ # Lolita by Vladimir Nabakov -POV_FIRST_PLURAL=""" +POV_FIRST_PLURAL = """ Most of us on the boat were accomplished, and were sure we would make good wives. We knew how to cook and sew. We knew how to serve tea and arrange flowers and sit quietly on our flat wide feet for hours, saying absolutely nothing of substance at all. """ # The Buddha in the Attic by Julie Otsuka -POV_SECOND=""" +POV_SECOND = """ You are not the kind of guy who would be at a place like this at this time of the morning. But here you are, and you cannot say that the terrain is entirely unfamiliar, although the details are fuzzy. """ # Bright Lights, Big City by Jay Mclnemey -POV_SECOND=""" +POV_SECOND = """ While standing in his parents kitchen, you tell your boyfriend you’re leaving. You’re not going to college. You’re not buying into the schedules, the credits, or the points. No standardized success for you. """ -POV_THIRD=""" +POV_THIRD = """ The family of Dashwood had long been settled in Sussex.
Their estate was large, and their residence was at Norland Park, in the centre of their property, where, for many generations, they had lived in so respectable a manner as to engage the general good opinion of their surrounding acquaintance. """ # Sense and Sensibility by Jane Austen -POV_NONE=""" +POV_NONE = """ Due to pytest‘s detailed assertion introspection, only plain assert statements are used. See Getting Started for more examples. """ # https://docs.pytest.org/en/latest/index.html + def test_first(): - assert(pointofview.get_text_pov(POV_FIRST_SINGULAR) == pointofview.FIRST) # nosec - assert(pointofview.get_text_pov(POV_FIRST_PLURAL) == pointofview.FIRST) # nosec + assert pointofview.get_text_pov(POV_FIRST_SINGULAR) == pointofview.FIRST # nosec + assert pointofview.get_text_pov(POV_FIRST_PLURAL) == pointofview.FIRST # nosec + def test_second(): - assert(pointofview.get_text_pov(POV_SECOND) == pointofview.SECOND) # nosec + assert pointofview.get_text_pov(POV_SECOND) == pointofview.SECOND # nosec + def test_third(): - assert(pointofview.get_text_pov(POV_THIRD) == pointofview.THIRD) # nosec + assert pointofview.get_text_pov(POV_THIRD) == pointofview.THIRD # nosec + def test_none(): - assert(pointofview.get_text_pov(POV_NONE) == pointofview.NONE) # nosec + assert pointofview.get_text_pov(POV_NONE) == pointofview.NONE # nosec