Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add package support and support for launching via python -m scrapyrt #155

Open
wants to merge 1 commit into
base: master
Choose a base branch
from

Conversation

SamuelMarks
Copy link

@SamuelMarks SamuelMarks commented Nov 2, 2023

This PR also reflects Python 3.12 support, adds environment-variable fallbacks as defaults for common CLI args, and fixes an os.path.join call so that it is properly cross-platform.

As for the package CLI arg: it lets you point scrapyrt at a Scrapy project that lives inside an existing package hierarchy. For example, say you run scrapy startproject tutorial within your existing package hierarchy:

/tmp$ mkdir package_name && cd "$_"
/tmp/package_name$ touch setup.py
/tmp/package_name$ mkdir package_name && cd "$_"
/tmp/package_name/package_name$ touch __init__.py
/tmp/package_name/package_name$ scrapy startproject tutorial
/tmp/package_name$ touch package_name/tutorial/__init__.py
/tmp/package_name$ curl -L https://raw.githubusercontent.com/scrapinghub/sample-projects/master/quotes_crawler/quotes_crawler/spiders/toscrape-infinite-scrolling.py -o package_name/tutorial/tutorial/spiders/toscrape-infinite-scrolling.py
/tmp/package_name$ tree --charset=ascii
.
|-- package_name
|   |-- __init__.py
|   `-- tutorial
|       |-- __init__.py
|       |-- scrapy.cfg
|       `-- tutorial
|           |-- __init__.py
|           |-- items.py
|           |-- middlewares.py
|           |-- pipelines.py
|           |-- settings.py
|           `-- spiders
|               |-- __init__.py
|               `-- toscrape-infinite-scrolling.py
`-- setup.py

Then you can ensure scrapy.cfg gets installed when you run python -m pip install . or python -m pip install -e . by using this Python 3.12-compatible setup.py implementation:

import sys
from ast import Assign, Constant, Str, parse
from functools import partial
from operator import attrgetter
from os import path
from os.path import extsep

from setuptools import find_packages, setup

if sys.version_info[:2] >= (3, 12):
    # distutils is no longer in the standard library from Python 3.12 on, so a
    # local stand-in for ``distutils.sysconfig.get_python_lib`` is defined here.
    import os
    from sysconfig import get_python_version

    # Normalised interpreter prefixes, derived from the public ``sys``
    # attributes instead of relying on private ``sysconfig._*PREFIX`` names.
    PREFIX = os.path.normpath(sys.prefix)
    EXEC_PREFIX = os.path.normpath(sys.exec_prefix)
    BASE_PREFIX = os.path.normpath(sys.base_prefix)
    BASE_EXEC_PREFIX = os.path.normpath(sys.base_exec_prefix)

    class DistutilsPlatformError(Exception):
        """Raised when the library location is unknown for the current ``os.name``."""

    def is_virtual_environment():
        """
        Whether one is in a virtual environment

        True when ``sys.prefix`` differs from ``sys.base_prefix`` or when ``sys``
        has a ``real_prefix`` attribute.
        """
        return sys.base_prefix != sys.prefix or hasattr(sys, "real_prefix")

    def get_python_lib(plat_specific=0, standard_lib=0, prefix=None):
        """Return the directory containing the Python library (standard or
        site additions).

        If 'plat_specific' is true, return the directory containing
        platform-specific modules, i.e. any module from a non-pure-Python
        module distribution; otherwise, return the platform-shared library
        directory.  If 'standard_lib' is true, return the directory
        containing standard Python library modules; otherwise, return the
        directory for site-specific modules.

        If 'prefix' is supplied (not None), it is used as given instead of one
        of the interpreter prefixes; a relative or empty prefix therefore
        yields a relative path.

        :raises DistutilsPlatformError: if ``os.name`` is neither "posix" nor "nt".
        """
        # Evaluated before ``prefix`` is defaulted, so both ``None`` and ""
        # count as "default prefix" here.
        is_default_prefix = not prefix or os.path.normpath(prefix) in (
            "/usr",
            "/usr/local",
        )
        if prefix is None:
            # Conditional expressions instead of ``a and b or c``: the latter
            # silently picks the wrong prefix if the first choice is falsy.
            if standard_lib:
                prefix = BASE_EXEC_PREFIX if plat_specific else BASE_PREFIX
            else:
                prefix = EXEC_PREFIX if plat_specific else PREFIX

        if os.name == "posix":
            # Platform-specific modules (any module from a non-pure-Python
            # module distribution) and the standard library live under
            # sys.platlibdir; pure-Python site additions live under "lib".
            libdir = sys.platlibdir if (plat_specific or standard_lib) else "lib"
            libpython = os.path.join(prefix, libdir, "python" + get_python_version())
            if standard_lib:
                return libpython
            if is_default_prefix and not is_virtual_environment():
                # NOTE(review): "lib/python3/dist-packages" looks like the
                # Debian/Ubuntu layout -- confirm this is intended on other
                # POSIX systems, where site-packages is the usual location.
                return os.path.join(prefix, "lib", "python3", "dist-packages")
            return os.path.join(libpython, "site-packages")

        if os.name == "nt":
            if standard_lib:
                return os.path.join(prefix, "Lib")
            return os.path.join(prefix, "Lib", "site-packages")

        raise DistutilsPlatformError(
            "I don't know where Python installs its library "
            "on platform '%s'" % os.name
        )

else:
    from distutils.sysconfig import get_python_lib

# Name of the top-level package directory; used below as the distribution
# name and as a path component of both the source and install locations.
package_name = "package_name"


def to_funcs(*paths):
    """
    Build a pair of path-joining callables rooted at the given sub-folders.

    The first callable joins onto ``<dir of this file>/<package_name>/<paths...>``
    (the source tree); the second joins onto
    ``<get_python_lib(prefix="")>/<package_name>/<paths...>`` (the install
    location). Calling either with no arguments returns that base directory.

    :param paths: one or more str, referring to relative folder names
    :type paths: ```*paths```

    :return: 2 functions, (local joiner, install joiner)
    :rtype: ```Tuple[Callable[..., str], Callable[..., str]]```
    """
    source_root = path.dirname(__file__)
    install_root = get_python_lib(prefix="")

    join_local = partial(path.join, source_root, package_name, *paths)
    join_installed = partial(path.join, install_root, package_name, *paths)
    return join_local, join_installed


def main():
    """Main function for setup.py; this actually does the installation

    Calls ``setuptools.setup`` for ``package_name``. Every regular file that
    sits directly inside the ``tutorial`` directory and does not end in
    ``.py`` is shipped via ``data_files`` to the matching install directory.
    """
    # Local import so this function does not depend on ``os`` having been
    # bound at module level (it would otherwise raise NameError when it has not).
    from os import listdir

    tutorial_join, tutorial_install_dir = to_funcs("tutorial")

    # Non-Python files directly inside the tutorial directory (not recursive).
    non_python_files = [
        candidate
        for candidate in map(tutorial_join, listdir(tutorial_join()))
        if path.isfile(candidate) and not candidate.endswith(".py")
    ]

    setup(
        name=package_name,
        packages=find_packages(),
        package_dir={package_name: package_name},
        classifiers=[],
        python_requires=">=3.8",
        entry_points={
            # NOTE(review): the dotted path below contains a hyphenated module
            # name and a doubled "package_name" prefix -- confirm it resolves
            # to the intended spider module before relying on this entry point.
            "scrapy.commands": [
                "scroll=package_name.package_name.tutorial.spiders.toscrape-infinite-scrolling:ToScrapeInfiniteScrollingSpider",
            ],
        },
        data_files=[
            (tutorial_install_dir(), non_python_files),
        ],
    )


def setup_py_main():
    """Run `main` only when this module is executed as `__main__`; otherwise do nothing."""
    if __name__ != "__main__":
        return
    main()


# Invoked unconditionally at module level; the `__name__` check inside
# setup_py_main decides whether main() actually runs.
setup_py_main()

Which, as of this PR, would enable this to work:

$ scrapyrt --package package_name.tutorial

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant