Skip to content

Commit

Permalink
Merge pull request #510 from pypa/data-dir
Browse files Browse the repository at this point in the history
External data directory
  • Loading branch information
takluyver committed Feb 21, 2022
2 parents 9112376 + 08ac42d commit 4794dce
Show file tree
Hide file tree
Showing 14 changed files with 189 additions and 13 deletions.
37 changes: 37 additions & 0 deletions doc/pyproject_toml.rst
Expand Up @@ -425,5 +425,42 @@ Exclusions have priority over inclusions.
You'll have to adapt your inclusion/exclusion rules to achieve the same result
as you'd get with :ref:`build_cmd`.

External data section
---------------------

.. versionadded:: 3.7

Data files which your code will use should go inside the Python package folder.
Flit will package these with no special configuration.

However, sometimes it's useful to package external files for system integration,
such as man pages or files defining a Jupyter extension. To do this, arrange
the files within a directory such as ``data``, next to your ``pyproject.toml``
file, and add a section like this:

.. code-block:: toml

    [tool.flit.external-data]
    directory = "data"

Paths within this directory are typically installed to corresponding paths under
a prefix (such as a virtualenv directory). E.g. you might save a man page for a
script as ``(data)/share/man/man1/foo.1``.

Whether these files are detected by the systems they're meant to integrate with
depends on how your package is installed and how those systems are configured.
For instance, installing in a virtualenv usually doesn't affect anything outside
that environment. Don't rely on these files being picked up unless you have
close control of how the package will be installed.

If you install a package with ``flit install --symlink``, a symlink is made
for each file in the external data directory. Otherwise (including development
installs with ``pip install -e``), these files are copied to their destination,
so changes to the files in the external data directory won't take effect until
you reinstall the package.

.. note::

   For users coming from setuptools: external data corresponds to setuptools'
   ``data_files`` parameter, although setuptools offers more flexibility.

.. _environment marker: https://www.python.org/dev/peps/pep-0508/#environment-markers
13 changes: 13 additions & 0 deletions flit/install.py
Expand Up @@ -196,6 +196,17 @@ def install_scripts(self, script_defs, scripts_dir):

self.installed_files.append(cmd_file)

def install_data_dir(self, target_data_dir):
    """Install the external data files under ``target_data_dir``.

    Every file yielded by ``common.walk_data_dir`` for the configured data
    directory is placed at the same relative path below ``target_data_dir``;
    missing parent directories are created. Files are symlinked to their
    real source path when ``self.symlink`` is true, and copied with
    ``shutil.copy2`` otherwise. Each destination path is appended to
    ``self.installed_files``.

    A file or symlink already present at a destination (e.g. left by an
    earlier install) is removed first. Without this, ``os.symlink`` raises
    ``FileExistsError`` on a repeated symlink install, and ``shutil.copy2``
    raises ``SameFileError`` when the destination is a symlink that points
    back at the source file.
    """
    data_directory = self.ini_info.data_directory
    # walk_data_dir yields nothing when no data directory is configured.
    for src_path in common.walk_data_dir(data_directory):
        rel_path = os.path.relpath(src_path, data_directory)
        dst_path = os.path.join(target_data_dir, rel_path)
        os.makedirs(os.path.dirname(dst_path), exist_ok=True)

        # islink() also catches dangling symlinks, which isfile() misses.
        # A real directory at dst_path is deliberately left alone so the
        # install fails loudly rather than deleting a directory tree.
        if os.path.islink(dst_path) or os.path.isfile(dst_path):
            os.unlink(dst_path)

        if self.symlink:
            os.symlink(os.path.realpath(src_path), dst_path)
        else:
            shutil.copy2(src_path, dst_path)
        self.installed_files.append(dst_path)

def _record_installed_directory(self, path):
for dirpath, dirnames, files in os.walk(path):
for f in files:
Expand Down Expand Up @@ -332,6 +343,8 @@ def install_directly(self):
scripts = self.ini_info.entrypoints.get('console_scripts', {})
self.install_scripts(scripts, dirs['scripts'])

self.install_data_dir(dirs['data'])

self.write_dist_info(dirs['purelib'])

def install_with_pip(self):
Expand Down
17 changes: 17 additions & 0 deletions flit_core/flit_core/common.py
Expand Up @@ -416,3 +416,20 @@ def normalize_dist_name(name: str, version: str) -> str:
def dist_info_name(distribution, version):
    """Return the name of the ``.dist-info`` folder for a distribution.

    The name is the normalized distribution name (as produced by
    ``normalize_dist_name``) followed by the ``.dist-info`` suffix.
    """
    normalized = normalize_dist_name(distribution, version)
    return normalized + '.dist-info'


def walk_data_dir(data_directory):
    """Yield the path of every file below the given data directory.

    The yielded paths still start with ``data_directory``; callers that need
    paths relative to it must strip that prefix themselves. Subdirectories
    named ``__pycache__`` are not descended into. Within each directory,
    files are yielded in sorted order, and subdirectories are visited in
    sorted order.

    If ``data_directory`` is None, nothing is yielded.
    """
    if data_directory is None:
        return

    for current_dir, subdirs, filenames in os.walk(data_directory):
        # Editing the list in place is what tells os.walk which
        # subdirectories to visit next, and in which order.
        subdirs[:] = sorted(name for name in subdirs if name != '__pycache__')
        yield from (
            os.path.join(current_dir, name) for name in sorted(filenames)
        )
26 changes: 24 additions & 2 deletions flit_core/flit_core/config.py
Expand Up @@ -120,7 +120,7 @@ def prep_toml_config(d, path):
)

unknown_sections = set(dtool) - {
'metadata', 'module', 'scripts', 'entrypoints', 'sdist'
'metadata', 'module', 'scripts', 'entrypoints', 'sdist', 'external-data'
}
unknown_sections = [s for s in unknown_sections if not s.lower().startswith('x-')]
if unknown_sections:
Expand All @@ -142,6 +142,27 @@ def prep_toml_config(d, path):
dtool['sdist'].get('exclude', []), 'exclude'
)

data_dir = dtool.get('external-data', {}).get('directory', None)
if data_dir is not None:
toml_key = "tool.flit.external-data.directory"
if not isinstance(data_dir, str):
raise ConfigError(f"{toml_key} must be a string")

normp = osp.normpath(data_dir)
if osp.isabs(normp):
raise ConfigError(f"{toml_key} cannot be an absolute path")
if normp.startswith('..' + os.sep):
raise ConfigError(
f"{toml_key} cannot point outside the directory containing pyproject.toml"
)
if normp == '.':
raise ConfigError(
f"{toml_key} cannot refer to the directory containing pyproject.toml"
)
loaded_cfg.data_directory = path.parent / data_dir
if not loaded_cfg.data_directory.is_dir():
raise ConfigError(f"{toml_key} must refer to a directory")

return loaded_cfg

def flatten_entrypoints(ep):
Expand Down Expand Up @@ -207,7 +228,7 @@ def _check_glob_patterns(pats, clude):
raise ConfigError(
'{} pattern {!r} is an absolute path'.format(clude, p)
)
if osp.normpath(p).startswith('..' + os.sep):
if normp.startswith('..' + os.sep):
raise ConfigError(
'{} pattern {!r} points out of the directory containing pyproject.toml'
.format(clude, p)
Expand All @@ -227,6 +248,7 @@ def __init__(self):
self.sdist_include_patterns = []
self.sdist_exclude_patterns = []
self.dynamic_metadata = []
self.data_directory = None

def add_scripts(self, scripts_dict):
if scripts_dict:
Expand Down
9 changes: 6 additions & 3 deletions flit_core/flit_core/sdist.py
Expand Up @@ -72,13 +72,14 @@ class SdistBuilder:
which is what should normally be published to PyPI.
"""
def __init__(self, module, metadata, cfgdir, reqs_by_extra, entrypoints,
extra_files, include_patterns=(), exclude_patterns=()):
extra_files, data_directory, include_patterns=(), exclude_patterns=()):
self.module = module
self.metadata = metadata
self.cfgdir = cfgdir
self.reqs_by_extra = reqs_by_extra
self.entrypoints = entrypoints
self.extra_files = extra_files
self.data_directory = data_directory
self.includes = FilePatterns(include_patterns, str(cfgdir))
self.excludes = FilePatterns(exclude_patterns, str(cfgdir))

Expand All @@ -93,8 +94,8 @@ def from_ini_path(cls, ini_path: Path):
extra_files = [ini_path.name] + ini_info.referenced_files
return cls(
module, metadata, srcdir, ini_info.reqs_by_extra,
ini_info.entrypoints, extra_files, ini_info.sdist_include_patterns,
ini_info.sdist_exclude_patterns,
ini_info.entrypoints, extra_files, ini_info.data_directory,
ini_info.sdist_include_patterns, ini_info.sdist_exclude_patterns,
)

def prep_entry_points(self):
Expand All @@ -115,6 +116,8 @@ def select_files(self):
cfgdir_s = str(self.cfgdir)
return [
osp.relpath(p, cfgdir_s) for p in self.module.iter_files()
] + [
osp.relpath(p, cfgdir_s) for p in common.walk_data_dir(self.data_directory)
] + self.extra_files

def apply_includes_excludes(self, files):
Expand Down
1 change: 1 addition & 0 deletions flit_core/flit_core/tests/samples/with_data_dir/LICENSE
@@ -0,0 +1 @@
This file should be added to wheels
1 change: 1 addition & 0 deletions flit_core/flit_core/tests/samples/with_data_dir/README.rst
@@ -0,0 +1 @@
This contains a nön-ascii character
@@ -0,0 +1 @@
Example data file
3 changes: 3 additions & 0 deletions flit_core/flit_core/tests/samples/with_data_dir/module1.py
@@ -0,0 +1,3 @@
"""Example module"""

__version__ = '0.1'
26 changes: 26 additions & 0 deletions flit_core/flit_core/tests/samples/with_data_dir/pyproject.toml
@@ -0,0 +1,26 @@
[build-system]
requires = ["flit_core >=3.2,<4"]
build-backend = "flit_core.buildapi"

[project]
name = "module1"
authors = [
{name = "Sir Röbin", email = "robin@camelot.uk"}
]
readme = "README.rst"
license = {file = "LICENSE"}
requires-python = ">=3.7"
dependencies = [
"requests >= 2.18",
"docutils",
]
dynamic = [
"version",
"description",
]

[project.scripts]
foo = "module1:main"

[tool.flit.external-data]
directory = "data"
9 changes: 9 additions & 0 deletions flit_core/flit_core/tests/test_sdist.py
Expand Up @@ -49,3 +49,12 @@ def test_include_exclude():
assert osp.join('doc', 'test.rst') in files
assert osp.join('doc', 'test.txt') not in files
assert osp.join('doc', 'subdir', 'test.txt') in files


def test_data_dir():
    """Files in the external data directory are selected for the sdist."""
    toml_path = samples_dir / 'with_data_dir' / 'pyproject.toml'
    builder = sdist.SdistBuilder.from_ini_path(toml_path)

    selected = builder.select_files()
    files = builder.apply_includes_excludes(selected)

    man_page = osp.join('data', 'share', 'man', 'man1', 'foo.1')
    assert man_page in files
7 changes: 7 additions & 0 deletions flit_core/flit_core/tests/test_wheel.py
Expand Up @@ -29,3 +29,10 @@ def test_zero_timestamp(tmp_path, monkeypatch):
# Minimum value for zip timestamps is 1980-1-1
with ZipFile(info.file, 'r') as zf:
assert zf.getinfo('module1a.py').date_time == (1980, 1, 1, 0, 0, 0)


def test_data_dir(tmp_path):
    """External data files end up under ``<name>-<version>.data/data/``."""
    toml_path = samples_dir / 'with_data_dir' / 'pyproject.toml'
    info = make_wheel_in(toml_path, tmp_path)
    assert_isfile(info.file)

    with ZipFile(info.file, 'r') as zf:
        archive_names = zf.namelist()
        assert 'module1-0.1.data/data/share/man/man1/foo.1' in archive_names
19 changes: 16 additions & 3 deletions flit_core/flit_core/wheel.py
Expand Up @@ -57,13 +57,16 @@ def zip_timestamp_from_env() -> Optional[tuple]:


class WheelBuilder:
def __init__(self, directory, module, metadata, entrypoints, target_fp):
def __init__(
self, directory, module, metadata, entrypoints, target_fp, data_directory
):
"""Build a wheel from a module/package
"""
self.directory = directory
self.module = module
self.metadata = metadata
self.entrypoints = entrypoints
self.data_directory = data_directory

self.records = []
self.source_time_stamp = zip_timestamp_from_env()
Expand All @@ -74,14 +77,15 @@ def __init__(self, directory, module, metadata, entrypoints, target_fp):

@classmethod
def from_ini_path(cls, ini_path, target_fp):
# Local import so bootstrapping doesn't try to load toml
from .config import read_flit_config
directory = ini_path.parent
ini_info = read_flit_config(ini_path)
entrypoints = ini_info.entrypoints
module = common.Module(ini_info.module, directory)
metadata = common.make_metadata(module, ini_info)
return cls(directory, module, metadata, entrypoints, target_fp)
return cls(
directory, module, metadata, entrypoints, target_fp, ini_info.data_directory
)

@property
def dist_info(self):
Expand Down Expand Up @@ -160,6 +164,14 @@ def add_pth(self):
with self._write_to_zip(self.module.name + ".pth") as f:
f.write(str(self.module.source_dir.resolve()))

def add_data_directory(self):
    """Add the external data files to the wheel.

    Each file found by ``common.walk_data_dir`` in ``self.data_directory``
    is added below ``<normalized dist name>.data/data/``, keeping its path
    relative to the data directory.
    """
    normalized_name = common.normalize_dist_name(
        self.metadata.name, self.metadata.version
    )
    wheel_prefix = '{}.data/data/'.format(normalized_name)

    for src_file in common.walk_data_dir(self.data_directory):
        path_in_data = os.path.relpath(src_file, self.data_directory)
        self._add_file(src_file, wheel_prefix + path_in_data)

def write_metadata(self):
log.info('Writing metadata files')

Expand Down Expand Up @@ -193,6 +205,7 @@ def build(self, editable=False):
self.add_pth()
else:
self.copy_module()
self.add_data_directory()
self.write_metadata()
self.write_record()
finally:
Expand Down
33 changes: 28 additions & 5 deletions tests/test_install.py
Expand Up @@ -21,11 +21,13 @@
class InstallTests(TestCase):
def setUp(self):
td = tempfile.TemporaryDirectory()
scripts_dir = os.path.join(td.name, 'scripts')
purelib_dir = os.path.join(td.name, 'site-packages')
self.addCleanup(td.cleanup)
self.get_dirs_patch = patch('flit.install.get_dirs',
return_value={'scripts': scripts_dir, 'purelib': purelib_dir})
return_value={
'scripts': os.path.join(td.name, 'scripts'),
'purelib': os.path.join(td.name, 'site-packages'),
'data': os.path.join(td.name, 'data'),
})
self.get_dirs_patch.start()
self.tmpdir = pathlib.Path(td.name)

Expand Down Expand Up @@ -246,11 +248,13 @@ def test_symlink_other_python(self):
# Called by Installer._get_dirs() :
script2 = ("#!{python}\n"
"import json, sys\n"
"json.dump({{'purelib': {purelib!r}, 'scripts': {scripts!r} }}, "
"json.dump({{'purelib': {purelib!r}, 'scripts': {scripts!r}, 'data': {data!r} }}, "
"sys.stdout)"
).format(python=sys.executable,
purelib=str(self.tmpdir / 'site-packages2'),
scripts=str(self.tmpdir / 'scripts2'))
scripts=str(self.tmpdir / 'scripts2'),
data=str(self.tmpdir / 'data'),
)

with MockCommand('mock_python', content=script1):
ins = Installer.from_ini_path(samples_dir / 'package1' / 'pyproject.toml', python='mock_python',
Expand Down Expand Up @@ -288,6 +292,25 @@ def test_extras_error(self):
Installer.from_ini_path(samples_dir / 'requires-requests.toml',
user=False, deps='none', extras='dev')

def test_install_data_dir(self):
    """A direct install puts the data file in the 'data' directory."""
    toml_path = core_samples_dir / 'with_data_dir' / 'pyproject.toml'
    installer = Installer.from_ini_path(toml_path)
    installer.install_directly()

    installed_module = self.tmpdir / 'site-packages' / 'module1.py'
    installed_man_page = (
        self.tmpdir / 'data' / 'share' / 'man' / 'man1' / 'foo.1'
    )
    assert_isfile(installed_module)
    assert_isfile(installed_man_page)

def test_symlink_data_dir(self):
    """A symlink install links each data file back to its source file."""
    if os.name == 'nt':
        raise SkipTest("symlink")

    sample_dir = core_samples_dir / 'with_data_dir'
    man_page_rel = pathlib.Path('share') / 'man' / 'man1' / 'foo.1'

    installer = Installer.from_ini_path(
        sample_dir / 'pyproject.toml', symlink=True
    )
    installer.install_directly()

    assert_isfile(self.tmpdir / 'site-packages' / 'module1.py')
    assert_islink(
        self.tmpdir / 'data' / man_page_rel,
        to=sample_dir / 'data' / man_page_rel,
    )

@pytest.mark.parametrize(('deps', 'extras', 'installed'), [
('none', [], set()),
('develop', [], {'pytest ;', 'toml ;'}),
Expand Down

0 comments on commit 4794dce

Please sign in to comment.