Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Git file install: support for file creation from Git repositories #2754

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ jobs:
__HERE__
cat "$HOME/.bashrc"

- name: Configure git # Configure Git for Git dependent tests.
uses: cylc/release-actions/configure-git@v1

- name: Apt-Get Install
if: startsWith(matrix.os, 'ubuntu')
run: |
Expand Down
7 changes: 4 additions & 3 deletions metomi/rose/config_processors/fileinstall.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def _process(self, conf_tree, nodes, loc_dao, **kwargs):
source.scheme = scheme
break
self.loc_handlers_manager.parse(source, conf_tree)
except ValueError:
except ValueError as exc:
if source.is_optional:
sources.pop(source.name)
for name in source.used_by_names:
Expand All @@ -220,6 +220,7 @@ def _process(self, conf_tree, nodes, loc_dao, **kwargs):
raise ConfigProcessError(
["file:" + source.used_by_names[0], "source"],
source.name,
exc
)
prev_source = loc_dao.select(source.name)
source.is_out_of_date = (
Expand Down Expand Up @@ -856,7 +857,7 @@ def parse(self, loc, conf_tree):
# Scheme specified in the configuration.
handler = self.get_handler(loc.scheme)
if handler is None:
raise ValueError(loc.name)
raise ValueError(f"don't support scheme {loc.scheme}")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[minor] maybe a slightly cryptic error message for users to digest:

Suggested change
raise ValueError(f"don't support scheme {loc.scheme}")
raise ValueError(f"Rose doesn't support scheme {loc.scheme}")

else:
# Scheme not specified in the configuration.
scheme = urlparse(loc.name).scheme
Expand All @@ -865,7 +866,7 @@ def parse(self, loc, conf_tree):
if handler is None:
handler = self.guess_handler(loc)
if handler is None:
raise ValueError(loc.name)
raise ValueError(f"don't know how to process {loc.name}")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[minor] maybe a slightly cryptic error message for users to digest:

Suggested change
raise ValueError(f"don't know how to process {loc.name}")
raise ValueError(f"Rose doesn't know how to process {loc.name}")

else:
handler = self.get_handler(self.DEFAULT_SCHEME)
return handler.parse(loc, conf_tree)
Expand Down
2 changes: 1 addition & 1 deletion metomi/rose/loc_handlers/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def parse(cls, loc, _):
loc.scheme = "fs"
name = os.path.expanduser(loc.name)
if not os.path.exists(name):
raise ValueError(loc.name)
raise ValueError(f"path does not exist or not accessible: {name}")
paths_and_checksums = get_checksum(name)
for path, checksum, access_mode in paths_and_checksums:
loc.add_path(path, checksum, access_mode)
Expand Down
181 changes: 181 additions & 0 deletions metomi/rose/loc_handlers/git.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
# Copyright (C) British Crown (Met Office) & Contributors.
# This file is part of Rose, a framework for meteorological suites.
#
# Rose is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Rose is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Rose. If not, see <http://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------
"""A handler of Git locations."""

import os
import re
import tempfile
from urllib.parse import urlparse


REC_COMMIT_HASH = re.compile(r"^[0-9a-f]+$")


class GitLocHandler:
"""Handler of Git locations."""

GIT = "git"
SCHEMES = [GIT]
WEB_SCHEMES = ["https"]
URI_SEPARATOR = "::"

def __init__(self, manager):
self.manager = manager
# Determine (just once) what git version we have, if any.
ret_code, versiontext, stderr = self.manager.popen.run(
"git", "version")
if ret_code:
# Git not installed.
self.git_version = None
else:
# Git is installed, get the version.
version_nums = []
for num_string in versiontext.split()[-1].split("."):
try:
version_nums.append(int(num_string))
except ValueError:
break
self.git_version = tuple(version_nums)

def can_pull(self, loc):
"""Determine if this is a suitable handler for loc."""
if self.git_version is None:
return False
scheme = urlparse(loc.name).scheme
if scheme in self.SCHEMES:
return True
if self.URI_SEPARATOR not in loc.name:
return False
remote = self._parse_name(loc)[0]
return (
scheme in self.WEB_SCHEMES
MetRonnie marked this conversation as resolved.
Show resolved Hide resolved
and not os.path.exists(loc.name) # same as svn...
and not self.manager.popen.run(
"git", "ls-remote", "--exit-code", remote)[0]
# https://superuser.com/questions/227509/git-ping-check-if-remote-repository-exists
)

def parse(self, loc, conf_tree):
oliver-sanders marked this conversation as resolved.
Show resolved Hide resolved
"""Set loc.real_name, loc.scheme, loc.loc_type.

Within Git we have a lot of trouble figuring out remote
loc_type - a clone is required, unfortunately. There is a
tradeoff between extra Git commands and bandwidth vs
parse failure behaviour. We have decided to short cut the
loc_type calculation to save commands and bandwidth,
catching failures later.

"""
loc.scheme = self.SCHEMES[0]
remote, path, ref = self._parse_name(loc)

# Extract the commit hash if we don't already have it.
commithash = self._get_commithash(remote, ref)

if path.endswith("/"): # This is a short cut, checked later.
loc.loc_type = loc.TYPE_TREE
else:
loc.loc_type = loc.TYPE_BLOB
oliver-sanders marked this conversation as resolved.
Show resolved Hide resolved
loc.real_name = (
f"remote:{remote} ref:{ref} commit:{commithash} path:{path}"
)
loc.key = commithash # We'll notice branch/tag updates.

async def pull(self, loc, conf_tree):
"""Get loc to its cache.

git sparse-checkout is not available below Git 2.25, and seems to omit
contents altogether if set to the root of the repo (as of 2.40.1).

Filtering requires uploadpack.allowFilter to be set true on
the remote repo or server.

"""
if not loc.real_name:
self.parse(loc, conf_tree)
remote, path, ref = self._parse_name(loc)
with tempfile.TemporaryDirectory() as tmpdirname:
git_dir_opt = f"--git-dir={tmpdirname}/.git"
await self.manager.popen.run_ok_async(
"git", git_dir_opt, "init"
)
if self.git_version >= (2, 25, 0) and path != "./":
# sparse-checkout available and suitable for this case.
await self.manager.popen.run_ok_async(
"git", git_dir_opt, "sparse-checkout", "set", path,
"--no-cone"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing this is required, but worth noting the documented warnings:

https://git-scm.com/docs/git-sparse-checkout#_commands

)
await self.manager.popen.run_ok_async(
"git", git_dir_opt, "fetch", "--depth=1",
"--filter=blob:none", remote, loc.key
)
else:
# Fallback.
await self.manager.popen.run_ok_async(
"git", git_dir_opt, "fetch", "--depth=1", remote, loc.key
)

# Checkout to temporary location, then extract only 'path' later.
await self.manager.popen.run_ok_async(
"git", git_dir_opt, f"--work-tree={tmpdirname}", "checkout",
loc.key
)
name = tmpdirname + "/" + path
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[minor] os.path.join would be nicer.


# Check that we have inferred the right type from the path name.
real_loc_type = (
loc.TYPE_TREE if os.path.isdir(name) else loc.TYPE_BLOB
)
if real_loc_type != loc.loc_type:
raise ValueError(
f"Expected path '{path}' to be type '{loc.loc_type}', "
+ f"but it was '{real_loc_type}'. Check trailing slash."
)

# Extract only 'path' to cache.
dest = loc.cache
if loc.loc_type == "tree":
dest += "/"
cmd = self.manager.popen.get_cmd("rsync", name, dest)
await self.manager.popen.run_ok_async(*cmd)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm getting a failure here:

[INFO] 2024-06-03T11:56:18+0100 rsync -a '--exclude=.*' --timeout=1800 '--rsh=ssh -oBatchMode=yes -oStrictHostKeyChecking=no -oConnectTimeout=8' /var/tmp/tmpzpxb2lpj/README.md /var/tmp/tmpjul8aid7/e99d80506f38bfee39893fd042f2f5d6
[FAIL] 2024-06-03T11:56:19+0100 rsync -a '--exclude=.*' --timeout=1800 '--rsh=ssh -oBatchMode=yes -oStrictHostKeyChecking=no -oConnectTimeout=8' /var/tmp/tmpzpxb2lpj/README.md /var/tmp/tmpjul8aid7/e99d80506f38bfee39893fd042f2f5d6 # return-code=3, stderr=
[FAIL] 2024-06-03T11:56:19+0100 rsync: change_dir#3 "/var/tmp/tmpjul8aid7" failed: No such file or directory (2)
[FAIL] 2024-06-03T11:56:19+0100 rsync error: errors selecting input/output files, dirs (code 3) at main.c(694) [Receiver=3.1.2]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(status update - we're finding it hard to independently reproduce this one, currently a mystery)


def _parse_name(self, loc):
scheme, nonscheme = loc.name.split(":", 1)
return nonscheme.split(self.URI_SEPARATOR, maxsplit=3)

def _get_commithash(self, remote, ref):
"""Get the commit hash given a branch, tag, or commit hash.

Short commit hashes will not resolve since there is no remote
rev-parse functionality.

"""
ret_code, info, _ = self.manager.popen.run(
"git", "ls-remote", "--exit-code", remote, ref)
oliver-sanders marked this conversation as resolved.
Show resolved Hide resolved
if ret_code and ret_code != 2:
# repo not found
raise ValueError(f"ls-remote: could not locate '{remote}'")
if ret_code:
err = f"ls-remote: could not find ref '{ref}' in '{remote}'"
if REC_COMMIT_HASH.match(ref):
if len(ref) in [40, 64]: # SHA1, SHA256 hashes.
# Likely a full commit hash, but the server
# uploadpack.allowAnySHA1InWant configuration is not set.
return ref
err += ": you may be using an unsupported short commit hash"
raise ValueError(err)
return info.split()[0]
2 changes: 1 addition & 1 deletion metomi/rose/loc_handlers/namelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def parse(self, loc, conf_tree):
if section_value is None:
sections.remove(section)
if not sections:
raise ValueError(loc.name)
raise ValueError(f"could not locate {loc.name}")
return sections

async def pull(self, loc, conf_tree):
Expand Down
2 changes: 1 addition & 1 deletion metomi/rose/loc_handlers/rsync.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def parse(self, loc, _):
out = self.manager.popen(*cmd, stdin=stdin)[0]
lines = out.splitlines()
if not lines or lines[0] not in [loc.TYPE_BLOB, loc.TYPE_TREE]:
raise ValueError(loc.name)
raise ValueError(f"could not locate {path} on host {host}")
loc.loc_type = lines.pop(0)
if loc.loc_type == loc.TYPE_BLOB:
line = lines.pop(0)
Expand Down
45 changes: 45 additions & 0 deletions sphinx/api/configuration/file-creation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,51 @@ root directory to install file targets with a relative path:
:opt svn: The Subversion scheme. The location is a Subversion URL or
an FCM location keyword. A URI with these schemes ``svn``,
``svn+ssh`` and ``fcm`` are automatically recognised.
:opt git: The Git scheme. The location is complex due to Git semantics.
It must have the scheme ``git`` and be of the form
``git:REPOSITORY_URL::PATHSPEC::TREEISH``:

* ``REPOSITORY_URL`` should
be a Git repository URI which may itself have a scheme ``ssh``,
``git``, ``https``, or be of the form ``HOST:PATH``, or ``PATH`` for
local repositories.
* ``PATHSPEC`` should be a path to a file or
directory that you want to extract.
The ``PATHSPEC`` must end with a
trailing slash (``/``) if it is a directory. To extract from the root
of the repository use a ``PATHSPEC`` of ``./`` e.g.
``git:git@github.com:metomi/rose::./::2.2.0``.
* ``TREEISH`` should be a tag,
branch, or long commit hash to specify the commit at which you want
to extract.

These should follow the same semantics as if you git
cloned ``REPOSITORY_URL``, git checkout'ed ``TREEISH``, and extracted
the path ``PATHSPEC`` within the clone. It may help to think
of the parts of the location as ``git:Where::What::When``.

..rubric:: Examples:

.. code-block:: rose

# Download the sphinx directory from the master branch of
# the github.com/metomi/rose repo. Note trailing slash.
[file:rose-docs]
source=git:git@github.com:metomi/rose::sphinx/::master

# Extract the whole contents of version 2.0.1 of the local
# repository at /home/user/some/path/to/my/git/repo.
[file:all_of_my_repo]
source=git:/home/user/some/path/to/my/git/repo::./::2.0.1

# Extract a single file from a particular commit of a repo
# on a machine that we have ssh access to.
[file:my_file]
source=git:machine01:/data/user/my_repo_name::etc/my_file::7261bff4d9a6c582ec759ef52c46dd794fe8794e

You should set ``git config uploadpack.allowFilter true`` and
optionally ``git config uploadpack.allowAnySHA1InWant true`` on
repositories if you are setting them up to pull from.
:opt rsync: This scheme is useful for pulling a file or directory from
a remote host using ``rsync`` via ``ssh``. A URI should have the
form ``HOST:PATH``.
Expand Down
3 changes: 3 additions & 0 deletions sphinx/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ non-Python dependencies are satisfied.
+ ``rose task-run``
+ ``cylc install``

Git is likewise required for installing files from Git repositories or
hosting services such as GitHub. Note Git is not automatically installed
by the metomi-rose conda.

Configuring Rose
----------------
Expand Down
15 changes: 13 additions & 2 deletions t/rose-app-run/05-file.t
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ Hello and good bye.
__CONTENT__
OUT=$(cd config/file && cat hello1 hello2 hello3/text)
#-------------------------------------------------------------------------------
tests 62
tests 65
#-------------------------------------------------------------------------------
# Normal mode with free format files.
TEST_KEY=$TEST_KEY_BASE
Expand All @@ -70,7 +70,18 @@ run_fail "$TEST_KEY" rose app-run --config=../config -q \
--define='[file:hello4]source=stuff:ing'
file_cmp "$TEST_KEY.out" "$TEST_KEY.out" </dev/null
file_cmp "$TEST_KEY.err" "$TEST_KEY.err" <<'__CONTENT__'
[FAIL] file:hello4=source=stuff:ing: bad or missing value
[FAIL] file:hello4=source=stuff:ing: don't know how to process stuff:ing
__CONTENT__
test_teardown
#-------------------------------------------------------------------------------
# Normal mode with free format files and a file with an invalid scheme.
TEST_KEY=$TEST_KEY_BASE-invalid-content
test_setup
run_fail "$TEST_KEY" rose app-run --config=../config -q \
--define='schemes=stuff*=where_is_the_stuff' --define='[file:hello4]source=stuff:ing'
file_cmp "$TEST_KEY.out" "$TEST_KEY.out" </dev/null
file_cmp "$TEST_KEY.err" "$TEST_KEY.err" <<'__CONTENT__'
[FAIL] file:hello4=source=stuff:ing: don't support scheme where_is_the_stuff
__CONTENT__
test_teardown
#-------------------------------------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions t/rose-app-run/06-namelist.t
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ run_fail "$TEST_KEY" rose app-run --config=../config -q \
'--define=[file:shopping-list-3.nl]source=namelist:shopping_list'
file_cmp "$TEST_KEY.out" "$TEST_KEY.out" </dev/null
file_cmp "$TEST_KEY.err" "$TEST_KEY.err" <<'__CONTENT__'
[FAIL] file:shopping-list-3.nl=source=namelist:shopping_list: bad or missing value
[FAIL] file:shopping-list-3.nl=source=namelist:shopping_list: could not locate namelist:shopping_list
__CONTENT__
test_teardown
#-------------------------------------------------------------------------------
Expand All @@ -221,7 +221,7 @@ run_fail "$TEST_KEY" rose app-run --config=../config -q \
'--define=[!namelist:shopping_list]'
file_cmp "$TEST_KEY.out" "$TEST_KEY.out" </dev/null
file_cmp "$TEST_KEY.err" "$TEST_KEY.err" <<'__CONTENT__'
[FAIL] file:shopping-list-3.nl=source=namelist:shopping_list: bad or missing value
[FAIL] file:shopping-list-3.nl=source=namelist:shopping_list: could not locate namelist:shopping_list
__CONTENT__
test_teardown
#-------------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion t/rose-app-run/15-file-permission.t
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ run_fail "$TEST_KEY" rose app-run --config=../config -q
file_cmp "$TEST_KEY.out" "$TEST_KEY.out" <<'__OUT__'
__OUT__
file_cmp "$TEST_KEY.err" "$TEST_KEY.err" <<__ERR__
[FAIL] file:qux=source=$TEST_DIR/no_read_target_dir/qux: bad or missing value
[FAIL] file:qux=source=$TEST_DIR/no_read_target_dir/qux: path does not exist or not accessible: $TEST_DIR/no_read_target_dir/qux
__ERR__
chmod u+x $TEST_DIR/no_read_target_dir
rm $TEST_DIR/no_read_target_dir/qux
Expand Down