Add Repository.hashfile()

Expose libgit2's git_repository_hashfile() as Repository.hashfile(path),
which hashes a file after applying the repository's filtering rules, and
cover it with tests.

The tests write their fixtures as bytes so the LF/CRLF content on disk is
exactly what each test intends, independent of any text-mode newline
translation on the host platform.

diff --git a/pygit2/_pygit2.pyi b/pygit2/_pygit2.pyi
index fd4a5c6ef..9d21c7218 100644
--- a/pygit2/_pygit2.pyi
+++ b/pygit2/_pygit2.pyi
@@ -471,6 +471,7 @@ class Repository:
     def expand_id(self, hex: str) -> Oid: ...
     def free(self) -> None: ...
     def git_object_lookup_prefix(self, oid: _OidArg) -> Object: ...
+    def hashfile(self, path: str) -> Oid: ...
     def init_submodules(self, submodules: list[Submodule] = None, overwrite = False) -> None: ...
     def list_worktrees(self) -> list[str]: ...
     def listall_branches(self, flag: int = GIT_BRANCH_LOCAL) -> list[str]: ...
diff --git a/src/repository.c b/src/repository.c
index f16cd0b43..d7b28a16f 100644
--- a/src/repository.c
+++ b/src/repository.c
@@ -2435,6 +2435,44 @@ Repository_listall_stashes(Repository *self, PyObject *args)
     }
 }
 
+PyDoc_STRVAR(Repository_hashfile__doc__,
+  "hashfile(path: str) -> Oid\n"
+  "\n"
+  "Calculate hash of file using repository filtering rules.\n"
+  "\n"
+  "If you simply want to calculate the hash of a file on disk with no filters, "
+  "you can just use the ``pygit2.hashfile`` API. However, if you want to hash "
+  "a file in the repository and you want to apply filtering rules (e.g. crlf "
+  "filters) before generating the SHA, then use this function.\n"
+  "\n"
+  "Note: if the repository has ``core.safecrlf`` set to fail and the filtering "
+  "triggers that failure, then this function will raise ``GitError``.");
+
+PyObject *
+Repository_hashfile(Repository *self, PyObject *args)
+{
+    git_oid oid;
+    PyBytesObject *py_path = NULL;
+    const char* path = NULL;
+    int err;
+
+    /* "O&" + PyUnicode_FSConverter stores a new bytes reference on success */
+    if (!PyArg_ParseTuple(args, "O&", PyUnicode_FSConverter, &py_path))
+        return NULL;
+
+    if (py_path != NULL)
+        path = PyBytes_AS_STRING(py_path);
+
+    /* as_path == NULL: the filtering rules are looked up using `path` */
+    err = git_repository_hashfile(&oid, self->repo, path, GIT_OBJECT_BLOB, NULL);
+    /* `path` points into py_path, so release it only after the call */
+    Py_XDECREF(py_path);
+    if (err < 0)
+        return Error_set(err);
+
+    return git_oid_to_python(&oid);
+}
+
 PyMethodDef Repository_methods[] = {
     METHOD(Repository, create_blob, METH_VARARGS),
     METHOD(Repository, create_blob_fromworkdir, METH_O),
@@ -2490,6 +2528,7 @@ PyMethodDef Repository_methods[] = {
     METHOD(Repository, set_odb, METH_O),
     METHOD(Repository, set_refdb, METH_O),
     METHOD(Repository, listall_stashes, METH_NOARGS),
+    METHOD(Repository, hashfile, METH_VARARGS),
     {NULL}
 };
 
diff --git a/src/repository.h b/src/repository.h
index 5a228b1d4..10796a194 100755
--- a/src/repository.h
+++ b/src/repository.h
@@ -69,4 +69,6 @@ PyObject* Repository_cherrypick(Repository *self, PyObject *py_oid);
 PyObject* Repository_apply(Repository *self, PyObject *py_diff, PyObject *kwds);
 PyObject* Repository_merge_analysis(Repository *self, PyObject *args);
 
+PyObject* Repository_hashfile(Repository *self, PyObject *args);
+
 #endif
diff --git a/test/test_repository.py b/test/test_repository.py
index 82a822a07..cf1defe72 100644
--- a/test/test_repository.py
+++ b/test/test_repository.py
@@ -759,3 +759,61 @@ def test_is_shallow(testrepo):
         f.write('abcdef0123456789abcdef0123456789abcdef00\n')
 
     assert testrepo.is_shallow
+
+
+def test_repo_hashfile_same_hash(testrepo):
+    """hashfile() of an untracked file equals pygit2.hash() of its content."""
+    data = 'Some multi-\nline text\n'
+    path = Path(testrepo.workdir) / 'untracked.txt'
+    # Write bytes: text mode may translate '\n' and change what gets hashed.
+    path.write_bytes(data.encode('utf-8'))
+
+    hashed_file_sha1 = testrepo.hashfile(str(path))
+    hashed_data_sha1 = pygit2.hash(data)
+    assert hashed_file_sha1 == hashed_data_sha1
+
+
+def test_repo_hashfile_crlf_normalization(testrepo):
+    """Under ``eol=lf``, LF and CRLF copies of a text hash identically."""
+    workdir = Path(testrepo.workdir)
+    (workdir / '.gitattributes').write_bytes(b'*.txt eol=lf\n')
+
+    data = b'Some multi-\nline text\n'
+    lf_path = workdir / 'untracked_lf.txt'
+    crlf_path = workdir / 'untracked_crlf.txt'
+    lf_path.write_bytes(data)
+    crlf_path.write_bytes(data.replace(b'\n', b'\r\n'))
+
+    hashed_lf_sha1 = testrepo.hashfile(str(lf_path))
+    hashed_crlf_sha1 = testrepo.hashfile(str(crlf_path))
+    assert hashed_lf_sha1 == hashed_crlf_sha1
+
+
+def test_repo_hashfile_no_normalization(testrepo):
+    """Under ``-text``, LF and CRLF copies of a text hash differently."""
+    workdir = Path(testrepo.workdir)
+    (workdir / '.gitattributes').write_bytes(b'*.txt -text\n')
+
+    data = b'Some multi-\nline text\n'
+    lf_path = workdir / 'untracked_lf.txt'
+    crlf_path = workdir / 'untracked_crlf.txt'
+    lf_path.write_bytes(data)
+    crlf_path.write_bytes(data.replace(b'\n', b'\r\n'))
+
+    hashed_lf_sha1 = testrepo.hashfile(str(lf_path))
+    hashed_crlf_sha1 = testrepo.hashfile(str(crlf_path))
+    assert hashed_lf_sha1 != hashed_crlf_sha1
+
+
+def test_repo_hashfile_crlf_normalization_error(testrepo):
+    """With core.safecrlf set, a CRLF file under ``eol=lf`` raises GitError."""
+    testrepo.config['core.safecrlf'] = True
+    workdir = Path(testrepo.workdir)
+    (workdir / '.gitattributes').write_bytes(b'*.txt eol=lf\n')
+    crlf_path = workdir / 'untracked_crlf.txt'
+    crlf_path.write_bytes(b'Some multi-\r\nline text\r\n')
+
+    with pytest.raises(pygit2.GitError) as exc:
+        testrepo.hashfile(str(crlf_path))
+
+    assert "CRLF would be replaced by LF" in str(exc.value)