Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pygit2/_pygit2.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,7 @@ class Repository:
def expand_id(self, hex: str) -> Oid: ...
def free(self) -> None: ...
def git_object_lookup_prefix(self, oid: _OidArg) -> Object: ...
def hashfile(self, path: str) -> Oid: ...
# ``submodules`` defaults to None, so None is spelled out in its type;
# ``overwrite`` defaults to False and is annotated as a bool accordingly.
def init_submodules(self, submodules: list[Submodule] | None = None, overwrite: bool = False) -> None: ...
def list_worktrees(self) -> list[str]: ...
def listall_branches(self, flag: int = GIT_BRANCH_LOCAL) -> list[str]: ...
Expand Down
36 changes: 36 additions & 0 deletions src/repository.c
Original file line number Diff line number Diff line change
Expand Up @@ -2435,6 +2435,41 @@ Repository_listall_stashes(Repository *self, PyObject *args)
}
}

/* Docstring text for Repository_hashfile, declared through PyDoc_STRVAR.
 * The first line is the signature; the rest describes when to prefer this
 * method over the unfiltered pygit2.hashfile and the core.safecrlf caveat. */
PyDoc_STRVAR(Repository_hashfile__doc__,
"hashfile(path: str) -> Oid\n"
"\n"
"Calculate hash of file using repository filtering rules.\n"
"\n"
"If you simply want to calculate the hash of a file on disk with no filters, "
"you can just use the ``pygit2.hashfile`` API. However, if you want to hash "
"a file in the repository and you want to apply filtering rules (e.g. crlf "
"filters) before generating the SHA, then use this function.\n"
"\n"
"Note: if the repository has ``core.safecrlf`` set to fail and the filtering "
"triggers that failure, then this function will raise ``GitError``.");

/*
 * Repository.hashfile(path) -> Oid
 *
 * Parses a single path argument, asks libgit2 to hash that file as a blob
 * in the context of self->repo, and returns the resulting id as a Python
 * object. Returns NULL with an exception set when argument parsing fails or
 * when libgit2 reports a negative error code.
 */
PyObject *
Repository_hashfile(Repository *self, PyObject *args)
{
    PyBytesObject *fs_path = NULL;
    const char *c_path = NULL;
    git_oid result;
    int rc;

    /* The "O&" converter fills fs_path; it is released below once the
     * C string borrowed from it is no longer needed. */
    if (!PyArg_ParseTuple(args, "O&", PyUnicode_FSConverter, &fs_path))
        return NULL;

    c_path = (fs_path == NULL) ? NULL : PyBytes_AS_STRING(fs_path);

    rc = git_repository_hashfile(&result, self->repo, c_path, GIT_OBJ_BLOB, NULL);

    /* c_path points into fs_path, so drop the reference only after the call. */
    Py_XDECREF(fs_path);

    if (rc < 0)
        return Error_set(rc);

    return git_oid_to_python(&result);
}

PyMethodDef Repository_methods[] = {
METHOD(Repository, create_blob, METH_VARARGS),
METHOD(Repository, create_blob_fromworkdir, METH_O),
Expand Down Expand Up @@ -2490,6 +2525,7 @@ PyMethodDef Repository_methods[] = {
METHOD(Repository, set_odb, METH_O),
METHOD(Repository, set_refdb, METH_O),
METHOD(Repository, listall_stashes, METH_NOARGS),
METHOD(Repository, hashfile, METH_VARARGS),
{NULL}
};

Expand Down
2 changes: 2 additions & 0 deletions src/repository.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,6 @@ PyObject* Repository_cherrypick(Repository *self, PyObject *py_oid);
PyObject* Repository_apply(Repository *self, PyObject *py_diff, PyObject *kwds);
PyObject* Repository_merge_analysis(Repository *self, PyObject *args);

/* Hash a file using the repository's filtering rules (defined in repository.c). */
PyObject* Repository_hashfile(Repository *self, PyObject *args);

#endif
51 changes: 51 additions & 0 deletions test/test_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -759,3 +759,54 @@ def test_is_shallow(testrepo):
f.write('abcdef0123456789abcdef0123456789abcdef00\n')

assert testrepo.is_shallow

def test_repo_hashfile_same_hash(testrepo):
    """Check that ``Repository.hashfile`` agrees with ``pygit2.hash`` on the same content.

    This test writes no ``.gitattributes``, so the hash of the file on disk
    is expected to equal the hash of the in-memory text.
    """
    data = 'Some multi-\nline text\n'
    path = Path(testrepo.workdir) / 'untracked.txt'
    # Write raw bytes: text mode would translate '\n' to os.linesep (CRLF on
    # Windows), so the on-disk content would no longer match ``data``.
    path.write_bytes(data.encode('utf-8'))

    hashed_file_sha1 = testrepo.hashfile(str(path))
    hashed_data_sha1 = pygit2.hash(data)
    assert hashed_file_sha1 == hashed_data_sha1

def test_repo_hashfile_crlf_normalization(testrepo):
    """Check that LF and CRLF variants hash identically under ``*.txt eol=lf``."""
    workdir = Path(testrepo.workdir)
    # Same bytes the original print() call produced on POSIX (the trailing
    # blank line is kept), but independent of the platform's line separator.
    (workdir / '.gitattributes').write_bytes(b'*.txt eol=lf\n\n')

    data = b'Some multi-\nline text\n'
    lf_path = workdir / 'untracked_lf.txt'
    crlf_path = workdir / 'untracked_crlf.txt'
    # Binary writes keep the line endings exactly as spelled here; text mode
    # would turn '\n' into CRLF and '\r\n' into '\r\r\n' on Windows.
    lf_path.write_bytes(data)
    crlf_path.write_bytes(data.replace(b'\n', b'\r\n'))

    hashed_lf_sha1 = testrepo.hashfile(str(lf_path))
    hashed_crlf_sha1 = testrepo.hashfile(str(crlf_path))
    assert hashed_lf_sha1 == hashed_crlf_sha1

def test_repo_hashfile_no_normalization(testrepo):
    """Check that LF and CRLF variants hash differently under ``*.txt -text``."""
    workdir = Path(testrepo.workdir)
    # Same bytes the original print() call produced on POSIX (the trailing
    # blank line is kept), but independent of the platform's line separator.
    (workdir / '.gitattributes').write_bytes(b'*.txt -text\n\n')

    data = b'Some multi-\nline text\n'
    lf_path = workdir / 'untracked_lf.txt'
    crlf_path = workdir / 'untracked_crlf.txt'
    # Binary writes guarantee one file really is LF-only and the other really
    # is CRLF; text mode would rewrite both on Windows.
    lf_path.write_bytes(data)
    crlf_path.write_bytes(data.replace(b'\n', b'\r\n'))

    hashed_lf_sha1 = testrepo.hashfile(str(lf_path))
    hashed_crlf_sha1 = testrepo.hashfile(str(crlf_path))
    assert hashed_lf_sha1 != hashed_crlf_sha1

def test_repo_hashfile_crlf_normalization_error(testrepo):
    """Check that hashing a CRLF file raises ``GitError`` when ``core.safecrlf`` is on.

    The repository is configured with ``core.safecrlf = True`` and
    ``*.txt eol=lf``; the hashed file contains CRLF line endings.
    """
    testrepo.config['core.safecrlf'] = True
    workdir = Path(testrepo.workdir)
    # Same bytes the original print() call produced on POSIX (the trailing
    # blank line is kept), but independent of the platform's line separator.
    (workdir / '.gitattributes').write_bytes(b'*.txt eol=lf\n\n')

    crlf_path = workdir / 'untracked_crlf.txt'
    # Binary write so the file holds exactly '\r\n'; text mode would produce
    # '\r\r\n' on Windows.
    crlf_path.write_bytes(b'Some multi-\r\nline text\r\n')

    with pytest.raises(pygit2.GitError) as exc:
        testrepo.hashfile(str(crlf_path))

    # Must stay outside the ``with`` block: statements after the raising call
    # inside it would never execute.
    assert "CRLF would be replaced by LF" in str(exc.value)