From 063987bd722be57b712cbd130186ea8e143e8878 Mon Sep 17 00:00:00 2001 From: Andrew Lilley Brinker Date: Thu, 30 Jan 2025 14:03:49 -0800 Subject: [PATCH] feat: Big refactor. This refactor eliminates the `gitoid` crate, merging its key logic into the `omnibor` crate. It also substantially reorganizes the OmniBOR crate. There are too many changes to cover here, many of them minor. The most essential change is that cryptography providers and the hash algorithm type have been separated from each other. `Sha256` is now a marker type that can be produced by any compliant cryptography provider (RustCrypto, OpenSSL, and BoringSSL today), meaning that information about the identity of the cryptography provider doesn't leak into the identity of the resulting `ArtifactId` or `InputManifest`. Signed-off-by: Andrew Lilley Brinker --- Cargo.toml | 2 +- README.md | 5 - gitoid/CHANGELOG.md | 119 ---- gitoid/Cargo.toml | 117 ---- gitoid/README.md | 139 ----- gitoid/src/backend/boringssl.rs | 105 ---- gitoid/src/backend/mod.rs | 10 - gitoid/src/backend/openssl.rs | 105 ---- gitoid/src/backend/rustcrypto.rs | 37 -- gitoid/src/error.rs | 173 ------ gitoid/src/hash_algorithm.rs | 68 --- gitoid/src/internal.rs | 309 ---------- gitoid/src/object_type.rs | 61 -- gitoid/src/tests.rs | 259 --------- gitoid/src/util/mod.rs | 4 - gitoid/test/data/hello_world.txt | 1 - omnibor-cli/src/cli.rs | 13 +- omnibor-cli/src/cmd/manifest/create.rs | 27 +- omnibor-cli/src/error.rs | 2 +- omnibor-cli/src/fs.rs | 8 +- omnibor/Cargo.toml | 71 ++- .../new-benches}/benchmark.rs | 10 +- omnibor/src/artifact_id.rs | 535 ------------------ omnibor/src/artifact_id/artifact_id.rs | 302 ++++++++++ .../src/artifact_id/artifact_id_builder.rs | 111 ++++ omnibor/src/artifact_id/mod.rs | 5 + omnibor/src/error.rs | 60 +- omnibor/src/ffi/artifact_id.rs | 118 ++-- omnibor/src/ffi/error.rs | 24 +- omnibor/src/ffi/util.rs | 12 +- {gitoid/src => omnibor/src/gitoid}/gitoid.rs | 106 +--- .../src/gitoid}/gitoid_url_parser.rs | 
36 +- omnibor/src/gitoid/internal.rs | 192 +++++++ .../src/lib.rs => omnibor/src/gitoid/mod.rs | 73 +-- omnibor/src/hash_algorithm.rs | 41 ++ omnibor/src/hash_provider/boringssl.rs | 68 +++ omnibor/src/hash_provider/mod.rs | 28 + omnibor/src/hash_provider/openssl.rs | 68 +++ omnibor/src/hash_provider/rustcrypto.rs | 70 +++ .../{ => input_manifest}/embedding_mode.rs | 6 +- .../{ => input_manifest}/input_manifest.rs | 91 ++- .../input_manifest_builder.rs | 125 ++-- omnibor/src/input_manifest/mod.rs | 8 + omnibor/src/into_artifact_id.rs | 42 -- omnibor/src/lib.rs | 89 +-- omnibor/src/object_type.rs | 32 ++ omnibor/src/sealed.rs | 1 - omnibor/src/storage.rs | 136 +++-- omnibor/src/supported_hash.rs | 21 - omnibor/src/test.rs | 218 ++++++- .../src/util/for_each_buf_fill.rs | 7 +- omnibor/src/util/mod.rs | 4 + omnibor/src/util/pathbuf.rs | 17 + {gitoid/src => omnibor/src/util}/sealed.rs | 0 {gitoid => omnibor}/src/util/stream_len.rs | 14 +- {gitoid => omnibor}/test/data/unix_line.txt | 0 .../test/data/windows_line.txt | 0 57 files changed, 1617 insertions(+), 2688 deletions(-) delete mode 100644 gitoid/CHANGELOG.md delete mode 100644 gitoid/Cargo.toml delete mode 100644 gitoid/README.md delete mode 100644 gitoid/src/backend/boringssl.rs delete mode 100644 gitoid/src/backend/mod.rs delete mode 100644 gitoid/src/backend/openssl.rs delete mode 100644 gitoid/src/backend/rustcrypto.rs delete mode 100644 gitoid/src/error.rs delete mode 100644 gitoid/src/hash_algorithm.rs delete mode 100644 gitoid/src/internal.rs delete mode 100644 gitoid/src/object_type.rs delete mode 100644 gitoid/src/tests.rs delete mode 100644 gitoid/src/util/mod.rs delete mode 100644 gitoid/test/data/hello_world.txt rename {gitoid/benches => omnibor/new-benches}/benchmark.rs (96%) delete mode 100644 omnibor/src/artifact_id.rs create mode 100644 omnibor/src/artifact_id/artifact_id.rs create mode 100644 omnibor/src/artifact_id/artifact_id_builder.rs create mode 100644 omnibor/src/artifact_id/mod.rs rename 
{gitoid/src => omnibor/src/gitoid}/gitoid.rs (63%) rename {gitoid/src => omnibor/src/gitoid}/gitoid_url_parser.rs (68%) create mode 100644 omnibor/src/gitoid/internal.rs rename gitoid/src/lib.rs => omnibor/src/gitoid/mod.rs (67%) create mode 100644 omnibor/src/hash_algorithm.rs create mode 100644 omnibor/src/hash_provider/boringssl.rs create mode 100644 omnibor/src/hash_provider/mod.rs create mode 100644 omnibor/src/hash_provider/openssl.rs create mode 100644 omnibor/src/hash_provider/rustcrypto.rs rename omnibor/src/{ => input_manifest}/embedding_mode.rs (88%) rename omnibor/src/{ => input_manifest}/input_manifest.rs (79%) rename omnibor/src/{ => input_manifest}/input_manifest_builder.rs (75%) create mode 100644 omnibor/src/input_manifest/mod.rs delete mode 100644 omnibor/src/into_artifact_id.rs create mode 100644 omnibor/src/object_type.rs delete mode 100644 omnibor/src/sealed.rs delete mode 100644 omnibor/src/supported_hash.rs rename {gitoid => omnibor}/src/util/for_each_buf_fill.rs (88%) create mode 100644 omnibor/src/util/mod.rs create mode 100644 omnibor/src/util/pathbuf.rs rename {gitoid/src => omnibor/src/util}/sealed.rs (100%) rename {gitoid => omnibor}/src/util/stream_len.rs (91%) rename {gitoid => omnibor}/test/data/unix_line.txt (100%) rename {gitoid => omnibor}/test/data/windows_line.txt (100%) diff --git a/Cargo.toml b/Cargo.toml index 3c07639..a3113a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ # Overall workspace configuration. [workspace] -members = ["gitoid", "omnibor", "omnibor-cli", "xtask"] +members = ["omnibor", "omnibor-cli", "xtask"] resolver = "2" # Shared settings across packages in the workspace. 
diff --git a/README.md b/README.md index 31df268..0f131b7 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,6 @@ Analysis."][cisa_report] |:--------------|:----------------------------------------------------------|:------------------------------------------|:----------------------------------------------------------------------------------------------------------------| | `omnibor` | ![Library](https://img.shields.io/badge/Library-darkblue) | OmniBOR Identifiers and Manifests | [README][omnibor_r] · [Changelog][omnibor_c] · [API Docs][omnibor_d] · [Crate][omnibor_cr] | | `omnibor-cli` | ![Binary](https://img.shields.io/badge/Binary-darkgreen) | CLI for OmniBOR Identifiers and Manifests | [README][omnibor_cli_r] · [Changelog][omnibor_cli_c] · [Crate][omnibor_cli_cr] | -| `gitoid` | ![Library](https://img.shields.io/badge/Library-darkblue) | Git Object Identifiers (GitOIDs) | [README][gitoid_r] · [Changelog][gitoid_c] · [API Docs][gitoid_d] · [Crate][gitoid_cr] | | `xtask` | ![Binary](https://img.shields.io/badge/Binary-darkgreen) | OmniBOR Rust Workspace Automation | [README][xtask_r] | ## Contributing @@ -101,10 +100,6 @@ license text in the [`LICENSE`][license] file. 
[cisa_report]: https://www.cisa.gov/sites/default/files/2023-10/Software-Identification-Ecosystem-Option-Analysis-508c.pdf [cpe]: https://nvd.nist.gov/products/cpe [gitoid]: https://git-scm.com/book/en/v2/Git-Internals-Git-Objects -[gitoid_cr]: https://crates.io/crates/gitoid -[gitoid_r]: https://github.com/omnibor/omnibor-rs/blob/main/gitoid/README.md -[gitoid_c]: https://github.com/omnibor/omnibor-rs/blob/main/gitoid/CHANGELOG.md -[gitoid_d]: https://docs.rs/crate/gitoid/latest [license]: https://github.com/omnibor/omnibor-rs/blob/main/LICENSE [omnibor]: https://omnibor.io [omnibor_cr]: https://crates.io/crates/omnibor diff --git a/gitoid/CHANGELOG.md b/gitoid/CHANGELOG.md deleted file mode 100644 index 43536ed..0000000 --- a/gitoid/CHANGELOG.md +++ /dev/null @@ -1,119 +0,0 @@ -# Changelog - -All notable changes to this project will be documented in this file. - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - -## [0.9.0] - 2025-01-29 - -### Changed - -- Implement newline normalization. (#228) -- Feature cleanup - -### Fixed - -- Another `xtask` Display impl fix. (#233) - -## [0.8.0] - 2024-09-26 - -### Changed - -- Add BoringSSL backend support for SHA-1 and SHA-256 in gitoid (#182) -- Add OpenSSL backend support for gitoid (#186) -- Add comments, fixup conditional compilation. (#189) -- Overhaul CI testing. 
(#192) -- Update gitoid CHANGELOG for 0.8.0 -- Release gitoid-v0.8.0 - -## [0.7.1] - 2024-03-09 - -### Changed - -- Update project and crate READMEs (#173) -- Update `gitoid` crate CHANGELOG.md -- Release gitoid-v0.7.1 - -## [0.7.0] - 2024-03-07 - -### Changed - -- Fix broken `CHANGELOG.md` -- Rename `GitOid` methods for clarity (#167) -- Update `gitoid` crate CHANGELOG.md -- Release - -## [0.6.0] - 2024-03-07 - -### Changed - -- Initial full ArtifactId impl (#114) -- Make GitOid display print URL format (#116) -- Add gitoid crate README, update crate desc. (#128) -- Add 'cargo xtask' for custom tasks (#131) -- Introduce gitoid crate features incl. std. (#148) -- Added 'serde' feature to `gitoid` crate (#149) -- Introduce `omnibor` FFI. (#160) -- Update `gitoid` crate CHANGELOG.md -- Release - -## [0.5.0] - 2024-02-19 - -### Added - -- Add top-level docs example. (#113) - -### Changed - -- Windows test, FFI test, and commit msg CI (#106) -- Simplify GitOid crate substantially. (#108) -- Further simplify GitOID interface. (#109) -- Add async constructors for GitOid. (#110) -- Improve GitOid trait bounds (#111) -- Minor cleanup of docs and trait defs. (#112) - -## [0.4.0] - 2024-02-14 - -### Changed - -- Rewrite docs intro, reorg docs slightly (#94) -- Improved gitoid_from_buffer, misc. fixups (#95) -- First draft of README rewrite (#88) -- Simplify GitOid trait bounds (#96) - -### Fixed - -- Fixed broken FFI code on Windows. (#97) -- Windows FFI mistakenly using BufReader (#98) - -## [0.3.0] - 2024-02-12 - -### Changed - -- Make GitOid::new_invalid pub(crate) -- Improve clarity / reliability of FFI tests -- Bump dependency versions to latest. -- Make GitOid::new_invalid pub(crate) -- Improve clarity / reliability of FFI tests -- Bump dependency versions to latest. 
-- Remove BufReader req for GitOid construction (#85) - -### Fixed - -- Add missing conditional compilation for unix imports -- Hide C test executables -- Add missing conditional compilation for unix imports -- Hide C test executables -- Moved, cleaned up cbindgen.toml - -[0.9.0]: https://github.com/omnibor/omnibor-rs/compare/gitoid-v0.8.0..gitoid-v0.9.0 -[0.8.0]: https://github.com/omnibor/omnibor-rs/compare/gitoid-v0.7.1..gitoid-v0.8.0 -[0.7.1]: https://github.com/omnibor/omnibor-rs/compare/gitoid-v0.7.0..gitoid-v0.7.1 -[0.7.0]: https://github.com/omnibor/omnibor-rs/compare/gitoid-v0.6.0..gitoid-v0.7.0 -[0.6.0]: https://github.com/omnibor/omnibor-rs/compare/gitoid-v0.5.0..gitoid-v0.6.0 -[0.5.0]: https://github.com/omnibor/omnibor-rs/compare/gitoid-v0.4.0..gitoid-v0.5.0 -[0.4.0]: https://github.com/omnibor/omnibor-rs/compare/gitoid-v0.3.0..gitoid-v0.4.0 -[0.3.0]: https://github.com/omnibor/omnibor-rs/compare/v0.1.3..gitoid-v0.3.0 - - diff --git a/gitoid/Cargo.toml b/gitoid/Cargo.toml deleted file mode 100644 index d165223..0000000 --- a/gitoid/Cargo.toml +++ /dev/null @@ -1,117 +0,0 @@ -[package] -categories = ["cryptography", "development-tools"] -description = "Git Object Identifiers in Rust" -keywords = ["gitbom", "omnibor", "sbom", "gitoid"] -name = "gitoid" -readme = "README.md" -repository = "https://github.com/omnibor/omnibor-rs" -version = "0.9.0" -homepage.workspace = true -license.workspace = true -edition.workspace = true - -[dependencies] - -# no_std compatible dependencies. - -# NOTE: Must match the version used in the hash crate. -# -# Technically, we could rely on the re-export from one of those crates, -# but since all the hash crates are optional dependencies our usage code -# within the 'gitoid' crate would be more complex to handle the possibility -# for any/all of them to be missing. It's simpler to just specify it here -# so we know we always get the crate. 
-digest = { version = "0.10.7" } - -sha1 = { version = "0.10.6", default-features = false, optional = true } -sha1collisiondetection = { version = "0.3.3", default-features = false, features = [ - "digest-trait", -], optional = true } -sha2 = { version = "0.10.8", default-features = false, optional = true } - -# std-requiring dependencies. - -hex = { version = "0.4.3", optional = true } -serde = { version = "1.0.197", optional = true } -tokio = { version = "1.36.0", features = ["io-util"], optional = true } -url = { version = "2.4.1", optional = true } -boring = { version = "4.6.0", optional = true } -openssl = { version = "0.10.66", optional = true } -bytecount = { version = "0.6.8" } - -[dev-dependencies] - -# Need "rt" and "fs" additionally for tests. -tokio = { version = "1.36.0", features = [ - "io-util", - "fs", - "rt", - "rt-multi-thread", -] } -serde_test = "1.0.176" -criterion = { version = "0.5.1" } - -[features] - -# By default, you get: -# -# - Async support. -# - The 'rustcrypto' backend. -# - The SHA-256 algorithm. -# - Standard library support. -default = ["async", "backend-rustcrypto", "hash-sha256", "std"] - -# Async support is optional. That said, it's currently _only_ with Tokio, -# meaning you'd need to handle integrating with any other async runtime -# yourself. In the future it may be nice to make our async support fully -# generic and not specific to a given runtime. -# -# Note also that async support implies using the standard library, as Tokio -# is not `no_std`-compatible. -async = ["dep:tokio", "std"] - -# All hash algorithms are optional, though you need to have at least one -# algorithm turned on for this crate to be useful. This is intended to -# just let you avoid paying the cost of algorithms you don't use. -hash-sha1 = ["dep:sha1"] -hash-sha1cd = ["dep:sha1collisiondetection"] -hash-sha256 = ["dep:sha2"] - -# Get standard library support. -# -# This feature is enabled by default. 
You can disable it to run in -# environments without `std`, usually embedded environments. -std = [ - "digest/std", - "sha1?/std", - "sha1collisiondetection?/std", - "sha2?/std", - "bytecount/runtime-dispatch-simd", - "dep:hex", - "dep:serde", - "dep:url", -] - -# Enable using RustCrypto as a cryptography backend. -backend-rustcrypto = [] - -# Enable using BoringSLL as a cryptography backend. -# -# NOTE: This unconditionally turns on the "sha1" and "sha256" features, -# because the `boring` crate which provides the BoringSSL cryptography -# implementations does not permit conditionally compiling those -# implementations out. Since they're _always_ present, we might as well -# use them unconditionally. -backend-boringssl = ["dep:boring", "hash-sha1", "hash-sha256"] - -# Enable using OpenSSL as a cryptography backend. -# -# NOTE: Like the "boringssl" feature, this unconditionally turns on -# the "sha1" and "sha256" features, as they're not able to be -# conditionally compiled out of the dependency, so there's no reason to omit -# them here. -backend-openssl = ["dep:openssl", "hash-sha1", "hash-sha256"] - -[[bench]] -name = "benchmark" -harness = false diff --git a/gitoid/README.md b/gitoid/README.md deleted file mode 100644 index fdf09ef..0000000 --- a/gitoid/README.md +++ /dev/null @@ -1,139 +0,0 @@ - -# `gitoid` crate - -This crate implements `GitOid`s, Git Object Identifiers, in Rust. The crate -is created and maintained by the [OmniBOR] project, and is intended primarily -for that project's use cases. - -## Usage - -The key type of this crate is `GitOid`, which is parameterized over two traits: -`HashAlgorithm` and `ObjectType`. Both of these are sealed traits, which means -they are _only_ implementable by types found in the `gitoid` crate itself. 
To -use the `GitOid` type, you must provide these type parameters like so: - -```rust -use gitoid::{GitOid, Sha256, Blob}; - -fn main() { - let id = GitOid::<Sha256, Blob>::from_str("hello, world"); - println!("{}", id); -} -``` - -If you intend to use just a specific instantiation of the `GitOid` type, you -can make this a bit cleaner with a type alias: - -```rust -use gitoid::{Sha256, Blob}; - -type GitOid = gitoid::GitOid<Sha256, Blob>; - -fn main() { - let id = GitOid::from_str("hello, world"); - println!("{}", id); -} -``` - -## Design - -This crate is designed to limit the size of the `GitOid` type in memory, and to -place as much work as possible at compile time. To that end, the `GitOid` type -uses a generic array under the hood to ensure the storage buffer is exactly sized -to the number of bytes required to store the hashes output by the chosen hash -algorithm. The hash algorithm and object type information are also wired up at -compile time through method calls on the `GitOid` type, so they can be accessible -at runtime without actually being stored on a per-`GitOid`-basis. - -### Git Compatibility - -This crate actually diverges from Git's handling of object identifiers in two -meaningful ways. - -1. The in-memory representation of GitOIDs is different in the `gitoid` crate - and in `git` itself. In Git, the relevant type is called `object_id`, and - is [defined as follows][git_object_id]: - - ```c - struct object_id { - unsigned char hash[GIT_MAX_RAWSZ]; - int algo; /* XXX requires 4-byte alignment */ - }; - ``` - - This type contains a buffer, sized to hold a number of bytes equal to the - maximum needed by the largest hash supported by Git (currently 32 bytes - as required by SHA-256), along with an integer which is used to indicated - the selected hash algorithm. This is ineffecient in the case of hash - algorithms whose hash output is smaller than 32 bytes (like SHA-1), and - also means that algorithm selection is delegated to runtime. 
It also - doesn't, at the type level or in the embedded data, distinguish between - the four types of objects supposed for identification by Git: blobs - (files), commits, tags, and trees (directories). The object types are - handled by standard formatting rules for producing the input to the hash - function which produces the hash (this is what we'll call the "GitOID - hash construction") instead. - - So this representation is less space efficient than it could be and omits - some information (object type) in favor of an implicit type based on - the construction of the input to the hash function. - - In the `gitoid` crate, by comparison, the _only_ thing we store at - runtime is a buffer sized exactly to the number of bytes needed to store - the hash output by the chosen hash function, and we use zero-cost compile - time features to encode the hash algorithm and object type. - - We _do not_ currently implement handling for object types besides `blob`, - because that's all we need for the OmniBOR project, and would love to - add support for `tree`, `commit`, and `tag` in the future. -2. The Git project talks about Git Object IDs being done either with the - SHA-1 hash algorithm or with SHA-256, but that's actually not _quite_ - true. The SHA-1 algorithm is known to be broken, with the ability for - attackers to instigate collisions, and to limit the impact of this - breakage, Git by default uses a variant of SHA-1 called SHA-1CD (short - for "SHA-1 with Collision Detection). This algorithm checks data being - hashed for the presence of some collision-generating vectors of data, and - if those are detected, it modifies the hashing in a way that stops the - collision from happening. - - For Git's purposes, this white lie is tolerable, because the IDs are never - intended for use outside of Git, but for the purpose of OmniBOR we care - about being completely accurate about the construction used since IDs are - intended to be independently reprodicible by _anyone_. 
- - In this crate, we therefore distinguish between the `sha1` algorithm and - the `sha1cd` algorithm. This is reflected in the `gitoid`-scheme URLs - generated when using the `GitOid` type. - -## Boring Feature - -The `gitoid` crate supports using the BoringSSL cryptographic library for SHA-1 -and SHA-256 hashing through the `boring` feature. This can be useful for -environments where BoringSSL is preferred or required for compliance reasons. - -### Enabling the Boring Feature - -To enable the `boring` feature, add the following to your `Cargo.toml`: - -```toml -[dependencies] -gitoid = { version = "0.7.1", features = ["boring"] } -``` - -When the `boring` feature is enabled, the crate will use BoringSSL's -implementations of SHA-1 and SHA-256 instead of the default RustCrypto -implementations. Note that `sha1cd` is not supported by the `boring` feature -and will fall back to using the RustCrypto implementation. - -## Minimum Supported Rust Version (MSRV) - -This crate does not maintain a Minimum Supported Rust Version, and generally -tracks the latest Rust stable version. - -## License - -This crate is Apache 2.0 licensed. - - -[OmniBOR]: https://omnibor.io -[git_object_id]: https://github.com/git/git/blob/f41f85c9ec8d4d46de0fd5fded88db94d3ec8c11/hash-ll.h#L133-L136 diff --git a/gitoid/src/backend/boringssl.rs b/gitoid/src/backend/boringssl.rs deleted file mode 100644 index e5d79ec..0000000 --- a/gitoid/src/backend/boringssl.rs +++ /dev/null @@ -1,105 +0,0 @@ -//! BoringSSL-based cryptography backend. - -use crate::{impl_hash_algorithm, sealed::Sealed, HashAlgorithm}; -use boring::sha; -use digest::{ - consts::{U20, U32}, - generic_array::GenericArray, - Digest, FixedOutput, HashMarker, Output, OutputSizeUser, Update, -}; - -#[cfg(feature = "hash-sha1")] -/// SHA-1 algorithm -pub struct Sha256 { - #[doc(hidden)] - _private: (), -} - -/// Boring SHA-256 implementation. 
-#[doc(hidden)] -pub struct BoringSha256 { - hash: sha::Sha256, -} - -#[cfg(all(feature = "hash-sha256"))] -impl_hash_algorithm!(Sha256, BoringSha256, "sha256"); - -impl Update for BoringSha256 { - fn update(&mut self, data: &[u8]) { - self.hash.update(data); - } -} - -impl OutputSizeUser for BoringSha256 { - type OutputSize = U32; -} - -impl FixedOutput for BoringSha256 { - fn finalize_into(self, out: &mut Output) { - out.copy_from_slice(self.hash.finish().as_slice()); - } - - fn finalize_fixed(self) -> Output { - let mut out = Output::::default(); - out.copy_from_slice(self.hash.finish().as_slice()); - out - } -} - -impl HashMarker for BoringSha256 {} - -impl Default for BoringSha256 { - fn default() -> Self { - Self { - hash: sha::Sha256::new(), - } - } -} - -#[cfg(feature = "hash-sha1")] -/// SHA-1 algorithm -pub struct Sha1 { - #[doc(hidden)] - _private: (), -} - -/// Boring SHA-1 implementation. -#[doc(hidden)] -pub struct BoringSha1 { - hash: sha::Sha1, -} - -#[cfg(all(feature = "hash-sha1"))] -impl_hash_algorithm!(Sha1, BoringSha1, "sha1"); - -impl Update for BoringSha1 { - fn update(&mut self, data: &[u8]) { - self.hash.update(data); - } -} - -impl OutputSizeUser for BoringSha1 { - type OutputSize = U20; -} - -impl FixedOutput for BoringSha1 { - fn finalize_into(self, out: &mut Output) { - out.copy_from_slice(self.hash.finish().as_slice()); - } - - fn finalize_fixed(self) -> Output { - let mut out = Output::::default(); - out.copy_from_slice(self.hash.finish().as_slice()); - out - } -} - -impl HashMarker for BoringSha1 {} - -impl Default for BoringSha1 { - fn default() -> Self { - Self { - hash: sha::Sha1::new(), - } - } -} diff --git a/gitoid/src/backend/mod.rs b/gitoid/src/backend/mod.rs deleted file mode 100644 index 0dda636..0000000 --- a/gitoid/src/backend/mod.rs +++ /dev/null @@ -1,10 +0,0 @@ -//! Cryptography backends, providing hash function implementations. 
- -#[cfg(feature = "backend-boringssl")] -pub mod boringssl; - -#[cfg(feature = "backend-openssl")] -pub mod openssl; - -#[cfg(feature = "backend-rustcrypto")] -pub mod rustcrypto; diff --git a/gitoid/src/backend/openssl.rs b/gitoid/src/backend/openssl.rs deleted file mode 100644 index a87ceb7..0000000 --- a/gitoid/src/backend/openssl.rs +++ /dev/null @@ -1,105 +0,0 @@ -//! OpenSSL-based cryptography backend. - -use crate::{impl_hash_algorithm, sealed::Sealed, HashAlgorithm}; -use digest::{ - consts::{U20, U32}, - generic_array::GenericArray, - Digest, FixedOutput, HashMarker, Output, OutputSizeUser, Update, -}; -use openssl::sha; - -#[cfg(feature = "hash-sha1")] -/// SHA-1 algorithm -pub struct Sha256 { - #[doc(hidden)] - _private: (), -} - -/// OpenSSL SHA-256 implementation. -#[doc(hidden)] -pub struct OpenSSLSha256 { - hash: sha::Sha256, -} - -#[cfg(all(feature = "hash-sha256"))] -impl_hash_algorithm!(Sha256, OpenSSLSha256, "sha256"); - -impl Update for OpenSSLSha256 { - fn update(&mut self, data: &[u8]) { - self.hash.update(data); - } -} - -impl OutputSizeUser for OpenSSLSha256 { - type OutputSize = U32; -} - -impl FixedOutput for OpenSSLSha256 { - fn finalize_into(self, out: &mut Output) { - out.copy_from_slice(self.hash.finish().as_slice()); - } - - fn finalize_fixed(self) -> Output { - let mut out = Output::::default(); - out.copy_from_slice(self.hash.finish().as_slice()); - out - } -} - -impl HashMarker for OpenSSLSha256 {} - -impl Default for OpenSSLSha256 { - fn default() -> Self { - Self { - hash: sha::Sha256::new(), - } - } -} - -#[cfg(feature = "hash-sha1")] -/// SHA-1 algorithm -pub struct Sha1 { - #[doc(hidden)] - _private: (), -} - -/// OpenSSL SHA-1 implementation. 
-#[doc(hidden)] -pub struct OpenSSLSha1 { - hash: sha::Sha1, -} - -#[cfg(all(feature = "hash-sha1"))] -impl_hash_algorithm!(Sha1, OpenSSLSha1, "sha1"); - -impl Update for OpenSSLSha1 { - fn update(&mut self, data: &[u8]) { - self.hash.update(data); - } -} - -impl OutputSizeUser for OpenSSLSha1 { - type OutputSize = U20; -} - -impl FixedOutput for OpenSSLSha1 { - fn finalize_into(self, out: &mut Output) { - out.copy_from_slice(self.hash.finish().as_slice()); - } - - fn finalize_fixed(self) -> Output { - let mut out = Output::::default(); - out.copy_from_slice(self.hash.finish().as_slice()); - out - } -} - -impl HashMarker for OpenSSLSha1 {} - -impl Default for OpenSSLSha1 { - fn default() -> Self { - Self { - hash: sha::Sha1::new(), - } - } -} diff --git a/gitoid/src/backend/rustcrypto.rs b/gitoid/src/backend/rustcrypto.rs deleted file mode 100644 index 1b8b7c4..0000000 --- a/gitoid/src/backend/rustcrypto.rs +++ /dev/null @@ -1,37 +0,0 @@ -//! RustCrypto-based cryptography backend. - -use crate::{impl_hash_algorithm, sealed::Sealed, HashAlgorithm}; -use digest::{generic_array::GenericArray, Digest, OutputSizeUser}; - -#[cfg(doc)] -use crate::GitOid; - -#[cfg(feature = "hash-sha1")] -/// SHA-1 algorithm, -pub struct Sha1 { - #[doc(hidden)] - _private: (), -} - -#[cfg(feature = "hash-sha1")] -impl_hash_algorithm!(Sha1, sha1::Sha1, "sha1"); - -#[cfg(feature = "hash-sha1cd")] -/// SHA-1Cd (collision detection) algorithm. -pub struct Sha1Cd { - #[doc(hidden)] - _private: (), -} - -#[cfg(feature = "hash-sha1cd")] -impl_hash_algorithm!(Sha1Cd, sha1collisiondetection::Sha1CD, "sha1cd"); - -#[cfg(feature = "hash-sha256")] -/// SHA-256 algorithm. -pub struct Sha256 { - #[doc(hidden)] - _private: (), -} - -#[cfg(feature = "hash-sha256")] -impl_hash_algorithm!(Sha256, sha2::Sha256, "sha256"); diff --git a/gitoid/src/error.rs b/gitoid/src/error.rs deleted file mode 100644 index 272a82a..0000000 --- a/gitoid/src/error.rs +++ /dev/null @@ -1,173 +0,0 @@ -//! 
Error arising from `GitOid` construction or use. - -use core::{ - fmt::{Display, Formatter, Result as FmtResult}, - result::Result as StdResult, -}; - -#[cfg(feature = "std")] -use { - hex::FromHexError as HexError, - std::{error::Error as StdError, io::Error as IoError}, - url::{ParseError as UrlError, Url}, -}; - -/// A `Result` with `gitoid::Error` as the error type. -pub(crate) type Result = StdResult; - -/// An error arising during `GitOid` construction or use. -#[derive(Debug)] -pub enum Error { - #[cfg(feature = "std")] - /// Tried to construct a `GitOid` from a `Url` with a scheme besides `gitoid`. - InvalidScheme(Url), - - #[cfg(feature = "std")] - /// Tried to construct a `GitOid` from a `Url` without an `ObjectType` in it. - MissingObjectType(Url), - - #[cfg(feature = "std")] - /// Tried to construct a `GitOid` from a `Url` without a `HashAlgorithm` in it. - MissingHashAlgorithm(Url), - - #[cfg(feature = "std")] - /// Tried to construct a `GitOid` from a `Url` without a hash in it. - MissingHash(Url), - - /// Tried to parse an unknown object type. - UnknownObjectType, - - /// The expected object type didn't match the provided type. - MismatchedObjectType { expected: &'static str }, - - /// The expected hash algorithm didn't match the provided algorithm. - MismatchedHashAlgorithm { expected: &'static str }, - - /// The expected size of a hash for an algorithm didn't match the provided size. - UnexpectedHashLength { expected: usize, observed: usize }, - - /// The amount of data read didn't match the expected amount of data - UnexpectedReadLength { expected: usize, observed: usize }, - - #[cfg(feature = "std")] - /// Tried to parse an invalid hex string. - InvalidHex(HexError), - - #[cfg(feature = "std")] - /// Could not construct a valid URL based on the `GitOid` data. - Url(UrlError), - - #[cfg(feature = "std")] - /// Could not perform the IO operations necessary to construct the `GitOid`. 
- Io(IoError), -} - -impl Display for Error { - fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { - match self { - #[cfg(feature = "std")] - Error::InvalidScheme(url) => write!(f, "invalid scheme in URL '{}'", url.scheme()), - - #[cfg(feature = "std")] - Error::MissingObjectType(url) => write!(f, "missing object type in URL '{}'", url), - - #[cfg(feature = "std")] - Error::MissingHashAlgorithm(url) => { - write!(f, "missing hash algorithm in URL '{}'", url) - } - - #[cfg(feature = "std")] - Error::MissingHash(url) => write!(f, "missing hash in URL '{}'", url), - - Error::UnknownObjectType => write!(f, "unknown object type"), - - Error::MismatchedObjectType { expected } => { - write!(f, "mismatched object type; expected '{}'", expected,) - } - - Error::MismatchedHashAlgorithm { expected } => { - write!(f, "mismatched hash algorithm; expected '{}'", expected) - } - - Error::UnexpectedHashLength { expected, observed } => { - write!( - f, - "unexpected hash length; expected '{}', got '{}'", - expected, observed - ) - } - - Error::UnexpectedReadLength { expected, observed } => { - write!( - f, - "unexpected read length; expected '{}', got '{}'", - expected, observed - ) - } - - #[cfg(feature = "std")] - Error::InvalidHex(_) => write!(f, "invalid hex string"), - - #[cfg(feature = "std")] - Error::Url(e) => write!(f, "{}", e), - - #[cfg(feature = "std")] - Error::Io(e) => write!(f, "{}", e), - } - } -} - -#[cfg(feature = "std")] -impl StdError for Error { - fn source(&self) -> Option<&(dyn StdError + 'static)> { - match self { - #[cfg(feature = "std")] - Error::InvalidScheme(_) => None, - - #[cfg(feature = "std")] - Error::MissingObjectType(_) => None, - - #[cfg(feature = "std")] - Error::MissingHashAlgorithm(_) => None, - - #[cfg(feature = "std")] - Error::MissingHash(_) => None, - - Error::UnknownObjectType - | Error::MismatchedObjectType { .. } - | Error::MismatchedHashAlgorithm { .. } - | Error::UnexpectedHashLength { .. } - | Error::UnexpectedReadLength { .. 
} => None, - - #[cfg(feature = "std")] - Error::InvalidHex(e) => Some(e), - - #[cfg(feature = "std")] - Error::Url(e) => Some(e), - - #[cfg(feature = "std")] - Error::Io(e) => Some(e), - } - } -} - -#[cfg(feature = "std")] -impl From for Error { - fn from(e: HexError) -> Error { - Error::InvalidHex(e) - } -} - -#[cfg(feature = "std")] -impl From for Error { - fn from(e: UrlError) -> Error { - Error::Url(e) - } -} - -#[cfg(feature = "std")] -impl From for Error { - fn from(e: IoError) -> Error { - Error::Io(e) - } -} diff --git a/gitoid/src/hash_algorithm.rs b/gitoid/src/hash_algorithm.rs deleted file mode 100644 index 2a11e1e..0000000 --- a/gitoid/src/hash_algorithm.rs +++ /dev/null @@ -1,68 +0,0 @@ -//! Trait specifying valid [`GitOid`] hash algorithms. - -use crate::sealed::Sealed; -use core::{fmt::Debug, hash::Hash, ops::Deref}; -use digest::{block_buffer::generic_array::GenericArray, Digest, OutputSizeUser}; - -#[cfg(doc)] -use crate::GitOid; - -/// Hash algorithms that can be used to make a [`GitOid`]. -/// -/// This is a sealed trait to ensure it's only used for hash -/// algorithms which are actually supported by Git. -/// -/// For more information on sealed traits, read Predrag -/// Gruevski's ["A Definitive Guide to Sealed Traits in Rust"][1]. -/// -/// [1]: https://predr.ag/blog/definitive-guide-to-sealed-traits-in-rust/ -pub trait HashAlgorithm: Sealed { - /// The name of the hash algorithm in lowercase ASCII. - #[doc(hidden)] - const NAME: &'static str; - - /// The actual digest type used by the algorithm. - #[doc(hidden)] - type Alg: Digest; - - /// The array type generated by the hash. - #[doc(hidden)] - type Array: Copy + PartialEq + Ord + Hash + Debug + Deref; - - /// Helper function to convert the GenericArray type to Self::Array - #[doc(hidden)] - fn array_from_generic( - arr: GenericArray::OutputSize>, - ) -> Self::Array; - - /// Get an instance of the digester. 
- #[doc(hidden)] - fn new() -> Self::Alg; -} - -#[doc(hidden)] -#[macro_export] -#[allow(unused_macros)] -macro_rules! impl_hash_algorithm { - ( $type:ident, $alg_ty:ty, $name:literal ) => { - impl Sealed for $type {} - - impl HashAlgorithm for $type { - const NAME: &'static str = $name; - - type Alg = $alg_ty; - - type Array = GenericArray::OutputSize>; - - fn array_from_generic( - arr: GenericArray::OutputSize>, - ) -> Self::Array { - arr - } - - fn new() -> Self::Alg { - Self::Alg::new() - } - } - }; -} diff --git a/gitoid/src/internal.rs b/gitoid/src/internal.rs deleted file mode 100644 index baeaf83..0000000 --- a/gitoid/src/internal.rs +++ /dev/null @@ -1,309 +0,0 @@ -//! A gitoid representing a single artifact. - -use crate::{Error, GitOid, HashAlgorithm, ObjectType, Result}; -use core::marker::PhantomData; -use digest::{block_buffer::generic_array::GenericArray, Digest, OutputSizeUser}; - -/// Generate a GitOid from data in a buffer of bytes. -/// -/// If data is small enough to fit in memory, then generating a GitOid for it -/// this way should be much faster, as it doesn't require seeking. 
-pub(crate) fn gitoid_from_buffer( - digester: H::Alg, - reader: &[u8], - expected_len: usize, -) -> Result> -where - H: HashAlgorithm, - O: ObjectType, -{ - let expected_hash_length = ::output_size(); - let (hash, amount_read) = hash_from_buffer::(digester, reader, expected_len)?; - - if amount_read != expected_len { - return Err(Error::UnexpectedReadLength { - expected: expected_len, - observed: amount_read, - }); - } - - if hash.len() != expected_hash_length { - return Err(Error::UnexpectedHashLength { - expected: expected_hash_length, - observed: hash.len(), - }); - } - - Ok(GitOid { - _phantom: PhantomData, - value: H::array_from_generic(hash), - }) -} - -#[cfg(feature = "std")] -pub(crate) use standard_impls::gitoid_from_reader; - -#[cfg(feature = "async")] -pub(crate) use async_impls::gitoid_from_async_reader; - -/// Helper function which actually applies the [`GitOid`] construction rules. -/// -/// This function handles actually constructing the hash with the GitOID prefix, -/// and delegates to a buffered reader for performance of the chunked reading. -fn hash_from_buffer( - mut digester: D, - buffer: &[u8], - expected_len: usize, -) -> Result<(GenericArray, usize)> -where - D: Digest, - O: ObjectType, -{ - let hashed_len = expected_len - num_carriage_returns_in_buffer(buffer); - digest_gitoid_header(&mut digester, O::NAME, hashed_len); - digest_with_normalized_newlines(&mut digester, buffer); - Ok((digester.finalize(), expected_len)) -} - -/// Digest the "header" required for a GitOID. -#[inline] -fn digest_gitoid_header(digester: &mut D, object_type: &str, object_len: usize) -where - D: Digest, -{ - digester.update(object_type.as_bytes()); - digester.update(b" "); - digester.update(object_len.to_string().as_bytes()); - digester.update(b"\0"); -} - -/// Update a hash digest with data in a buffer, normalizing newlines. 
-#[inline] -fn digest_with_normalized_newlines(digester: &mut D, buf: &[u8]) -where - D: Digest, -{ - for chunk in buf.chunk_by(|char1, _| *char1 != b'\r') { - let chunk = match chunk.last() { - // Omit the carriage return at the end of the chunk. - Some(b'\r') => &chunk[0..(chunk.len() - 1)], - _ => chunk, - }; - - digester.update(chunk) - } -} - -/// Count carriage returns in an in-memory buffer. -#[inline(always)] -fn num_carriage_returns_in_buffer(buffer: &[u8]) -> usize { - bytecount::count(buffer, b'\r') -} - -#[cfg(feature = "std")] -mod standard_impls { - use crate::{ - util::for_each_buf_fill::ForEachBufFill as _, Error, GitOid, HashAlgorithm, ObjectType, - Result, - }; - use core::marker::PhantomData; - use digest::{block_buffer::generic_array::GenericArray, Digest, OutputSizeUser}; - use std::io::{BufReader, Read, Seek, SeekFrom}; - - /// Generate a GitOid by reading from an arbitrary reader. - pub(crate) fn gitoid_from_reader( - digester: H::Alg, - reader: R, - expected_len: usize, - ) -> Result> - where - H: HashAlgorithm, - O: ObjectType, - R: Read + Seek, - { - let expected_hash_length = ::output_size(); - let (hash, amount_read) = hash_from_reader::(digester, reader, expected_len)?; - - if amount_read != expected_len { - return Err(Error::UnexpectedReadLength { - expected: expected_len, - observed: amount_read, - }); - } - - if hash.len() != expected_hash_length { - return Err(Error::UnexpectedHashLength { - expected: expected_hash_length, - observed: hash.len(), - }); - } - - Ok(GitOid { - _phantom: PhantomData, - value: H::array_from_generic(hash), - }) - } - - /// Read a seek-able stream and reset to the beginning when done. - fn read_and_reset(reader: R, f: F) -> Result<(usize, R)> - where - R: Read + Seek, - F: Fn(R) -> Result<(usize, R)>, - { - let (data, mut reader) = f(reader)?; - reader.seek(SeekFrom::Start(0))?; - Ok((data, reader)) - } - - /// Count carriage returns in a reader. 
- fn num_carriage_returns_in_reader(reader: R) -> Result<(usize, R)> - where - R: Read + Seek, - { - read_and_reset(reader, |reader| { - let mut buf_reader = BufReader::new(reader); - let mut total_dos_newlines = 0; - - buf_reader.for_each_buf_fill(|buf| { - // The number of separators is the number of chunks minus one. - total_dos_newlines += buf.chunk_by(|char1, _| *char1 != b'\r').count() - 1 - })?; - - Ok((total_dos_newlines, buf_reader.into_inner())) - }) - } - - /// Helper function which actually applies the [`GitOid`] construction rules. - /// - /// This function handles actually constructing the hash with the GitOID prefix, - /// and delegates to a buffered reader for performance of the chunked reading. - fn hash_from_reader( - mut digester: D, - reader: R, - expected_len: usize, - ) -> Result<(GenericArray, usize)> - where - D: Digest, - O: ObjectType, - R: Read + Seek, - { - let (num_carriage_returns, reader) = num_carriage_returns_in_reader(reader)?; - let hashed_len = expected_len - num_carriage_returns; - - super::digest_gitoid_header(&mut digester, O::NAME, hashed_len); - let amount_read = BufReader::new(reader) - .for_each_buf_fill(|b| super::digest_with_normalized_newlines(&mut digester, b))?; - - Ok((digester.finalize(), amount_read)) - } -} - -#[cfg(feature = "async")] -mod async_impls { - use crate::{Error, GitOid, HashAlgorithm, ObjectType, Result}; - use core::marker::PhantomData; - use digest::{block_buffer::generic_array::GenericArray, Digest, OutputSizeUser}; - use std::io::SeekFrom; - use tokio::io::{ - AsyncBufReadExt as _, AsyncRead, AsyncSeek, AsyncSeekExt as _, BufReader as AsyncBufReader, - }; - - use super::digest_with_normalized_newlines; - - /// Async version of `gitoid_from_reader`. 
- pub(crate) async fn gitoid_from_async_reader( - digester: H::Alg, - reader: R, - expected_len: usize, - ) -> Result> - where - H: HashAlgorithm, - O: ObjectType, - R: AsyncRead + AsyncSeek + Unpin, - { - let expected_hash_len = ::output_size(); - let (hash, amount_read) = - hash_from_async_buffer::(digester, reader, expected_len).await?; - - if amount_read != expected_len { - return Err(Error::UnexpectedHashLength { - expected: expected_len, - observed: amount_read, - }); - } - - if hash.len() != expected_hash_len { - return Err(Error::UnexpectedHashLength { - expected: expected_hash_len, - observed: hash.len(), - }); - } - - Ok(GitOid { - _phantom: PhantomData, - value: H::array_from_generic(hash), - }) - } - - /// Async version of `hash_from_buffer`. - async fn hash_from_async_buffer( - mut digester: D, - reader: R, - expected_len: usize, - ) -> Result<(GenericArray, usize)> - where - D: Digest, - O: ObjectType, - R: AsyncRead + AsyncSeek + Unpin, - { - let (num_carriage_returns, reader) = num_carriage_returns_in_async_reader(reader).await?; - let hashed_len = expected_len - num_carriage_returns; - - super::digest_gitoid_header(&mut digester, O::NAME, hashed_len); - - let mut reader = AsyncBufReader::new(reader); - let mut total_read = 0; - - loop { - let buffer = reader.fill_buf().await?; - let amount_read = buffer.len(); - - if amount_read == 0 { - break; - } - - digest_with_normalized_newlines(&mut digester, buffer); - - reader.consume(amount_read); - total_read += amount_read; - } - - Ok((digester.finalize(), total_read)) - } - - /// Count carriage returns in a reader. 
- async fn num_carriage_returns_in_async_reader(reader: R) -> Result<(usize, R)> - where - R: AsyncRead + AsyncSeek + Unpin, - { - let mut reader = AsyncBufReader::new(reader); - let mut total_dos_newlines = 0; - - loop { - let buffer = reader.fill_buf().await?; - let amount_read = buffer.len(); - - if amount_read == 0 { - break; - } - - total_dos_newlines += buffer.chunk_by(|char1, _| *char1 != b'\r').count() - 1; - - reader.consume(amount_read); - } - - let (data, mut reader) = (total_dos_newlines, reader.into_inner()); - reader.seek(SeekFrom::Start(0)).await?; - Ok((data, reader)) - } -} diff --git a/gitoid/src/object_type.rs b/gitoid/src/object_type.rs deleted file mode 100644 index 2d81d90..0000000 --- a/gitoid/src/object_type.rs +++ /dev/null @@ -1,61 +0,0 @@ -//! The types of objects for which a `GitOid` can be made. - -use crate::sealed::Sealed; - -#[cfg(doc)] -use crate::GitOid; - -/// Object types usable to construct a [`GitOid`]. -/// -/// This is a sealed trait to ensure it's only used for hash -/// algorithms which are actually supported by Git. -/// -/// For more information on sealed traits, read Predrag -/// Gruevski's ["A Definitive Guide to Sealed Traits in Rust"][1]. -/// -/// [1]: https://predr.ag/blog/definitive-guide-to-sealed-traits-in-rust/ -pub trait ObjectType: Sealed { - #[doc(hidden)] - const NAME: &'static str; -} - -macro_rules! define_object_type { - ( $name:tt, $s:literal ) => { - impl Sealed for $name {} - - impl ObjectType for $name { - const NAME: &'static str = $s; - } - }; -} - -/// A Blob GitOid object. -pub struct Blob { - #[doc(hidden)] - _private: (), -} - -define_object_type!(Blob, "blob"); - -/// A Tree GitOid object. -pub struct Tree { - #[doc(hidden)] - _private: (), -} - -define_object_type!(Tree, "tree"); - -/// A Tag GitOid object. -pub struct Tag { - #[doc(hidden)] - _private: (), -} - -define_object_type!(Tag, "tag"); - -/// A Commit GitOid object. 
-pub struct Commit { - _private: (), -} - -define_object_type!(Commit, "commit"); diff --git a/gitoid/src/tests.rs b/gitoid/src/tests.rs deleted file mode 100644 index a59eacc..0000000 --- a/gitoid/src/tests.rs +++ /dev/null @@ -1,259 +0,0 @@ -use super::*; - -#[cfg(all(feature = "hash-sha1", feature = "backend-rustcrypto"))] -use crate::rustcrypto::Sha1; - -#[cfg(all(feature = "hash-sha256", feature = "backend-rustcrypto"))] -use crate::rustcrypto::Sha256; - -#[cfg(feature = "async")] -use tokio::{fs::File as AsyncFile, runtime::Runtime}; - -#[cfg(feature = "std")] -use { - crate::{Blob, GitOid}, - serde_test::{assert_tokens, Token}, - std::fs::File, - url::Url, -}; - -/// SHA-1 hash of a file containing "hello world" -/// -/// Taken from a Git repo as ground truth. -#[cfg(feature = "hash-sha1")] -const GITOID_HELLO_WORLD_SHA1: &str = "gitoid:blob:sha1:95d09f2b10159347eece71399a7e2e907ea3df4f"; - -/// SHA-256 hash of a file containing "hello world" -/// -/// Taken from a Git repo as ground truth. 
-#[cfg(feature = "hash-sha256")] -const GITOID_HELLO_WORLD_SHA256: &str = - "gitoid:blob:sha256:fee53a18d32820613c0527aa79be5cb30173c823a9b448fa4817767cc84c6f03"; - -#[cfg(all(feature = "hash-sha1", feature = "backend-rustcrypto", feature = "std"))] -#[test] -fn generate_sha1_gitoid_from_bytes() { - let input = b"hello world"; - let result = GitOid::::id_bytes(input); - - assert_eq!(result.to_string(), GITOID_HELLO_WORLD_SHA1); -} - -#[cfg(all(feature = "hash-sha1", feature = "backend-rustcrypto", feature = "std"))] -#[test] -fn generate_sha1_gitoid_from_buffer() -> Result<()> { - let reader = File::open("test/data/hello_world.txt")?; - let result = GitOid::::id_reader(reader)?; - - assert_eq!(result.to_string(), GITOID_HELLO_WORLD_SHA1); - - Ok(()) -} - -#[cfg(all(feature = "hash-sha256", feature = "backend-rustcrypto"))] -#[test] -fn generate_sha256_gitoid_from_bytes() { - let input = b"hello world"; - let result = GitOid::::id_bytes(input); - - assert_eq!(result.to_string(), GITOID_HELLO_WORLD_SHA256); -} - -#[cfg(all( - feature = "hash-sha256", - feature = "backend-rustcrypto", - feature = "std" -))] -#[test] -fn generate_sha256_gitoid_from_buffer() -> Result<()> { - let reader = File::open("test/data/hello_world.txt")?; - let result = GitOid::::id_reader(reader)?; - - assert_eq!(result.to_string(), GITOID_HELLO_WORLD_SHA256); - - Ok(()) -} - -#[cfg(all( - feature = "hash-sha256", - feature = "backend-rustcrypto", - feature = "async" -))] -#[test] -fn generate_sha256_gitoid_from_async_buffer() -> Result<()> { - let runtime = Runtime::new()?; - runtime.block_on(async { - let reader = AsyncFile::open("test/data/hello_world.txt").await?; - let result = GitOid::::id_async_reader(reader).await?; - - assert_eq!(result.to_string(), GITOID_HELLO_WORLD_SHA256); - - Ok(()) - }) -} - -#[cfg(all(feature = "hash-sha256", feature = "backend-rustcrypto"))] -#[test] -fn newline_normalization_from_file() -> Result<()> { - let unix_file = File::open("test/data/unix_line.txt")?; 
- let windows_file = File::open("test/data/windows_line.txt")?; - - let unix_gitoid = GitOid::::id_reader(unix_file)?; - let windows_gitoid = GitOid::::id_reader(windows_file)?; - - assert_eq!(unix_gitoid.to_string(), windows_gitoid.to_string()); - - Ok(()) -} - -#[cfg(all( - feature = "hash-sha256", - feature = "backend-rustcrypto", - feature = "async" -))] -#[test] -fn newline_normalization_from_async_file() -> Result<()> { - let runtime = Runtime::new()?; - runtime.block_on(async { - let unix_file = AsyncFile::open("test/data/unix_line.txt").await?; - let windows_file = AsyncFile::open("test/data/windows_line.txt").await?; - - let unix_gitoid = GitOid::::id_async_reader(unix_file).await?; - let windows_gitoid = GitOid::::id_async_reader(windows_file).await?; - - assert_eq!(unix_gitoid.to_string(), windows_gitoid.to_string()); - - Ok(()) - }) -} - -#[cfg(all(feature = "hash-sha256", feature = "backend-rustcrypto"))] -#[test] -fn newline_normalization_in_memory() -> Result<()> { - let with_crlf = b"some\r\nstring\r\n"; - let wout_crlf = b"some\nstring\n"; - - let with_crlf_gitoid = GitOid::::id_bytes(&with_crlf[..]); - let wout_crlf_gitoid = GitOid::::id_bytes(&wout_crlf[..]); - - assert_eq!(with_crlf_gitoid.to_string(), wout_crlf_gitoid.to_string()); - - Ok(()) -} - -#[cfg(all(feature = "hash-sha256", feature = "backend-rustcrypto"))] -#[test] -fn validate_uri() -> Result<()> { - let content = b"hello world"; - let gitoid = GitOid::::id_bytes(content); - - assert_eq!(gitoid.url().to_string(), GITOID_HELLO_WORLD_SHA256); - - Ok(()) -} - -#[cfg(all( - feature = "hash-sha256", - feature = "backend-rustcrypto", - feature = "std" -))] -#[test] -fn try_from_url_bad_scheme() { - let url = Url::parse("gitiod:blob:sha1:95d09f2b10159347eece71399a7e2e907ea3df4f").unwrap(); - - match GitOid::::try_from_url(url) { - Ok(_) => panic!("gitoid parsing should fail"), - Err(e) => assert_eq!(e.to_string(), "invalid scheme in URL 'gitiod'"), - } -} - -#[cfg(all(feature = "hash-sha1", 
feature = "backend-rustcrypto", feature = "std"))] -#[test] -fn try_from_url_missing_object_type() { - let url = Url::parse("gitoid:").unwrap(); - - match GitOid::::try_from_url(url) { - Ok(_) => panic!("gitoid parsing should fail"), - Err(e) => assert_eq!(e.to_string(), "missing object type in URL 'gitoid:'"), - } -} - -#[cfg(all(feature = "hash-sha1", feature = "backend-rustcrypto", feature = "std"))] -#[test] -fn try_from_url_bad_object_type() { - let url = Url::parse("gitoid:whatever").unwrap(); - - match GitOid::::try_from_url(url) { - Ok(_) => panic!("gitoid parsing should fail"), - Err(e) => assert_eq!(e.to_string(), "mismatched object type; expected 'blob'"), - } -} - -#[cfg(all( - feature = "hash-sha256", - feature = "backend-rustcrypto", - feature = "std" -))] -#[test] -fn try_from_url_missing_hash_algorithm() { - let url = Url::parse("gitoid:blob:").unwrap(); - - match GitOid::::try_from_url(url) { - Ok(_) => panic!("gitoid parsing should fail"), - Err(e) => assert_eq!( - e.to_string(), - "missing hash algorithm in URL 'gitoid:blob:'" - ), - } -} - -#[cfg(all(feature = "hash-sha1", feature = "backend-rustcrypto", feature = "std"))] -#[test] -fn try_from_url_bad_hash_algorithm() { - let url = Url::parse("gitoid:blob:sha10000").unwrap(); - - match GitOid::::try_from_url(url) { - Ok(_) => panic!("gitoid parsing should fail"), - Err(e) => assert_eq!(e.to_string(), "mismatched hash algorithm; expected 'sha1'"), - } -} - -#[cfg(all( - feature = "hash-sha256", - feature = "backend-rustcrypto", - feature = "std" -))] -#[test] -fn try_from_url_missing_hash() { - let url = Url::parse("gitoid:blob:sha256:").unwrap(); - - match GitOid::::try_from_url(url) { - Ok(_) => panic!("gitoid parsing should fail"), - Err(e) => assert_eq!(e.to_string(), "missing hash in URL 'gitoid:blob:sha256:'"), - } -} - -#[cfg(all( - feature = "hash-sha256", - feature = "backend-rustcrypto", - feature = "std" -))] -#[test] -fn try_url_roundtrip() { - let url = 
Url::parse(GITOID_HELLO_WORLD_SHA256).unwrap(); - let gitoid = GitOid::::try_from_url(url.clone()).unwrap(); - let output = gitoid.url(); - assert_eq!(url, output); -} - -// Validate serialization and deserialization work as expected. -#[cfg(all( - feature = "std", - feature = "hash-sha256", - feature = "backend-rustcrypto" -))] -#[test] -fn valid_gitoid_ser_de() { - let id = GitOid::::id_str("hello world"); - assert_tokens(&id, &[Token::Str(GITOID_HELLO_WORLD_SHA256)]); -} diff --git a/gitoid/src/util/mod.rs b/gitoid/src/util/mod.rs deleted file mode 100644 index ab70c22..0000000 --- a/gitoid/src/util/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -//! Helper modules for use internally. - -pub(crate) mod for_each_buf_fill; -pub(crate) mod stream_len; diff --git a/gitoid/test/data/hello_world.txt b/gitoid/test/data/hello_world.txt deleted file mode 100644 index 95d09f2..0000000 --- a/gitoid/test/data/hello_world.txt +++ /dev/null @@ -1 +0,0 @@ -hello world \ No newline at end of file diff --git a/omnibor-cli/src/cli.rs b/omnibor-cli/src/cli.rs index 7d16851..00082c7 100644 --- a/omnibor-cli/src/cli.rs +++ b/omnibor-cli/src/cli.rs @@ -3,7 +3,10 @@ use crate::error::Error; use clap::{builder::PossibleValue, ValueEnum}; use clap_verbosity_flag::{InfoLevel, Verbosity}; -use omnibor::{hashes::Sha256, ArtifactId, IntoArtifactId}; +use omnibor::{ + artifact_id::{ArtifactId, ArtifactIdBuilder}, + hash_algorithm::Sha256, +}; use pathbuf::pathbuf; use std::{ default::Default, @@ -290,11 +293,13 @@ impl FromStr for IdentifiableArg { } } -impl IntoArtifactId for IdentifiableArg { - fn into_artifact_id(self) -> Result, omnibor::Error> { +impl IdentifiableArg { + pub fn into_artifact_id(self) -> Result, omnibor::error::Error> { match self { IdentifiableArg::ArtifactId(aid) => Ok(aid), - IdentifiableArg::Path(path) => path.into_artifact_id(), + IdentifiableArg::Path(path) => { + ArtifactIdBuilder::with_rustcrypto().identify_path(&path) + } } } } diff --git 
a/omnibor-cli/src/cmd/manifest/create.rs b/omnibor-cli/src/cmd/manifest/create.rs index e0b60d9..0fa875e 100644 --- a/omnibor-cli/src/cmd/manifest/create.rs +++ b/omnibor-cli/src/cmd/manifest/create.rs @@ -6,10 +6,14 @@ use crate::{ error::{Error, Result}, }; use omnibor::{ - embedding::{EmbeddingMode, NoEmbed}, - hashes::Sha256, + artifact_id::{ArtifactId, ArtifactIdBuilder}, + hash_algorithm::Sha256, + hash_provider::{HashProvider, RustCrypto}, + input_manifest::{ + embedding_mode::{EmbeddingMode, NoEmbed}, + InputManifestBuilder, ShouldStore, + }, storage::{FileSystemStorage, Storage}, - ArtifactId, InputManifestBuilder, IntoArtifactId, ShouldStore, }; use pathbuf::pathbuf; use std::{ @@ -28,19 +32,21 @@ pub async fn run(app: &App, args: &ManifestCreateArgs) -> Result<()> { } let root = app.args.dir().ok_or(Error::NoRoot)?; - let storage = FileSystemStorage::new(root).map_err(Error::StorageInitFailed)?; - let builder = InputManifestBuilder::::with_storage(storage); + let storage = + FileSystemStorage::new(RustCrypto::new(), root).map_err(Error::StorageInitFailed)?; + let builder = InputManifestBuilder::::new(storage, RustCrypto::new()); create_with_builder(args, builder)?; Ok(()) } -fn create_with_builder( +fn create_with_builder( args: &ManifestCreateArgs, - mut builder: InputManifestBuilder, + mut builder: InputManifestBuilder, ) -> Result<()> where E: EmbeddingMode, S: Storage, + P: HashProvider, { for input in &args.inputs { let aid = input.clone().into_artifact_id().map_err(Error::IdFailed)?; @@ -66,8 +72,9 @@ where Ok(file) => file, Err(source) => { let mut existing_file = File::open(&path).unwrap(); - let existing_file_aid = - ArtifactId::::id_reader(&mut existing_file).unwrap(); + let existing_file_aid = ArtifactIdBuilder::with_rustcrypto() + .identify_file(&mut existing_file) + .unwrap(); if existing_file_aid == linked_manifest.manifest_aid() { info!("matching manifest already found at '{}'", path.display()); return Ok(()); @@ -82,7 +89,7 @@ where 
output_file // SAFETY: We just constructed the manifest, so we know it's fine. - .write_all(&linked_manifest.manifest().as_bytes().unwrap()) + .write_all(&linked_manifest.manifest().as_bytes()) .map_err(|source| Error::CantWriteManifest { path: path.to_path_buf(), source, diff --git a/omnibor-cli/src/error.rs b/omnibor-cli/src/error.rs index bd4a3f6..49b55dd 100644 --- a/omnibor-cli/src/error.rs +++ b/omnibor-cli/src/error.rs @@ -1,7 +1,7 @@ //! Error types. use async_channel::SendError; -use omnibor::Error as OmniborError; +use omnibor::error::Error as OmniborError; use serde_json::Error as JsonError; use std::{io::Error as IoError, path::PathBuf, result::Result as StdResult}; use tokio::task::JoinError; diff --git a/omnibor-cli/src/fs.rs b/omnibor-cli/src/fs.rs index 34ea3e3..be4fa82 100644 --- a/omnibor-cli/src/fs.rs +++ b/omnibor-cli/src/fs.rs @@ -9,7 +9,10 @@ use crate::{ use async_channel::{bounded, Receiver, Sender as WorkSender}; use async_walkdir::{DirEntry as AsyncDirEntry, WalkDir}; use futures_util::{pin_mut, StreamExt}; -use omnibor::{hashes::Sha256, ArtifactId}; +use omnibor::{ + artifact_id::{ArtifactId, ArtifactIdBuilder}, + hash_algorithm::Sha256, +}; use std::path::{Path, PathBuf}; use tokio::{fs::File as AsyncFile, task::JoinSet}; use tracing::debug; @@ -179,7 +182,8 @@ pub async fn open_async_file(path: &Path) -> Result { /// Identify a file using a SHA-256 hash. 
pub async fn sha256_id_async_file(file: &mut AsyncFile, path: &Path) -> Result> { - ArtifactId::id_async_reader(file) + ArtifactIdBuilder::with_rustcrypto() + .identify_async_file(file) .await .map_err(|source| Error::FileFailedToId { path: path.to_path_buf(), diff --git a/omnibor/Cargo.toml b/omnibor/Cargo.toml index 399bc35..7143086 100644 --- a/omnibor/Cargo.toml +++ b/omnibor/Cargo.toml @@ -32,30 +32,67 @@ crate-type = [ [dependencies] -gitoid = { version = "0.9.0", path = "../gitoid", default-features = false, features = [ - "async", - "backend-rustcrypto", - "hash-sha256", - "std", -] } -newline-converter = "0.3.0" -pathbuf = "1.0.0" -serde = { version = "1.0.197", optional = true } -thiserror = "1.0.60" -tokio = { version = "1.36.0", features = ["io-util"] } +# NOTE: Must match the version used in the hash crate. +# +# Technically, we could rely on the re-export from one of those crates, +# but since all the hash crates are optional dependencies our usage code +# within this crate would be more complex to handle the possibility +# for any/all of them to be missing. It's simpler to just specify it here +# so we know we always get the crate. +digest = { version = "0.10.7", features = ["std"] } + +# Cryptography Providers. +boring = { version = "4.6.0", optional = true } +openssl = { version = "0.10.66", optional = true } +sha2 = { version = "0.10.8", features = ["std"], optional = true } + +# Required crates. 
+bytecount = { version = "0.6.8", features = ["runtime-dispatch-simd"] } +hex = "0.4.3" +serde = { version = "1.0.197", features = ["derive"] } +thiserror = "2.0.0" +tokio = { version = "1.36.0", features = ["io-util", "fs"] } tracing = "0.1.40" -url = "2.5.0" -walkdir = "2.5.0" +url = "2.4.1" +paste = "1.0.15" +walkdir = { version = "2.5.0" } [dev-dependencies] +anyhow = "1.0.95" +criterion = { version = "0.5.1" } +# NOTE: Match version in "dependencies" digest = "0.10.7" -pathbuf = "1.0.0" serde_test = "1.0.176" -tokio = { version = "1.36.0", features = ["io-util", "fs"] } +# Need "rt" and "fs" additionally for tests. +tokio = { version = "1.36.0", features = [ + "io-util", + "fs", + "rt", + "rt-multi-thread", +] } tokio-test = "0.4.3" [features] -# Support serde serialization and deserialization -serde = ["dep:serde", "gitoid/std"] +# By default, you get: +# +# - Async support. +# - The 'rustcrypto' backend. +# - Standard library support. +default = ["backend-rustcrypto"] + +# Enable using RustCrypto as a cryptography backend. +backend-rustcrypto = ["dep:sha2"] + +# Enable using BoringSSL as a cryptography backend. +backend-boringssl = ["dep:boring"] + +# Enable using OpenSSL as a cryptography backend. +backend-openssl = ["dep:openssl"] + +# Removed until the benchmark can be properly restored. +# +# [[bench]] +# name = "benchmark" +# harness = false diff --git a/gitoid/benches/benchmark.rs b/omnibor/new-benches/benchmark.rs similarity index 96% rename from gitoid/benches/benchmark.rs rename to omnibor/new-benches/benchmark.rs index 8a14e54..1a870c7 100644 --- a/gitoid/benches/benchmark.rs +++ b/omnibor/new-benches/benchmark.rs @@ -1,19 +1,19 @@ //! Benchmarks comparing cryptography backends. 
use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use gitoid::{Blob, GitOid}; +use omnibor::ArtifactId; #[cfg(all(feature = "backend-boringssl"))] -use gitoid::boringssl::{Sha1 as BoringSha1, Sha256 as BoringSha256}; +use omnibor::gitoid::boringssl::{Sha1 as BoringSha1, Sha256 as BoringSha256}; #[cfg(all(feature = "backend-openssl"))] -use gitoid::openssl::{Sha1 as OpenSSLSha1, Sha256 as OpenSSLSha256}; +use omnibor::gitoid::openssl::{Sha1 as OpenSSLSha1, Sha256 as OpenSSLSha256}; #[cfg(all(feature = "backend-rustcrypto", feature = "hash-sha1"))] -use gitoid::rustcrypto::Sha1 as RustSha1; +use omnibor::gitoid::rustcrypto::Sha1 as RustSha1; #[cfg(all(feature = "backend-rustcrypto", feature = "hash-sha256"))] -use gitoid::rustcrypto::Sha256 as RustSha256; +use omnibor::gitoid::rustcrypto::Sha256 as RustSha256; #[cfg(not(any( feature = "backend-rustcrypto", diff --git a/omnibor/src/artifact_id.rs b/omnibor/src/artifact_id.rs deleted file mode 100644 index f8ff32b..0000000 --- a/omnibor/src/artifact_id.rs +++ /dev/null @@ -1,535 +0,0 @@ -#[cfg(doc)] -use crate::hashes::Sha256; -use crate::hashes::SupportedHash; -use crate::Error; -use crate::InputManifest; -use crate::Result; -use gitoid::Blob; -use gitoid::GitOid; -#[cfg(feature = "serde")] -use serde::de::Deserializer; -#[cfg(feature = "serde")] -use serde::Deserialize; -#[cfg(feature = "serde")] -use serde::Serialize; -#[cfg(feature = "serde")] -use serde::Serializer; -use std::cmp::Ordering; -use std::fmt::Debug; -use std::fmt::Display; -use std::fmt::Formatter; -use std::fmt::Result as FmtResult; -use std::hash::Hash; -use std::hash::Hasher; -use std::io::Read; -use std::io::Seek; -use std::path::PathBuf; -#[cfg(feature = "serde")] -use std::result::Result as StdResult; -use std::str::FromStr; -use tokio::io::AsyncRead; -use tokio::io::AsyncSeek; -use url::Url; - -/// An OmniBOR Artifact Identifier. -/// -/// This is a content-based unique identifier for any software artifact. 
-/// -/// It is built around, per the specification, any supported hash algorithm. -/// Currently, only SHA-256 is supported, but others may be added in the future. -pub struct ArtifactId { - #[doc(hidden)] - gitoid: GitOid, -} - -impl ArtifactId { - /// Construct an [`ArtifactId`] from an existing [`GitOid`]. - /// - /// This produces an identifier using the provided [`GitOid`] directly, - /// without additional validation. The type system ensures the [`GitOid`] - /// hash algorithm is one supported for an [`ArtifactId`], and that the - /// object type is [`gitoid::Blob`]. - /// - /// # Note - /// - /// This function is not exported because we don't re-export the `GitOid` - /// type we use, which would mean users of the crate would themselves - /// need to import a binary-compatible version of the `GitOid` crate as - /// well. This is extra complexity for minimal gain, so we don't support it. - /// - /// If it were ever absolutely needed in the future, we might expose this - /// constructor with a `#[doc(hidden)]` attribute, or with documentation - /// which clearly outlines the extra complexity. - fn from_gitoid(gitoid: GitOid) -> ArtifactId { - ArtifactId { gitoid } - } - - /// Construct an [`ArtifactId`] from raw bytes. - /// - /// This hashes the bytes to produce an identifier. - /// - /// # Note - /// - /// Generally, `ArtifactId`s are produced so independent parties - /// can compare ID's in the future. It's generally not useful to identify - /// artifacts which are never persisted or shared in some way. - /// - /// # Example - /// - /// ```rust - /// # use omnibor::ArtifactId; - /// # use omnibor::hashes::Sha256; - /// let id: ArtifactId = ArtifactId::id_bytes(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); - /// println!("Artifact ID: {}", id); - /// ``` - pub fn id_bytes>(content: B) -> ArtifactId { - ArtifactId::from_gitoid(GitOid::id_bytes(content)) - } - - /// Construct an [`ArtifactId`] from a string. 
- /// - /// This hashes the contents of the string to produce an identifier. - /// - /// # Note - /// - /// Generally, `ArtifactId`s are produced so independent parties - /// can compare ID's in the future. It's generally not useful to identify - /// artifacts which are never persisted or shared in some way. - /// - /// # Example - /// - /// ```rust - /// # use omnibor::ArtifactId; - /// # use omnibor::hashes::Sha256; - /// let id: ArtifactId = ArtifactId::id_str("hello, world"); - /// println!("Artifact ID: {}", id); - /// ``` - pub fn id_str>(s: S) -> ArtifactId { - ArtifactId::from_gitoid(GitOid::id_str(s)) - } - - /// Construct an [`ArtifactId`] from a synchronous reader. - /// - /// This reads the content of the reader and hashes it to produce an identifier. - /// - /// Note that this will figure out the expected size in bytes of the content - /// being read by seeking to the end of the content and then back to wherever the - /// reading initially started. This is to enable a correctness check where the total - /// number of bytes hashed is checked against the expected length. If they do not - /// match, we return an [`Error`] rather than proceeding with a potentially-invalid - /// identifier. - /// - /// If you don't want this seeking to occur, you can use - /// [`ArtifactId::id_reader_with_length`] instead, which takes an explicit expected - /// length and checks against _that_ value, rather than inferring an expected length. - /// - /// Also note that this doesn't reset the reader to the beginning of its region; if - /// you provide a reader which has already read some portion of an underlying file or - /// has seeked to a point that's not the beginning, this function will continue reading - /// from that point, and the resulting hash will _not_ encompass the contents of the - /// entire file. You can use [`ArtifactId::id_reader_with_length`] and provide the - /// expected length of the full file in bytes to defend against this "partial hash" - /// error. 
- /// - /// Reads are buffered internally to reduce the number of syscalls and context switches - /// between the kernel and user code. - /// - /// # Example - /// - /// ```rust - /// # use omnibor::ArtifactId; - /// # use omnibor::hashes::Sha256; - /// # use std::fs::File; - /// let file = File::open("test/data/hello_world.txt").unwrap(); - /// let id: ArtifactId = ArtifactId::id_reader(&file).unwrap(); - /// println!("Artifact ID: {}", id); - /// ``` - pub fn id_reader(reader: R) -> Result> { - let gitoid = GitOid::id_reader(reader)?; - Ok(ArtifactId::from_gitoid(gitoid)) - } - - /// Construct an [`ArtifactId`] from a synchronous reader with an expected length. - /// - /// This reads the content of the reader and hashes it to produce an identifier. - /// - /// This uses the `expected_len` to enable a correctness check where the total - /// number of bytes hashed is checked against the expected length. If they do not - /// match, we return an [`Error`] rather than proceeding with a potentially-invalid - /// identifier. - /// - /// Also note that this doesn't reset the reader to the beginning of its region; if - /// you provide a reader which has already read some portion of an underlying file or - /// has seeked to a point that's not the beginning, this function will continue reading - /// from that point, and the resulting hash will _not_ encompass the contents of the - /// entire file. Make sure to provide the expected number of bytes for the full file - /// to protect against this error. - /// - /// Reads are buffered internally to reduce the number of syscalls and context switches - /// between the kernel and user code. 
- /// - /// # Example - /// - /// ```rust - /// # use omnibor::ArtifactId; - /// # use omnibor::hashes::Sha256; - /// # use std::fs::File; - /// let file = File::open("test/data/hello_world.txt").unwrap(); - /// let id: ArtifactId = ArtifactId::id_reader_with_length(&file, 11).unwrap(); - /// println!("Artifact ID: {}", id); - /// ``` - pub fn id_reader_with_length(reader: R, expected_length: usize) -> Result> - where - R: Read + Seek, - { - let gitoid = GitOid::id_reader_with_length(reader, expected_length)?; - Ok(ArtifactId::from_gitoid(gitoid)) - } - - /// Construct an [`ArtifactId`] from an asynchronous reader. - /// - /// This reads the content of the reader and hashes it to produce an identifier. - /// - /// Reading is done asynchronously by the Tokio runtime. The specifics of how this - /// is done are based on the configuration of the runtime. - /// - /// Note that this will figure out the expected size in bytes of the content - /// being read by seeking to the end of the content and then back to wherever the - /// reading initially started. This is to enable a correctness check where the total - /// number of bytes hashed is checked against the expected length. If they do not - /// match, we return an [`Error`] rather than proceeding with a potentially-invalid - /// identifier. - /// - /// If you don't want this seeking to occur, you can use - /// [`ArtifactId::id_reader_with_length`] instead, which takes an explicit expected - /// length and checks against _that_ value, rather than inferring an expected length. - /// - /// Also note that this doesn't reset the reader to the beginning of its region; if - /// you provide a reader which has already read some portion of an underlying file or - /// has seeked to a point that's not the beginning, this function will continue reading - /// from that point, and the resulting hash will _not_ encompass the contents of the - /// entire file. 
You can use [`ArtifactId::id_reader_with_length`] and provide the - /// expected length of the full file in bytes to defend against this "partial hash" - /// error. - /// - /// Reads are buffered internally to reduce the number of syscalls and context switches - /// between the kernel and user code. - /// - /// # Example - /// - /// ```rust - /// # use omnibor::ArtifactId; - /// # use omnibor::hashes::Sha256; - /// # use tokio::fs::File; - /// # tokio_test::block_on(async { - /// let mut file = File::open("test/data/hello_world.txt").await.unwrap(); - /// let id: ArtifactId = ArtifactId::id_async_reader(&mut file).await.unwrap(); - /// println!("Artifact ID: {}", id); - /// # }) - /// ``` - pub async fn id_async_reader( - reader: R, - ) -> Result> { - let gitoid = GitOid::id_async_reader(reader).await?; - Ok(ArtifactId::from_gitoid(gitoid)) - } - - /// Construct an [`ArtifactId`] from an asynchronous reader with an expected length. - /// - /// This reads the content of the reader and hashes it to produce an identifier. - /// - /// Reading is done asynchronously by the Tokio runtime. The specifics of how this - /// is done are based on the configuration of the runtime. - /// - /// This uses the `expected_len` to enable a correctness check where the total - /// number of bytes hashed is checked against the expected length. If they do not - /// match, we return an [`Error`] rather than proceeding with a potentially-invalid - /// identifier. - /// - /// Also note that this doesn't reset the reader to the beginning of its region; if - /// you provide a reader which has already read some portion of an underlying file or - /// has seeked to a point that's not the beginning, this function will continue reading - /// from that point, and the resulting hash will _not_ encompass the contents of the - /// entire file. Make sure to provide the expected number of bytes for the full file - /// to protect against this error. 
- /// - /// Reads are buffered internally to reduce the number of syscalls and context switches - /// between the kernel and user code. - /// - /// # Example - /// - /// ```rust - /// # use omnibor::ArtifactId; - /// # use omnibor::hashes::Sha256; - /// # use tokio::fs::File; - /// # tokio_test::block_on(async { - /// let mut file = File::open("test/data/hello_world.txt").await.unwrap(); - /// let id: ArtifactId = ArtifactId::id_async_reader_with_length(&mut file, 11).await.unwrap(); - /// println!("Artifact ID: {}", id); - /// # }) - /// ``` - pub async fn id_async_reader_with_length( - reader: R, - expected_length: usize, - ) -> Result> { - let gitoid = GitOid::id_async_reader_with_length(reader, expected_length).await?; - Ok(ArtifactId::from_gitoid(gitoid)) - } - - /// Construct an [`ArtifactId`] for an [`InputManifest`]. - pub fn id_manifest(manifest: &InputManifest) -> Result { - Ok(ArtifactId::id_bytes(manifest.as_bytes()?)) - } - - /// Construct an [`ArtifactId`] from a `gitoid`-scheme [`Url`]. - /// - /// This validates that the provided URL has a hashing scheme which matches the one - /// selected for your [`ArtifactId`] (today, only `sha256` is supported), and has the - /// `blob` object type. It also validates that the provided hash is a valid hash for - /// the specified hashing scheme. If any of these checks fail, the function returns - /// an [`Error`]. - /// - /// Note that this expects a `gitoid`-scheme URL, as defined by IANA. This method - /// _does not_ expect an HTTP or HTTPS URL to access, retrieve contents, and hash - /// those contents to produce an identifier. You _can_ implement that yourself with - /// a Rust HTTP(S) crate and [`ArtifactId::id_bytes`]. 
- /// - /// # Example - /// - /// ```rust - /// # use omnibor::ArtifactId; - /// # use omnibor::hashes::Sha256; - /// # use url::Url; - /// let url = Url::parse("gitoid:blob:sha256:fee53a18d32820613c0527aa79be5cb30173c823a9b448fa4817767cc84c6f03").unwrap(); - /// let id: ArtifactId = ArtifactId::try_from_url(url).unwrap(); - /// println!("Artifact ID: {}", id); - /// ``` - pub fn try_from_url(url: Url) -> Result> { - ArtifactId::try_from(url) - } - - /// Try to construct an [`ArtifactId`] from a filesystem-safe representation. - pub fn try_from_safe_name(s: &str) -> Result> { - ArtifactId::from_str(&s.replace('_', ":")) - } - - /// Get the [`Url`] representation of the [`ArtifactId`]. - /// - /// This returns a `gitoid`-scheme URL for the [`ArtifactId`]. - /// - /// # Example - /// - /// ```rust - /// # use omnibor::ArtifactId; - /// # use omnibor::hashes::Sha256; - /// let id: ArtifactId = ArtifactId::id_str("hello, world"); - /// println!("Artifact ID URL: {}", id.url()); - /// ``` - pub fn url(&self) -> Url { - self.gitoid.url() - } - - /// Get a filesystem-safe representation of the [`ArtifactId`]. - /// - /// This is a conservative method that tries to use _only_ characters - /// which can be expected to work broadly cross-platform. - /// - /// What that means for us is that the `:` separator character is - /// replaced with `_`. - pub fn as_file_name(&self) -> PathBuf { - let name = self.gitoid.url().to_string().replace(':', "_"); - let mut path = PathBuf::from(name); - path.set_extension("manifest"); - path - } - - /// Get the underlying bytes of the [`ArtifactId`] hash. - /// - /// This slice is the raw underlying buffer of the [`ArtifactId`], exactly - /// as produced by the hasher. 
- /// - /// # Example - /// - /// ```rust - /// # use omnibor::ArtifactId; - /// # use omnibor::hashes::Sha256; - /// let id: ArtifactId = ArtifactId::id_str("hello, world"); - /// println!("Artifact ID bytes: {:?}", id.as_bytes()); - /// ``` - pub fn as_bytes(&self) -> &[u8] { - self.gitoid.as_bytes() - } - - /// Get the bytes of the [`ArtifactId`] hash as a hexadecimal string. - /// - /// This returns a [`String`] rather than [`str`] because the string must be - /// constructed on the fly, as we do not store a hexadecimal representation - /// of the hash data. - /// - /// # Example - /// - /// ```rust - /// # use omnibor::ArtifactId; - /// # use omnibor::hashes::Sha256; - /// let id: ArtifactId = ArtifactId::id_str("hello, world"); - /// println!("Artifact ID bytes as hex: {}", id.as_hex()); - /// ``` - pub fn as_hex(&self) -> String { - self.gitoid.as_hex() - } - - /// Get the name of the hash algorithm used in the [`ArtifactId`] as a string. - /// - /// For [`Sha256`], this is the string `"sha256"`. - /// - /// # Example - /// - /// ```rust - /// # use omnibor::ArtifactId; - /// # use omnibor::hashes::Sha256; - /// let id: ArtifactId = ArtifactId::id_str("hello, world"); - /// println!("Artifact ID hash algorithm: {}", id.hash_algorithm()); - /// ``` - pub const fn hash_algorithm(&self) -> &'static str { - self.gitoid.hash_algorithm() - } - - /// Get the object type used in the [`ArtifactId`] as a string. - /// - /// For all [`ArtifactId`]s this is `"blob"`, but the method is provided - /// for completeness nonetheless. - /// - /// # Example - /// - /// ```rust - /// # use omnibor::ArtifactId; - /// # use omnibor::hashes::Sha256; - /// let id: ArtifactId = ArtifactId::id_str("hello, world"); - /// println!("Artifact ID object type: {}", id.object_type()); - /// ``` - pub const fn object_type(&self) -> &'static str { - self.gitoid.object_type() - } - - /// Get the length in bytes of the hash used in the [`ArtifactId`]. 
- /// - /// In the future this method will be `const`, but is not able to be - /// today due to limitations in the Rust cryptography crates we use - /// internally. - /// - /// # Example - /// - /// ```rust - /// # use omnibor::ArtifactId; - /// # use omnibor::hashes::Sha256; - /// let id: ArtifactId = ArtifactId::id_str("hello, world"); - /// println!("Artifact ID hash length in bytes: {}", id.hash_len()); - /// ``` - pub fn hash_len(&self) -> usize { - self.gitoid.hash_len() - } -} - -impl FromStr for ArtifactId { - type Err = Error; - - fn from_str(s: &str) -> Result> { - let url = Url::parse(s)?; - ArtifactId::try_from_url(url) - } -} - -impl Clone for ArtifactId { - fn clone(&self) -> Self { - *self - } -} - -impl Copy for ArtifactId {} - -impl PartialEq> for ArtifactId { - fn eq(&self, other: &Self) -> bool { - self.gitoid == other.gitoid - } -} - -impl Eq for ArtifactId {} - -impl PartialOrd> for ArtifactId { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for ArtifactId { - fn cmp(&self, other: &Self) -> Ordering { - self.gitoid.cmp(&other.gitoid) - } -} - -impl Hash for ArtifactId { - fn hash

(&self, state: &mut H2) - where - H2: Hasher, - { - self.gitoid.hash(state); - } -} - -impl Debug for ArtifactId { - fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { - f.debug_struct("ArtifactId") - .field("gitoid", &self.gitoid) - .finish() - } -} - -impl Display for ArtifactId { - fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { - write!(f, "{}", self.gitoid) - } -} - -impl From> for ArtifactId { - fn from(gitoid: GitOid) -> Self { - ArtifactId::from_gitoid(gitoid) - } -} - -impl<'r, H: SupportedHash> TryFrom<&'r str> for ArtifactId { - type Error = Error; - - fn try_from(s: &'r str) -> Result { - ArtifactId::from_str(s) - } -} - -impl TryFrom for ArtifactId { - type Error = Error; - - fn try_from(url: Url) -> Result> { - let gitoid = GitOid::try_from_url(url)?; - Ok(ArtifactId::from_gitoid(gitoid)) - } -} - -#[cfg(feature = "serde")] -impl Serialize for ArtifactId { - fn serialize(&self, serializer: S) -> StdResult - where - S: Serializer, - { - self.gitoid.serialize(serializer) - } -} - -#[cfg(feature = "serde")] -impl<'de, H: SupportedHash> Deserialize<'de> for ArtifactId { - fn deserialize(deserializer: D) -> StdResult - where - D: Deserializer<'de>, - { - let gitoid = GitOid::::deserialize(deserializer)?; - Ok(ArtifactId::from_gitoid(gitoid)) - } -} diff --git a/omnibor/src/artifact_id/artifact_id.rs b/omnibor/src/artifact_id/artifact_id.rs new file mode 100644 index 0000000..d257894 --- /dev/null +++ b/omnibor/src/artifact_id/artifact_id.rs @@ -0,0 +1,302 @@ +use { + crate::{ + error::{Error, Result}, + gitoid::GitOid, + hash_algorithm::HashAlgorithm, + object_type::Blob, + }, + core::{ + cmp::Ordering, + fmt::{Debug, Formatter, Result as FmtResult}, + hash::{Hash, Hasher}, + }, + serde::{de::Deserializer, Deserialize, Serialize, Serializer}, + std::{fmt::Display, path::PathBuf, result::Result as StdResult, str::FromStr}, + url::Url, +}; + +#[cfg(doc)] +use crate::hash_algorithm::Sha256; + +/// An OmniBOR Artifact Identifier. 
+/// +/// This is a content-based unique identifier for any software artifact. +/// +/// It is built around, per the specification, any supported hash algorithm. +/// Currently, only SHA-256 is supported, but others may be added in the future. +pub struct ArtifactId { + #[doc(hidden)] + gitoid: GitOid, +} + +impl ArtifactId { + /// Construct an [`ArtifactId`] from an existing [`GitOid`]. + /// + /// This produces an identifier using the provided [`GitOid`] directly, + /// without additional validation. The type system ensures the [`GitOid`] + /// hash algorithm is one supported for an [`ArtifactId`], and that the + /// object type is [`gitoid::Blob`]. + /// + /// # Note + /// + /// This function is not exported because we don't re-export the `GitOid` + /// type we use, which would mean users of the crate would themselves + /// need to import a binary-compatible version of the `GitOid` crate as + /// well. This is extra complexity for minimal gain, so we don't support it. + /// + /// If it were ever absolutely needed in the future, we might expose this + /// constructor with a `#[doc(hidden)]` attribute, or with documentation + /// which clearly outlines the extra complexity. + pub(crate) fn from_gitoid(gitoid: GitOid) -> ArtifactId { + ArtifactId { gitoid } + } + + /// Construct an [`ArtifactId`] from a `gitoid`-scheme [`Url`]. + /// + /// This validates that the provided URL has a hashing scheme which matches the one + /// selected for your [`ArtifactId`] (today, only `sha256` is supported), and has the + /// `blob` object type. It also validates that the provided hash is a valid hash for + /// the specified hashing scheme. If any of these checks fail, the function returns + /// an [`Error`]. + /// + /// Note that this expects a `gitoid`-scheme URL, as defined by IANA. This method + /// _does not_ expect an HTTP or HTTPS URL to access, retrieve contents, and hash + /// those contents to produce an identifier. 
+ /// + /// # Example + /// + /// ```rust + /// # use omnibor::artifact_id::ArtifactId; + /// # use omnibor::hash_algorithm::Sha256; + /// # use url::Url; + /// let url = Url::parse("gitoid:blob:sha256:fee53a18d32820613c0527aa79be5cb30173c823a9b448fa4817767cc84c6f03").unwrap(); + /// let id: ArtifactId = ArtifactId::try_from_url(url).unwrap(); + /// println!("Artifact ID: {}", id); + /// ``` + pub fn try_from_url(url: Url) -> Result> { + ArtifactId::try_from(url) + } + + /// Try to construct an [`ArtifactId`] from a filesystem-safe representation. + pub fn try_from_safe_name(s: &str) -> Result> { + ArtifactId::from_str(&s.replace('_', ":")) + } + + /// Get the [`Url`] representation of the [`ArtifactId`]. + /// + /// This returns a `gitoid`-scheme URL for the [`ArtifactId`]. + /// + /// # Example + /// + /// ```rust + /// # use omnibor::artifact_id::{ArtifactId, ArtifactIdBuilder}; + /// # use omnibor::hash_algorithm::Sha256; + /// let id: ArtifactId = ArtifactIdBuilder::with_rustcrypto().identify_string("hello, world"); + /// println!("Artifact ID URL: {}", id.url()); + /// ``` + pub fn url(&self) -> Url { + self.gitoid.url() + } + + /// Get a filesystem-safe representation of the [`ArtifactId`]. + /// + /// This is a conservative method that tries to use _only_ characters + /// which can be expected to work broadly cross-platform. + /// + /// What that means for us is that the `:` separator character is + /// replaced with `_`. + pub fn as_file_name(&self) -> PathBuf { + let name = self.gitoid.url().to_string().replace(':', "_"); + let mut path = PathBuf::from(name); + path.set_extension("manifest"); + path + } + + /// Get the underlying bytes of the [`ArtifactId`] hash. + /// + /// This slice is the raw underlying buffer of the [`ArtifactId`], exactly + /// as produced by the hasher. 
+ /// + /// # Example + /// + /// ```rust + /// # use omnibor::artifact_id::{ArtifactId, ArtifactIdBuilder}; + /// # use omnibor::hash_algorithm::Sha256; + /// let id: ArtifactId = ArtifactIdBuilder::with_rustcrypto().identify_string("hello, world"); + /// println!("Artifact ID bytes: {:?}", id.as_bytes()); + /// ``` + pub fn as_bytes(&self) -> &[u8] { + self.gitoid.as_bytes() + } + + /// Get the bytes of the [`ArtifactId`] hash as a hexadecimal string. + /// + /// This returns a [`String`] rather than [`str`] because the string must be + /// constructed on the fly, as we do not store a hexadecimal representation + /// of the hash data. + /// + /// # Example + /// + /// ```rust + /// # use omnibor::artifact_id::{ArtifactId, ArtifactIdBuilder}; + /// # use omnibor::hash_algorithm::Sha256; + /// let id: ArtifactId = ArtifactIdBuilder::with_rustcrypto().identify_string("hello, world"); + /// println!("Artifact ID bytes as hex: {}", id.as_hex()); + /// ``` + pub fn as_hex(&self) -> String { + self.gitoid.as_hex() + } + + /// Get the name of the hash algorithm used in the [`ArtifactId`] as a string. + /// + /// For [`Sha256`], this is the string `"sha256"`. + /// + /// # Example + /// + /// ```rust + /// # use omnibor::artifact_id::{ArtifactId, ArtifactIdBuilder}; + /// # use omnibor::hash_algorithm::Sha256; + /// let id: ArtifactId = ArtifactIdBuilder::with_rustcrypto().identify_string("hello, world"); + /// println!("Artifact ID hash algorithm: {}", id.hash_algorithm()); + /// ``` + pub const fn hash_algorithm(&self) -> &'static str { + self.gitoid.hash_algorithm() + } + + /// Get the object type used in the [`ArtifactId`] as a string. + /// + /// For all [`ArtifactId`]s this is `"blob"`, but the method is provided + /// for completeness nonetheless. 
+ /// + /// # Example + /// + /// ```rust + /// # use omnibor::artifact_id::{ArtifactId, ArtifactIdBuilder}; + /// # use omnibor::hash_algorithm::Sha256; + /// let id: ArtifactId = ArtifactIdBuilder::with_rustcrypto().identify_string("hello, world"); + /// println!("Artifact ID object type: {}", id.object_type()); + /// ``` + pub const fn object_type(&self) -> &'static str { + self.gitoid.object_type() + } + + /// Get the length in bytes of the hash used in the [`ArtifactId`]. + /// + /// In the future this method will be `const`, but is not able to be + /// today due to limitations in the Rust cryptography crates we use + /// internally. + /// + /// # Example + /// + /// ```rust + /// # use omnibor::artifact_id::{ArtifactId, ArtifactIdBuilder}; + /// # use omnibor::hash_algorithm::Sha256; + /// let id: ArtifactId = ArtifactIdBuilder::with_rustcrypto().identify_string("hello, world"); + /// println!("Artifact ID hash length in bytes: {}", id.hash_len()); + /// ``` + pub fn hash_len(&self) -> usize { + self.gitoid.hash_len() + } +} + +impl FromStr for ArtifactId { + type Err = Error; + + fn from_str(s: &str) -> Result> { + let url = Url::parse(s)?; + ArtifactId::try_from_url(url) + } +} + +impl Clone for ArtifactId { + fn clone(&self) -> Self { + *self + } +} + +impl Copy for ArtifactId {} + +impl PartialEq> for ArtifactId { + fn eq(&self, other: &Self) -> bool { + self.gitoid == other.gitoid + } +} + +impl Eq for ArtifactId {} + +impl PartialOrd> for ArtifactId { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for ArtifactId { + fn cmp(&self, other: &Self) -> Ordering { + self.gitoid.cmp(&other.gitoid) + } +} + +impl Hash for ArtifactId { + fn hash

(&self, state: &mut H2) + where + H2: Hasher, + { + self.gitoid.hash(state); + } +} + +impl Debug for ArtifactId { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + f.debug_struct("ArtifactId") + .field("gitoid", &self.gitoid) + .finish() + } +} + +impl Display for ArtifactId { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + write!(f, "{}", self.gitoid) + } +} + +impl From> for ArtifactId { + fn from(gitoid: GitOid) -> Self { + ArtifactId::from_gitoid(gitoid) + } +} + +impl<'r, H: HashAlgorithm> TryFrom<&'r str> for ArtifactId { + type Error = Error; + + fn try_from(s: &'r str) -> Result { + ArtifactId::from_str(s) + } +} + +impl TryFrom for ArtifactId { + type Error = Error; + + fn try_from(url: Url) -> Result> { + let gitoid = GitOid::try_from_url(url)?; + Ok(ArtifactId::from_gitoid(gitoid)) + } +} + +impl Serialize for ArtifactId { + fn serialize(&self, serializer: S) -> StdResult + where + S: Serializer, + { + self.gitoid.serialize(serializer) + } +} + +impl<'de, H: HashAlgorithm> Deserialize<'de> for ArtifactId { + fn deserialize(deserializer: D) -> StdResult + where + D: Deserializer<'de>, + { + let gitoid = GitOid::::deserialize(deserializer)?; + Ok(ArtifactId::from_gitoid(gitoid)) + } +} diff --git a/omnibor/src/artifact_id/artifact_id_builder.rs b/omnibor/src/artifact_id/artifact_id_builder.rs new file mode 100644 index 0000000..60aa689 --- /dev/null +++ b/omnibor/src/artifact_id/artifact_id_builder.rs @@ -0,0 +1,111 @@ +use { + crate::{ + artifact_id::ArtifactId, + error::Result, + gitoid::internal::{gitoid_from_async_reader, gitoid_from_buffer, gitoid_from_reader}, + hash_algorithm::{HashAlgorithm, Sha256}, + hash_provider::HashProvider, + input_manifest::InputManifest, + object_type::Blob, + }, + std::{ + fs::File, + io::{Read, Seek}, + marker::PhantomData, + path::Path, + }, + tokio::{ + fs::File as AsyncFile, + io::{AsyncRead, AsyncSeek}, + }, +}; + +pub struct ArtifactIdBuilder> { + _hash_algorithm: PhantomData, + provider: P, +} + 
+#[cfg(feature = "backend-rustcrypto")] +impl ArtifactIdBuilder { + pub fn with_rustcrypto() -> Self { + Self { + _hash_algorithm: PhantomData, + provider: crate::hash_provider::RustCrypto::new(), + } + } +} + +#[cfg(feature = "backend-boringssl")] +impl ArtifactIdBuilder { + pub fn with_boringssl() -> Self { + Self { + _hash_algorithm: PhantomData, + provider: crate::hash_provider::BoringSsl::new(), + } + } +} + +#[cfg(feature = "backend-openssl")] +impl ArtifactIdBuilder { + pub fn with_openssl() -> Self { + Self { + _hash_algorithm: PhantomData, + provider: crate::hash_provider::OpenSsl::new(), + } + } +} + +impl> ArtifactIdBuilder { + pub fn with_provider(provider: P) -> Self { + Self { + _hash_algorithm: PhantomData, + provider, + } + } + + pub fn identify_bytes(&self, bytes: &[u8]) -> ArtifactId { + // PANIC SAFETY: We're reading from an in-memory buffer, so no IO errors can arise. + let gitoid = gitoid_from_buffer::(self.provider.digester(), bytes).unwrap(); + ArtifactId::from_gitoid(gitoid) + } + + pub fn identify_string(&self, s: &str) -> ArtifactId { + self.identify_bytes(s.as_bytes()) + } + + pub fn identify_file(&self, file: &mut File) -> Result> { + self.identify_reader(file) + } + + pub fn identify_path(&self, path: &Path) -> Result> { + let mut file = File::open(path)?; + self.identify_file(&mut file) + } + + pub fn identify_reader(&self, reader: R) -> Result> { + let gitoid = gitoid_from_reader::(self.provider.digester(), reader)?; + Ok(ArtifactId::from_gitoid(gitoid)) + } + + pub async fn identify_async_file(&self, file: &mut AsyncFile) -> Result> { + self.identify_async_reader(file).await + } + + pub async fn identify_async_path(&self, path: &Path) -> Result> { + let mut file = AsyncFile::open(path).await?; + self.identify_async_file(&mut file).await + } + + pub async fn identify_async_reader( + &self, + reader: R, + ) -> Result> { + let gitoid = + gitoid_from_async_reader::(self.provider.digester(), reader).await?; + 
Ok(ArtifactId::from_gitoid(gitoid)) + } + + pub fn identify_manifest(&self, manifest: &InputManifest) -> ArtifactId { + self.identify_bytes(&manifest.as_bytes()[..]) + } +} diff --git a/omnibor/src/artifact_id/mod.rs b/omnibor/src/artifact_id/mod.rs new file mode 100644 index 0000000..d644faa --- /dev/null +++ b/omnibor/src/artifact_id/mod.rs @@ -0,0 +1,5 @@ +mod artifact_id; +mod artifact_id_builder; + +pub use artifact_id::ArtifactId; +pub use artifact_id_builder::ArtifactIdBuilder; diff --git a/omnibor/src/error.rs b/omnibor/src/error.rs index 3a36450..9aee828 100644 --- a/omnibor/src/error.rs +++ b/omnibor/src/error.rs @@ -1,17 +1,55 @@ +//! Error types for OmniBOR. + +use { + hex::FromHexError as HexError, + std::{io::Error as IoError, result::Result as StdResult}, + url::{ParseError as UrlError, Url}, +}; + #[cfg(doc)] -use crate::ArtifactId; -#[cfg(doc)] -use crate::InputManifest; -use gitoid::Error as GitOidError; -use std::io::Error as IoError; -use std::result::Result as StdResult; -use url::ParseError as UrlError; +use crate::{artifact_id::ArtifactId, input_manifest::InputManifest}; pub type Result = StdResult; /// Errors arising from [`ArtifactId`] use or [`InputManifest`] interaction. 
#[derive(Debug, thiserror::Error)] pub enum Error { + #[error("invalid scheme in URL '{0}'")] + InvalidScheme(Url), + + #[error("missing object type in URL '{0}'")] + MissingObjectType(Url), + + #[error("missing hash algorithm in URL '{0}'")] + MissingHashAlgorithm(Url), + + #[error("missing hash in URL '{0}'")] + MissingHash(Url), + + #[error("unknown object type")] + UnknownObjectType, + + #[error("mismatched object type; expected '{expected}'")] + MismatchedObjectType { expected: &'static str }, + + #[error("mismatched hash algorithm; expected '{expected}'")] + MismatchedHashAlgorithm { expected: &'static str }, + + #[error("unexpected hash length; expected '{expected}', got '{observed}'")] + UnexpectedHashLength { expected: usize, observed: usize }, + + #[error("unexpected read length; expected '{expected}', got '{observed}'")] + UnexpectedReadLength { expected: usize, observed: usize }, + + #[error("invalid hex string")] + InvalidHex(#[from] HexError), + + #[error(transparent)] + Url(#[from] UrlError), + + #[error(transparent)] + Io(#[from] IoError), + #[error("no storage root found; provide one or set the 'OMNIBOR_DIR' environment variable")] NoStorageRoot, @@ -94,11 +132,5 @@ pub enum Error { UnknownEmbeddingTarget, #[error("failed to read input manifest file")] - FailedManifestRead(#[from] IoError), - - #[error(transparent)] - GitOid(#[from] GitOidError), - - #[error(transparent)] - Url(#[from] UrlError), + FailedManifestRead(#[source] IoError), } diff --git a/omnibor/src/ffi/artifact_id.rs b/omnibor/src/ffi/artifact_id.rs index 3f41824..a7cd3a7 100644 --- a/omnibor/src/ffi/artifact_id.rs +++ b/omnibor/src/ffi/artifact_id.rs @@ -1,31 +1,31 @@ //! The main ArtifactId FFI functions. 
-use crate::ffi::error::catch_panic; -use crate::ffi::error::get_error_msg; -use crate::ffi::error::Error; -use crate::ffi::status::Status; -use crate::ffi::util::check_null; -use crate::ffi::util::write_to_c_buf; -use crate::hashes::Sha256; -use crate::ArtifactId; -use core::ffi::c_char; -use core::ffi::c_int; -use core::ffi::CStr; -use core::ptr::null; -use core::ptr::null_mut; -use core::slice::from_raw_parts; -use core::slice::from_raw_parts_mut; -use std::ffi::CString; -use std::fs::File; -#[cfg(target_family = "unix")] -use std::os::unix::prelude::FromRawFd; +use crate::artifact_id::ArtifactIdBuilder; + +use { + crate::{ + artifact_id::ArtifactId, + ffi::{ + error::{catch_panic, get_error_msg, Error}, + status::Status, + util::{check_null, write_to_c_buf}, + }, + hash_algorithm::Sha256, + }, + core::{ + ffi::{c_char, c_int, CStr}, + ptr::{null, null_mut}, + slice::{from_raw_parts, from_raw_parts_mut}, + }, + std::{ffi::CString, fs::File}, + url::Url, +}; + #[cfg(target_family = "unix")] -use std::os::unix::prelude::RawFd; -#[cfg(target_family = "windows")] -use std::os::windows::io::FromRawHandle; +use std::os::unix::prelude::{FromRawFd, RawFd}; + #[cfg(target_family = "windows")] -use std::os::windows::prelude::RawHandle; -use url::Url; +use std::os::windows::{io::FromRawHandle, prelude::RawHandle}; /// Get the last-written error message written to a buffer. 
/// @@ -103,7 +103,8 @@ pub unsafe extern "C" fn ob_aid_sha256_id_bytes( let output = catch_panic(|| { check_null(content, Error::ContentPtrIsNull)?; let content = unsafe { from_raw_parts(content, content_len) }; - let artifact_id = ArtifactIdSha256(ArtifactId::::id_bytes(content)); + let artifact_id = + ArtifactIdSha256(ArtifactIdBuilder::with_rustcrypto().identify_bytes(content)); let boxed = Box::new(artifact_id); Ok(Box::into_raw(boxed) as *const _) }); @@ -123,7 +124,7 @@ pub unsafe extern "C" fn ob_aid_sha256_id_str(s: *const c_char) -> *const Artifa let output = catch_panic(|| { check_null(s, Error::StringPtrIsNull)?; let s = unsafe { CStr::from_ptr(s) }.to_str()?; - let artifact_id = ArtifactIdSha256(ArtifactId::::id_str(s)); + let artifact_id = ArtifactIdSha256(ArtifactIdBuilder::with_rustcrypto().identify_string(s)); let boxed = Box::new(artifact_id); Ok(Box::into_raw(boxed) as *const _) }); @@ -163,35 +164,9 @@ pub unsafe extern "C" fn ob_aid_sha256_try_from_url(s: *const c_char) -> *const #[no_mangle] pub unsafe extern "C" fn ob_aid_sha256_id_reader(fd: RawFd) -> *const ArtifactIdSha256 { let output = catch_panic(|| { - let file = unsafe { File::from_raw_fd(fd) }; - let artifact_id = ArtifactIdSha256(ArtifactId::::id_reader(file)?); - let boxed = Box::new(artifact_id); - Ok(Box::into_raw(boxed) as *const _) - }); - - output.unwrap_or_else(null) -} - -/// Create a new `ArtifactId` by reading data from a file. -/// -/// # Safety -/// -/// The provided file descriptor must be valid and open for reading. -/// -/// Returns an invalid `ArtifactId` if construction fails. 
-#[cfg(target_family = "unix")] -#[no_mangle] -pub unsafe extern "C" fn ob_aid_sha256_id_reader_with_length( - fd: RawFd, - expected_length: c_int, -) -> *const ArtifactIdSha256 { - let output = catch_panic(|| { - let file = unsafe { File::from_raw_fd(fd) }; - let expected_length = expected_length as usize; - let artifact_id = ArtifactIdSha256(ArtifactId::::id_reader_with_length( - file, - expected_length, - )?); + let mut file = unsafe { File::from_raw_fd(fd) }; + let artifact_id = + ArtifactIdSha256(ArtifactIdBuilder::with_rustcrypto().identify_file(&mut file)?); let boxed = Box::new(artifact_id); Ok(Box::into_raw(boxed) as *const _) }); @@ -211,36 +186,9 @@ pub unsafe extern "C" fn ob_aid_sha256_id_reader_with_length( /// cbindgen:ignore pub unsafe extern "C" fn ob_aid_sha256_id_reader(handle: RawHandle) -> *const ArtifactIdSha256 { let output = catch_panic(|| { - let file = unsafe { File::from_raw_handle(handle) }; - let artifact_id = ArtifactIdSha256(ArtifactId::::id_reader(file)?); - let boxed = Box::new(artifact_id); - Ok(Box::into_raw(boxed) as *const _) - }); - - output.unwrap_or_else(null) -} - -/// Create a new `ArtifactId` by reading data from a file. -/// -/// # Safety -/// -/// The provided file handle must be valid and open for reading. -/// -/// Returns an invalid `ArtifactId` if construction fails. 
-#[cfg(target_family = "windows")] -#[no_mangle] -/// cbindgen:ignore -pub unsafe extern "C" fn ob_aid_sha256_id_reader_with_length( - handle: RawHandle, - expected_length: c_int, -) -> *const ArtifactIdSha256 { - let output = catch_panic(|| { - let file = unsafe { File::from_raw_handle(handle) }; - let expected_length = expected_length as usize; - let artifact_id = ArtifactIdSha256(ArtifactId::::id_reader_with_length( - file, - expected_length, - )?); + let mut file = unsafe { File::from_raw_handle(handle) }; + let artifact_id = + ArtifactIdSha256(ArtifactIdBuilder::with_rustcrypto().identify_file(&mut file)?); let boxed = Box::new(artifact_id); Ok(Box::into_raw(boxed) as *const _) }); diff --git a/omnibor/src/ffi/error.rs b/omnibor/src/ffi/error.rs index 77d7fe5..539193b 100644 --- a/omnibor/src/ffi/error.rs +++ b/omnibor/src/ffi/error.rs @@ -12,18 +12,18 @@ //! Together, these provide a consistent mechanism for collecting and reporting //! errors to users of the `ArtifactId` FFI. -use crate::error::Error as ArtifactIdError; -use core::any::Any; -use core::cell::RefCell; -use core::fmt::Display; -use core::fmt::Formatter; -use core::fmt::Result as FmtResult; -use core::panic::UnwindSafe; -use core::str::Utf8Error; -use std::error::Error as StdError; -use std::ffi::NulError; -use std::panic::catch_unwind; -use url::ParseError as UrlError; +use { + crate::error::Error as ArtifactIdError, + core::{ + any::Any, + cell::RefCell, + fmt::{Display, Formatter, Result as FmtResult}, + panic::UnwindSafe, + str::Utf8Error, + }, + std::{error::Error as StdError, ffi::NulError, panic::catch_unwind}, + url::ParseError as UrlError, +}; thread_local! { // The last error to have been reported by the FFI code. diff --git a/omnibor/src/ffi/util.rs b/omnibor/src/ffi/util.rs index 6a50b66..6ff34cf 100644 --- a/omnibor/src/ffi/util.rs +++ b/omnibor/src/ffi/util.rs @@ -1,10 +1,12 @@ //! Utility functions for the FFI code. 
-use crate::ffi::error::Error; -use crate::ffi::status::Status; -use core::ffi::c_int; -use std::ffi::CString; -use std::io::Write as _; +use { + crate::ffi::{error::Error, status::Status}, + std::{ + ffi::{c_int, CString}, + io::Write as _, + }, +}; /// Write a string slice to a C buffer. /// diff --git a/gitoid/src/gitoid.rs b/omnibor/src/gitoid/gitoid.rs similarity index 63% rename from gitoid/src/gitoid.rs rename to omnibor/src/gitoid/gitoid.rs index 65d9f0b..377723e 100644 --- a/gitoid/src/gitoid.rs +++ b/omnibor/src/gitoid/gitoid.rs @@ -1,33 +1,21 @@ //! A gitoid representing a single artifact. -use crate::{ - internal::gitoid_from_buffer, util::stream_len::stream_len, Error, HashAlgorithm, ObjectType, - Result, -}; -use core::{ - cmp::Ordering, - fmt::{Debug, Formatter, Result as FmtResult}, - hash::{Hash, Hasher}, - marker::PhantomData, -}; -use digest::OutputSizeUser; - -#[cfg(feature = "async")] use { - crate::{internal::gitoid_from_async_reader, util::stream_len::async_stream_len}, - tokio::io::{AsyncRead, AsyncSeek}, -}; - -#[cfg(feature = "std")] -use { - crate::{gitoid_url_parser::GitOidUrlParser, internal::gitoid_from_reader}, + crate::{ + error::{Error, Result}, + gitoid::gitoid_url_parser::GitOidUrlParser, + hash_algorithm::HashAlgorithm, + object_type::ObjectType, + }, serde::{ de::{Deserializer, Error as DeserializeError, Visitor}, Deserialize, Serialize, Serializer, }, std::{ - fmt::Display, - io::{Read, Seek}, + cmp::Ordering, + fmt::{Debug, Display, Formatter, Result as FmtResult}, + hash::{Hash, Hasher}, + marker::PhantomData, result::Result as StdResult, str::FromStr, }, @@ -47,10 +35,9 @@ where pub(crate) _phantom: PhantomData, #[doc(hidden)] - pub(crate) value: H::Array, + pub(crate) value: ::Array, } -#[cfg(feature = "std")] pub(crate) const GITOID_URL_SCHEME: &str = "gitoid"; impl GitOid @@ -58,74 +45,11 @@ where H: HashAlgorithm, O: ObjectType, { - /// Create a new `GitOid` based on a slice of bytes. 
- pub fn id_bytes>(content: B) -> GitOid { - fn inner(content: &[u8]) -> GitOid - where - H: HashAlgorithm, - O: ObjectType, - { - // PANIC SAFETY: We're reading from an in-memory buffer, so no IO errors can arise. - gitoid_from_buffer(H::new(), content, content.len()).unwrap() - } - - inner(content.as_ref()) - } - - /// Create a `GitOid` from a UTF-8 string slice. - pub fn id_str>(s: S) -> GitOid { - fn inner(s: &str) -> GitOid - where - H: HashAlgorithm, - O: ObjectType, - { - GitOid::id_bytes(s.as_bytes()) - } - - inner(s.as_ref()) - } - - #[cfg(feature = "std")] - /// Create a `GitOid` from a reader. - pub fn id_reader(mut reader: R) -> Result> { - let expected_length = stream_len(&mut reader)? as usize; - GitOid::id_reader_with_length(reader, expected_length) - } - - #[cfg(feature = "std")] - /// Generate a `GitOid` from a reader, providing an expected length in bytes. - pub fn id_reader_with_length(reader: R, expected_length: usize) -> Result> - where - R: Read + Seek, - { - gitoid_from_reader(H::new(), reader, expected_length) - } - - #[cfg(feature = "async")] - /// Generate a `GitOid` from an asynchronous reader. - pub async fn id_async_reader( - mut reader: R, - ) -> Result> { - let expected_length = async_stream_len(&mut reader).await? as usize; - GitOid::id_async_reader_with_length(reader, expected_length).await - } - - #[cfg(feature = "async")] - /// Generate a `GitOid` from an asynchronous reader, providing an expected length in bytes. - pub async fn id_async_reader_with_length( - reader: R, - expected_length: usize, - ) -> Result> { - gitoid_from_async_reader(H::new(), reader, expected_length).await - } - - #[cfg(feature = "std")] /// Construct a new `GitOid` from a `Url`. pub fn try_from_url(url: Url) -> Result> { GitOid::try_from(url) } - #[cfg(feature = "std")] /// Get a URL for the current `GitOid`. pub fn url(&self) -> Url { // PANIC SAFETY: We know that this is a valid URL; @@ -138,7 +62,6 @@ where &self.value[..] 
} - #[cfg(feature = "std")] /// Convert the hash to a hexadecimal string. pub fn as_hex(&self) -> String { hex::encode(self.as_bytes()) @@ -156,11 +79,10 @@ where /// Get the length of the hash in bytes. pub fn hash_len(&self) -> usize { - ::output_size() + self.value.len() } } -#[cfg(feature = "std")] impl FromStr for GitOid where H: HashAlgorithm, @@ -255,7 +177,6 @@ where } } -#[cfg(feature = "std")] impl Display for GitOid where H: HashAlgorithm, @@ -273,7 +194,6 @@ where } } -#[cfg(feature = "std")] impl Serialize for GitOid where H: HashAlgorithm, @@ -289,7 +209,6 @@ where } } -#[cfg(feature = "std")] impl<'de, H, O> Deserialize<'de> for GitOid where H: HashAlgorithm, @@ -323,7 +242,6 @@ where } } -#[cfg(feature = "std")] impl TryFrom for GitOid where H: HashAlgorithm, diff --git a/gitoid/src/gitoid_url_parser.rs b/omnibor/src/gitoid/gitoid_url_parser.rs similarity index 68% rename from gitoid/src/gitoid_url_parser.rs rename to omnibor/src/gitoid/gitoid_url_parser.rs index 591299c..571e1be 100644 --- a/gitoid/src/gitoid_url_parser.rs +++ b/omnibor/src/gitoid/gitoid_url_parser.rs @@ -1,9 +1,16 @@ //! A gitoid representing a single artifact. 
-use crate::{gitoid::GITOID_URL_SCHEME, Error, GitOid, HashAlgorithm, ObjectType, Result}; -use core::{marker::PhantomData, ops::Not as _}; -use digest::{block_buffer::generic_array::GenericArray, OutputSizeUser}; -use {core::str::Split, digest::block_buffer::generic_array::sequence::GenericSequence, url::Url}; +use { + crate::{ + error::{Error, Result}, + gitoid::{gitoid::GITOID_URL_SCHEME, GitOid}, + hash_algorithm::HashAlgorithm, + object_type::ObjectType, + }, + std::{marker::PhantomData, ops::Not as _, str::Split}, + url::Url, +}; + pub(crate) struct GitOidUrlParser<'u, H, O> where H: HashAlgorithm, @@ -43,9 +50,9 @@ where .and_then(|_| self.validate_object_type()) .and_then(|_| self.validate_hash_algorithm()) .and_then(|_| self.parse_hash()) - .map(|hash| GitOid { + .map(|value| GitOid { _phantom: PhantomData, - value: H::array_from_generic(hash), + value, }) } @@ -85,26 +92,15 @@ where Ok(()) } - fn parse_hash(&mut self) -> Result::OutputSize>> { + fn parse_hash(&mut self) -> Result { let hex_str = self .segments .next() .and_then(some_if_not_empty) .ok_or_else(|| Error::MissingHash(self.url.clone()))?; - // TODO(alilleybrinker): When `sha1` et al. move to generic-array 1.0, - // update this to use the `arr!` macro. - let mut value = GenericArray::generate(|_| 0); - hex::decode_to_slice(hex_str, &mut value)?; - - let expected_size = ::output_size(); - if value.len() != expected_size { - return Err(Error::UnexpectedHashLength { - expected: expected_size, - observed: value.len(), - }); - } - + let decoded = hex::decode(hex_str)?; + let value = ::Array::from_iter(decoded); Ok(value) } } diff --git a/omnibor/src/gitoid/internal.rs b/omnibor/src/gitoid/internal.rs new file mode 100644 index 0000000..bb8c864 --- /dev/null +++ b/omnibor/src/gitoid/internal.rs @@ -0,0 +1,192 @@ +//! A gitoid representing a single artifact. 
+ +use crate::{ + error::Result, + gitoid::GitOid, + hash_algorithm::HashAlgorithm, + object_type::ObjectType, + util::{ + for_each_buf_fill::ForEachBufFill as _, + stream_len::{async_stream_len, stream_len}, + }, +}; +use digest::Digest; +use std::{ + io::{BufReader, Read, Seek, SeekFrom}, + marker::PhantomData, +}; +use tokio::io::{ + AsyncBufReadExt as _, AsyncRead, AsyncSeek, AsyncSeekExt as _, BufReader as AsyncBufReader, +}; + +/// Generate a GitOid from data in a buffer of bytes. +/// +/// If data is small enough to fit in memory, then generating a GitOid for it +/// this way should be much faster, as it doesn't require seeking. +pub(crate) fn gitoid_from_buffer( + mut digester: impl Digest, + buffer: &[u8], +) -> Result> +where + H: HashAlgorithm, + O: ObjectType, +{ + let hashed_len = buffer.len() - num_carriage_returns_in_buffer(buffer); + digest_gitoid_header(&mut digester, O::NAME, hashed_len); + digest_with_normalized_newlines(&mut digester, buffer); + let hash = digester.finalize(); + Ok(GitOid { + _phantom: PhantomData, + value: ::Array::from_iter(hash), + }) +} + +/// Generate a GitOid by reading from an arbitrary reader. +pub(crate) fn gitoid_from_reader( + mut digester: impl Digest, + mut reader: R, +) -> Result> +where + H: HashAlgorithm, + O: ObjectType, + R: Read + Seek, +{ + let expected_len = stream_len(&mut reader)? as usize; + let (num_carriage_returns, reader) = num_carriage_returns_in_reader(reader)?; + let hashed_len = expected_len - num_carriage_returns; + + digest_gitoid_header(&mut digester, O::NAME, hashed_len); + let _ = BufReader::new(reader) + .for_each_buf_fill(|b| digest_with_normalized_newlines(&mut digester, b))?; + + let hash = digester.finalize(); + + Ok(GitOid { + _phantom: PhantomData, + value: ::Array::from_iter(hash), + }) +} + +/// Async version of `gitoid_from_reader`. 
+pub(crate) async fn gitoid_from_async_reader( + mut digester: impl Digest, + mut reader: R, +) -> Result> +where + H: HashAlgorithm, + O: ObjectType, + R: AsyncRead + AsyncSeek + Unpin, +{ + let expected_len = async_stream_len(&mut reader).await? as usize; + + let (num_carriage_returns, reader) = num_carriage_returns_in_async_reader(reader).await?; + let hashed_len = expected_len - num_carriage_returns; + + digest_gitoid_header(&mut digester, O::NAME, hashed_len); + + let mut reader = AsyncBufReader::new(reader); + + loop { + let buffer = reader.fill_buf().await?; + let amount_read = buffer.len(); + + if amount_read == 0 { + break; + } + + digest_with_normalized_newlines(&mut digester, buffer); + + reader.consume(amount_read); + } + + let hash = digester.finalize(); + + Ok(GitOid { + _phantom: PhantomData, + value: ::Array::from_iter(hash), + }) +} + +/// Digest the "header" required for a GitOID. +#[inline] +fn digest_gitoid_header(digester: &mut impl Digest, object_type: &str, object_len: usize) { + digester.update(object_type.as_bytes()); + digester.update(b" "); + digester.update(object_len.to_string().as_bytes()); + digester.update(b"\0"); +} + +/// Update a hash digest with data in a buffer, normalizing newlines. +#[inline] +fn digest_with_normalized_newlines(digester: &mut impl Digest, buf: &[u8]) { + for chunk in buf.chunk_by(|char1, _| *char1 != b'\r') { + let chunk = match chunk.last() { + // Omit the carriage return at the end of the chunk. + Some(b'\r') => &chunk[0..(chunk.len() - 1)], + _ => chunk, + }; + + digester.update(chunk) + } +} + +/// Count carriage returns in an in-memory buffer. +#[inline(always)] +fn num_carriage_returns_in_buffer(buffer: &[u8]) -> usize { + bytecount::count(buffer, b'\r') +} + +/// Read a seek-able stream and reset to the beginning when done. 
+fn read_and_reset(reader: R, f: F) -> Result<(usize, R)> +where + R: Read + Seek, + F: Fn(R) -> Result<(usize, R)>, +{ + let (data, mut reader) = f(reader)?; + reader.seek(SeekFrom::Start(0))?; + Ok((data, reader)) +} + +/// Count carriage returns in a reader. +fn num_carriage_returns_in_reader(reader: R) -> Result<(usize, R)> +where + R: Read + Seek, +{ + read_and_reset(reader, |reader| { + let mut buf_reader = BufReader::new(reader); + let mut total_dos_newlines = 0; + + buf_reader.for_each_buf_fill(|buf| { + // The number of separators is the number of chunks minus one. + total_dos_newlines += buf.chunk_by(|char1, _| *char1 != b'\r').count() - 1 + })?; + + Ok((total_dos_newlines, buf_reader.into_inner())) + }) +} + +/// Count carriage returns in a reader. +async fn num_carriage_returns_in_async_reader(reader: R) -> Result<(usize, R)> +where + R: AsyncRead + AsyncSeek + Unpin, +{ + let mut reader = AsyncBufReader::new(reader); + let mut total_dos_newlines = 0; + + loop { + let buffer = reader.fill_buf().await?; + let amount_read = buffer.len(); + + if amount_read == 0 { + break; + } + + total_dos_newlines += buffer.chunk_by(|char1, _| *char1 != b'\r').count() - 1; + + reader.consume(amount_read); + } + + let (data, mut reader) = (total_dos_newlines, reader.into_inner()); + reader.seek(SeekFrom::Start(0)).await?; + Ok((data, reader)) +} diff --git a/gitoid/src/lib.rs b/omnibor/src/gitoid/mod.rs similarity index 67% rename from gitoid/src/lib.rs rename to omnibor/src/gitoid/mod.rs index 55dfd39..0b8acf7 100644 --- a/gitoid/src/lib.rs +++ b/omnibor/src/gitoid/mod.rs @@ -103,77 +103,8 @@ //! [gitoid]: https://git-scm.com/book/en/v2/Git-Internals-Git-Objects //! 
[omnibor]: https://omnibor.io -#![cfg_attr(not(feature = "std"), no_std)] - -#[cfg(not(any( - feature = "hash-sha1", - feature = "hash-sha1cd", - feature = "hash-sha256" -)))] -compile_error!( - r#"At least one hash algorithm feature must be active: "hash-sha1", "hash-sha1cd", or "hash-sha256""# -); - -#[cfg(all( - feature = "hash-sha1cd", - feature = "backend-boringssl", - not(feature = "backend-rustcrypto") -))] -compile_error!(r#"The "backend-boringssl" feature does not support the "hash-sha1cd" algorithm"#); - -#[cfg(all( - feature = "hash-sha1cd", - feature = "backend-openssl", - not(feature = "backend-rustcrypto") -))] -compile_error!(r#"The "backend-openssl" feature does not support the "hash-sha1cd" algorithm"#); - -#[cfg(all( - feature = "backend-rustcrypto", - not(any( - feature = "hash-sha1", - feature = "hash-sha1cd", - feature = "hash-sha256" - )) -))] -compile_error!( - r#"The "backend-rustcrypto" feature requires at least one of the following algorithms: "hash-sha1", "hash-sha1cd", or "hash-sha256""# -); - -#[cfg(not(any( - feature = "backend-rustcrypto", - feature = "backend-boringssl", - feature = "backend-openssl" -)))] -compile_error!( - r#"At least one of the "backend-rustcrypto", "backend-boringssl", or "backend-openssl" features must be enabled"# -); - -mod backend; -mod error; mod gitoid; -#[cfg(feature = "std")] mod gitoid_url_parser; -mod hash_algorithm; -mod internal; -mod object_type; -pub(crate) mod sealed; -#[cfg(test)] -mod tests; -mod util; +pub(crate) mod internal; -#[cfg(feature = "backend-boringssl")] -pub use crate::backend::boringssl; -#[cfg(feature = "backend-openssl")] -pub use crate::backend::openssl; -#[cfg(feature = "backend-rustcrypto")] -pub use crate::backend::rustcrypto; -pub use crate::error::Error; -pub(crate) use crate::error::Result; -pub use crate::gitoid::GitOid; -pub use crate::hash_algorithm::HashAlgorithm; -pub use crate::object_type::Blob; -pub use crate::object_type::Commit; -pub use 
crate::object_type::ObjectType; -pub use crate::object_type::Tag; -pub use crate::object_type::Tree; +pub use crate::gitoid::gitoid::GitOid; diff --git a/omnibor/src/hash_algorithm.rs b/omnibor/src/hash_algorithm.rs new file mode 100644 index 0000000..3b614cd --- /dev/null +++ b/omnibor/src/hash_algorithm.rs @@ -0,0 +1,41 @@ +use { + crate::util::sealed::Sealed, + digest::{ + consts::U32, + generic_array::{sequence::GenericSequence, GenericArray}, + }, + std::{fmt::Debug, ops::Deref}, +}; + +/// Marker trait for hash algorithms supported for constructing `GitOid`s. +pub trait HashAlgorithm: Sealed { + /// The name of the hash algorithm, to be written in the GitOid string. + #[doc(hidden)] + const NAME: &'static str; + + /// The array type generated by the hash. + #[doc(hidden)] + type Array: GenericSequence + + FromIterator + + Deref + + PartialEq + + Eq + + Clone + + Copy + + Debug; +} + +/// Use SHA-256 as the hash algorithm. +pub struct Sha256 { + #[doc(hidden)] + _private: (), +} + +impl Sealed for Sha256 {} + +impl HashAlgorithm for Sha256 { + const NAME: &'static str = "sha256"; + + // A SHA-256 hash is 32 bytes long. + type Array = GenericArray; +} diff --git a/omnibor/src/hash_provider/boringssl.rs b/omnibor/src/hash_provider/boringssl.rs new file mode 100644 index 0000000..a3101f4 --- /dev/null +++ b/omnibor/src/hash_provider/boringssl.rs @@ -0,0 +1,68 @@ +//! BoringSSL-based cryptography backend. 
+ +#![allow(clippy::new_without_default)] + +use { + super::HashProvider, + crate::hash_algorithm::Sha256, + boring::sha, + digest::{consts::U32, FixedOutput, HashMarker, Output, OutputSizeUser, Update}, +}; + +#[derive(Clone, Copy)] +pub struct BoringSsl { + #[doc(hidden)] + _phantom: (), +} + +impl BoringSsl { + pub fn new() -> Self { + BoringSsl { _phantom: () } + } +} + +impl HashProvider for BoringSsl { + type Digester = Sha256Digester; + + fn digester(&self) -> Self::Digester { + Sha256Digester::default() + } +} + +/// `boringssl` SHA-256 implementing the `Digest` trait. +#[doc(hidden)] +pub struct Sha256Digester { + hash: sha::Sha256, +} + +impl Update for Sha256Digester { + fn update(&mut self, data: &[u8]) { + self.hash.update(data); + } +} + +impl OutputSizeUser for Sha256Digester { + type OutputSize = U32; +} + +impl FixedOutput for Sha256Digester { + fn finalize_into(self, out: &mut Output) { + out.copy_from_slice(self.hash.finish().as_slice()); + } + + fn finalize_fixed(self) -> Output { + let mut out = Output::::default(); + out.copy_from_slice(self.hash.finish().as_slice()); + out + } +} + +impl HashMarker for Sha256Digester {} + +impl Default for Sha256Digester { + fn default() -> Self { + Self { + hash: sha::Sha256::new(), + } + } +} diff --git a/omnibor/src/hash_provider/mod.rs b/omnibor/src/hash_provider/mod.rs new file mode 100644 index 0000000..f2cbba2 --- /dev/null +++ b/omnibor/src/hash_provider/mod.rs @@ -0,0 +1,28 @@ +#[cfg(doc)] +use crate::artifact_id::ArtifactId; +use crate::hash_algorithm::HashAlgorithm; +use digest::Digest; + +#[cfg(feature = "backend-boringssl")] +mod boringssl; +#[cfg(feature = "backend-boringssl")] +pub use crate::hash_provider::boringssl::BoringSsl; + +#[cfg(feature = "backend-openssl")] +mod openssl; +#[cfg(feature = "backend-openssl")] +pub use crate::hash_provider::openssl::OpenSsl; + +#[cfg(feature = "backend-rustcrypto")] +mod rustcrypto; +#[cfg(feature = "backend-rustcrypto")] +pub use 
crate::hash_provider::rustcrypto::RustCrypto; + +/// A cryptography library for producing [`ArtifactId`]s with SHA-256. +pub trait HashProvider: Copy { + /// The type used to produce the SHA-256 digest. + type Digester: Digest; + + /// Get the SHA-256 digester. + fn digester(&self) -> Self::Digester; +} diff --git a/omnibor/src/hash_provider/openssl.rs b/omnibor/src/hash_provider/openssl.rs new file mode 100644 index 0000000..267fdda --- /dev/null +++ b/omnibor/src/hash_provider/openssl.rs @@ -0,0 +1,68 @@ +//! OpenSSL-based cryptography backend. + +#![allow(clippy::new_without_default)] + +use { + super::HashProvider, + crate::hash_algorithm::Sha256, + digest::{consts::U32, FixedOutput, HashMarker, Output, OutputSizeUser, Update}, + openssl::sha, +}; + +#[derive(Clone, Copy)] +pub struct OpenSsl { + #[doc(hidden)] + _phantom: (), +} + +impl OpenSsl { + pub fn new() -> Self { + OpenSsl { _phantom: () } + } +} + +impl HashProvider for OpenSsl { + type Digester = Sha256Digester; + + fn digester(&self) -> Self::Digester { + Sha256Digester::default() + } +} + +/// `openssl` SHA-256 implementing the `Digest` trait. 
+#[doc(hidden)] +pub struct Sha256Digester { + hash: sha::Sha256, +} + +impl Update for Sha256Digester { + fn update(&mut self, data: &[u8]) { + self.hash.update(data); + } +} + +impl OutputSizeUser for Sha256Digester { + type OutputSize = U32; +} + +impl FixedOutput for Sha256Digester { + fn finalize_into(self, out: &mut Output) { + out.copy_from_slice(self.hash.finish().as_slice()); + } + + fn finalize_fixed(self) -> Output { + let mut out = Output::::default(); + out.copy_from_slice(self.hash.finish().as_slice()); + out + } +} + +impl HashMarker for Sha256Digester {} + +impl Default for Sha256Digester { + fn default() -> Self { + Self { + hash: sha::Sha256::new(), + } + } +} diff --git a/omnibor/src/hash_provider/rustcrypto.rs b/omnibor/src/hash_provider/rustcrypto.rs new file mode 100644 index 0000000..59d2c3e --- /dev/null +++ b/omnibor/src/hash_provider/rustcrypto.rs @@ -0,0 +1,70 @@ +//! RustCrypto-based cryptography backend. + +#![allow(clippy::derivable_impls)] +#![allow(clippy::new_without_default)] + +use { + super::HashProvider, + crate::hash_algorithm::Sha256, + digest::{consts::U32, FixedOutput, HashMarker, Output, OutputSizeUser, Update}, + sha2 as sha, +}; + +#[derive(Clone, Copy)] +pub struct RustCrypto { + #[doc(hidden)] + _phantom: (), +} + +impl RustCrypto { + pub fn new() -> Self { + RustCrypto { _phantom: () } + } +} + +impl HashProvider for RustCrypto { + type Digester = Sha256Digester; + + fn digester(&self) -> Self::Digester { + Sha256Digester::default() + } +} + +/// `rustcrypto` SHA-256 implementing the `Digest` trait. +/// +/// This just wraps the internal type that already implements `Digest` for +/// consistency with the other cryptography providers. 
+#[doc(hidden)] +pub struct Sha256Digester { + hash: sha::Sha256, +} + +impl Update for Sha256Digester { + fn update(&mut self, data: &[u8]) { + Update::update(&mut self.hash, data); + } +} + +impl OutputSizeUser for Sha256Digester { + type OutputSize = U32; +} + +impl FixedOutput for Sha256Digester { + fn finalize_into(self, out: &mut Output) { + FixedOutput::finalize_into(self.hash, out) + } + + fn finalize_fixed(self) -> Output { + self.hash.finalize_fixed() + } +} + +impl HashMarker for Sha256Digester {} + +impl Default for Sha256Digester { + fn default() -> Self { + Self { + hash: sha::Sha256::default(), + } + } +} diff --git a/omnibor/src/embedding_mode.rs b/omnibor/src/input_manifest/embedding_mode.rs similarity index 88% rename from omnibor/src/embedding_mode.rs rename to omnibor/src/input_manifest/embedding_mode.rs index bef3aa0..a03b851 100644 --- a/omnibor/src/embedding_mode.rs +++ b/omnibor/src/input_manifest/embedding_mode.rs @@ -1,7 +1,7 @@ -use crate::sealed::Sealed; +use {crate::util::sealed::Sealed, core::marker::PhantomData}; + #[cfg(doc)] -use crate::InputManifest; -use std::marker::PhantomData; +use crate::input_manifest::InputManifest; /// The embedding mode to use when making new [`InputManifest`]s. pub trait EmbeddingMode: Sealed { diff --git a/omnibor/src/input_manifest.rs b/omnibor/src/input_manifest/input_manifest.rs similarity index 79% rename from omnibor/src/input_manifest.rs rename to omnibor/src/input_manifest/input_manifest.rs index 47e62f4..ee70eba 100644 --- a/omnibor/src/input_manifest.rs +++ b/omnibor/src/input_manifest/input_manifest.rs @@ -1,22 +1,21 @@ //! [`InputManifest`] type that represents build inputs for an artifact. 
-use crate::hashes::SupportedHash; -use crate::ArtifactId; -use crate::Error; -use crate::Result; -use gitoid::Blob; -use gitoid::HashAlgorithm; -use gitoid::ObjectType; -use std::cmp::Ordering; -use std::fmt::Debug; -use std::fmt::Formatter; -use std::fmt::Result as FmtResult; -use std::fs::File; -use std::io::BufRead; -use std::io::BufReader; -use std::io::Write; -use std::path::Path; -use std::str::FromStr; +use { + crate::{ + artifact_id::ArtifactId, + error::{Error, Result}, + hash_algorithm::HashAlgorithm, + object_type::{Blob, ObjectType}, + }, + std::{ + cmp::Ordering, + fmt::{Debug, Formatter, Result as FmtResult}, + fs::File, + io::{BufRead, BufReader, Write}, + path::Path, + str::FromStr, + }, +}; /// A manifest describing the inputs used to build an artifact. /// @@ -31,7 +30,7 @@ use std::str::FromStr; /// Relations may additionally refer to the [`InputManifest`] of the /// related artifact. #[derive(PartialEq, Eq)] -pub struct InputManifest { +pub struct InputManifest { /// The artifact the manifest is describing. /// /// A manifest without this is "detached" because we don't know @@ -42,7 +41,7 @@ pub struct InputManifest { relations: Vec>, } -impl InputManifest { +impl InputManifest { pub(crate) fn with_relations(relations: impl Iterator>) -> Self { InputManifest { target: None, @@ -108,9 +107,9 @@ impl InputManifest { return Err(Error::MissingObjectTypeInHeader); } - if hash_algorithm != H::HashAlgorithm::NAME { + if hash_algorithm != H::NAME { return Err(Error::WrongHashAlgorithm { - expected: H::HashAlgorithm::NAME, + expected: H::NAME, got: hash_algorithm.to_owned(), }); } @@ -129,8 +128,7 @@ impl InputManifest { } /// Write the manifest out at the given path. 
- #[allow(clippy::write_with_newline)] - pub fn as_bytes(&self) -> Result> { + pub fn as_bytes(&self) -> Vec { let mut bytes = vec![]; // Per the spec, this prefix is present to substantially shorten @@ -138,25 +136,25 @@ impl InputManifest { // a manifest if they were written in full form. Instead, only the // hex-encoded hashes are recorded elsewhere, because all the metadata // is identical in a manifest and only recorded once at the beginning. - write!(bytes, "gitoid:{}:{}\n", Blob::NAME, H::HashAlgorithm::NAME)?; + let _ = writeln!(bytes, "gitoid:{}:{}", Blob::NAME, H::NAME); for relation in &self.relations { let aid = relation.artifact; - write!(bytes, "{}", aid.as_hex())?; + let _ = write!(bytes, "{}", aid.as_hex()); if let Some(mid) = relation.manifest { - write!(bytes, " manifest {}", mid.as_hex())?; + let _ = write!(bytes, " manifest {}", mid.as_hex()); } - write!(bytes, "\n")?; + let _ = writeln!(bytes); } - Ok(bytes) + bytes } } -impl Debug for InputManifest { +impl Debug for InputManifest { fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { f.debug_struct("InputManifest") .field("target", &self.target) @@ -165,7 +163,7 @@ impl Debug for InputManifest { } } -impl Clone for InputManifest { +impl Clone for InputManifest { fn clone(&self) -> Self { InputManifest { target: self.target, @@ -175,7 +173,7 @@ impl Clone for InputManifest { } /// Parse a single relation line. -fn parse_relation(input: &str) -> Result> { +fn parse_relation(input: &str) -> Result> { let parts = input.split(' ').collect::>(); if parts.len() < 2 { @@ -185,12 +183,8 @@ fn parse_relation(input: &str) -> Result> { // Panic Safety: we've already checked the length. 
let (aid_hex, manifest_indicator, manifest_aid_hex) = (parts[0], parts.get(1), parts.get(2)); - let artifact = ArtifactId::::from_str(&format!( - "gitoid:{}:{}:{}", - Blob::NAME, - H::HashAlgorithm::NAME, - aid_hex - ))?; + let artifact = + ArtifactId::::from_str(&format!("gitoid:{}:{}:{}", Blob::NAME, H::NAME, aid_hex))?; let manifest = match (manifest_indicator, manifest_aid_hex) { (None, None) | (Some(_), None) | (None, Some(_)) => None, @@ -199,12 +193,7 @@ fn parse_relation(input: &str) -> Result> { return Err(Error::MissingBomIndicatorInRelation); } - let gitoid_url = &format!( - "gitoid:{}:{}:{}", - Blob::NAME, - H::HashAlgorithm::NAME, - manifest_aid_hex - ); + let gitoid_url = &format!("gitoid:{}:{}:{}", Blob::NAME, H::NAME, manifest_aid_hex); ArtifactId::::from_str(gitoid_url).ok() } @@ -215,7 +204,7 @@ fn parse_relation(input: &str) -> Result> { /// A single input artifact represented in a [`InputManifest`]. #[derive(Copy)] -pub struct Relation { +pub struct Relation { /// The ID of the artifact itself. artifact: ArtifactId, @@ -229,7 +218,7 @@ pub struct Relation { // isn't actually relevant in this case because we don't _really_ // store a value of type-`H`, we just use it for type-level // programming. 
-impl Debug for Relation { +impl Debug for Relation { fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { f.debug_struct("Relation") .field("artifact", &self.artifact) @@ -238,7 +227,7 @@ impl Debug for Relation { } } -impl Clone for Relation { +impl Clone for Relation { fn clone(&self) -> Self { Relation { artifact: self.artifact, @@ -247,27 +236,27 @@ impl Clone for Relation { } } -impl PartialEq for Relation { +impl PartialEq for Relation { fn eq(&self, other: &Self) -> bool { self.artifact.eq(&other.artifact) && self.manifest.eq(&other.manifest) } } -impl Eq for Relation {} +impl Eq for Relation {} -impl PartialOrd for Relation { +impl PartialOrd for Relation { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } -impl Ord for Relation { +impl Ord for Relation { fn cmp(&self, other: &Self) -> Ordering { self.artifact.cmp(&other.artifact) } } -impl Relation { +impl Relation { pub(crate) fn new(artifact: ArtifactId, manifest: Option>) -> Relation { Relation { artifact, manifest } } diff --git a/omnibor/src/input_manifest_builder.rs b/omnibor/src/input_manifest/input_manifest_builder.rs similarity index 75% rename from omnibor/src/input_manifest_builder.rs rename to omnibor/src/input_manifest/input_manifest_builder.rs index 7b88ce8..605c8c1 100644 --- a/omnibor/src/input_manifest_builder.rs +++ b/omnibor/src/input_manifest/input_manifest_builder.rs @@ -1,24 +1,31 @@ -use crate::embedding::EmbeddingMode; -use crate::embedding_mode::Mode; -use crate::hashes::SupportedHash; -use crate::storage::Storage; -use crate::ArtifactId; -use crate::Error; -use crate::InputManifest; -use crate::IntoArtifactId; -use crate::Relation; -use crate::Result; -use std::collections::BTreeSet; -use std::fmt::Debug; -use std::fmt::Formatter; -use std::fmt::Result as FmtResult; -use std::fs::File; -use std::fs::OpenOptions; -use std::marker::PhantomData; -use std::path::Path; +use { + crate::{ + artifact_id::{ArtifactId, ArtifactIdBuilder}, + error::{Error, Result}, + 
hash_algorithm::HashAlgorithm, + hash_provider::HashProvider, + input_manifest::{ + embedding_mode::{EmbeddingMode, Mode}, + InputManifest, Relation, + }, + storage::Storage, + }, + std::{ + collections::BTreeSet, + fmt::{Debug, Formatter, Result as FmtResult}, + fs::{File, OpenOptions}, + marker::PhantomData, + path::Path, + }, +}; /// An [`InputManifest`] builder. -pub struct InputManifestBuilder> { +pub struct InputManifestBuilder< + H: HashAlgorithm, + M: EmbeddingMode, + S: Storage, + P: HashProvider, +> { /// The relations to be written to a new manifest by this transaction. relations: BTreeSet>, @@ -27,6 +34,9 @@ pub struct InputManifestBuilder> Debug for InputManifestBuilder { +impl, P: HashProvider> Debug + for InputManifestBuilder +{ fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { f.debug_struct("InputManifestBuilder") .field("mode", &M::mode()) @@ -47,19 +59,21 @@ impl> Debug for InputManifestB } } -impl> InputManifestBuilder { +impl, P: HashProvider> + InputManifestBuilder +{ /// Construct a new [`InputManifestBuilder`] with a specific type of storage. - pub fn with_storage(storage: S) -> Self { + pub fn new(storage: S, sha256_provider: P) -> Self { Self { relations: BTreeSet::new(), mode: PhantomData, storage, + sha256_provider, } } /// Add a relation to an artifact to the transaction. - pub fn add_relation(&mut self, artifact: impl IntoArtifactId) -> Result<&mut Self> { - let artifact = artifact.into_artifact_id()?; + pub fn add_relation(&mut self, artifact: ArtifactId) -> Result<&mut Self> { let manifest = self.storage.get_manifest_id_for_artifact(artifact)?; self.relations.insert(Relation::new(artifact, manifest)); Ok(self) @@ -85,6 +99,8 @@ impl> InputManifestBuilder Result> { + let builder = ArtifactIdBuilder::with_provider(self.sha256_provider); + // Construct a new input manifest. 
let mut manifest = InputManifest::with_relations(self.relations.iter().cloned()); @@ -93,7 +109,7 @@ impl> InputManifestBuilder> InputManifestBuilder { let mut file = OpenOptions::new().read(true).write(true).open(target)?; embed_manifest_in_target(target, &mut file, manifest_aid)?; - ArtifactId::id_reader(file)? + builder.identify_file(&mut file)? } Mode::NoEmbed => { - let file = File::open(target)?; - ArtifactId::id_reader(file)? + let mut file = File::open(target)?; + builder.identify_file(&mut file)? } }; @@ -134,7 +150,7 @@ impl> InputManifestBuilder { +pub struct LinkedInputManifest { /// The ArtifactId of the target. target_aid: ArtifactId, @@ -145,7 +161,7 @@ pub struct LinkedInputManifest { manifest: InputManifest, } -impl Debug for LinkedInputManifest { +impl Debug for LinkedInputManifest { fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { f.debug_struct("TransactionIds") .field("target_aid", &self.target_aid) @@ -155,7 +171,7 @@ impl Debug for LinkedInputManifest { } } -impl LinkedInputManifest { +impl LinkedInputManifest { /// Get the ArtifactId of the file targeted by the transaction. pub fn target_aid(&self) -> ArtifactId { self.target_aid @@ -173,7 +189,7 @@ impl LinkedInputManifest { } /// Embed the manifest's [`ArtifactId`] into the target file. 
-fn embed_manifest_in_target( +fn embed_manifest_in_target( path: &Path, file: &mut File, manifest_aid: ArtifactId, @@ -192,7 +208,7 @@ fn embed_manifest_in_target( } } -fn embed_in_elf_file( +fn embed_in_elf_file( _path: &Path, _file: &mut File, _manifest_aid: ArtifactId, @@ -200,7 +216,7 @@ fn embed_in_elf_file( todo!("embedding mode for ELF files is not yet implemented") } -fn embed_in_text_file_with_prefix_comment( +fn embed_in_text_file_with_prefix_comment( _path: &Path, _file: &mut File, _manifest_aid: ArtifactId, @@ -209,7 +225,7 @@ fn embed_in_text_file_with_prefix_comment( todo!("embedding mode for text files is not yet implemented") } -fn embed_in_text_file_with_wrapped_comment( +fn embed_in_text_file_with_wrapped_comment( _path: &Path, _file: &mut File, _manifest_aid: ArtifactId, @@ -248,15 +264,24 @@ enum TextType { #[cfg(test)] mod tests { - use super::*; - use crate::embedding_mode::NoEmbed; - use crate::hashes::Sha256; - use crate::storage::{FileSystemStorage, InMemoryStorage}; - use pathbuf::pathbuf; - use std::str::FromStr; - + use { + super::*, + crate::{ + hash_algorithm::Sha256, + input_manifest::embedding_mode::NoEmbed, + pathbuf, + storage::{FileSystemStorage, InMemoryStorage}, + }, + std::str::FromStr, + }; + + #[cfg(feature = "backend-rustcrypto")] /// A basic builder test that creates a single manifest and validates it. 
fn basic_builder_test(storage: impl Storage) { + use crate::hash_provider::RustCrypto; + + let builder = ArtifactIdBuilder::with_rustcrypto(); + let target = pathbuf![ env!("CARGO_MANIFEST_DIR"), "test", @@ -264,8 +289,8 @@ mod tests { "hello_world.txt" ]; - let first_input_aid = ArtifactId::id_str("test_1"); - let second_input_aid = ArtifactId::id_str("test_2"); + let first_input_aid = builder.identify_string("test_1"); + let second_input_aid = builder.identify_string("test_2"); let expected_target_aid = ArtifactId::::from_str( "gitoid:blob:sha256:fee53a18d32820613c0527aa79be5cb30173c823a9b448fa4817767cc84c6f03", @@ -277,7 +302,7 @@ mod tests { ) .unwrap(); - let ids = InputManifestBuilder::::with_storage(storage) + let ids = InputManifestBuilder::::new(storage, RustCrypto::new()) .add_relation(first_input_aid) .unwrap() .add_relation(second_input_aid) @@ -310,16 +335,22 @@ mod tests { ); } + #[cfg(feature = "backend-rustcrypto")] #[test] fn in_memory_builder_works() { - let storage = InMemoryStorage::new(); + use crate::hash_provider::RustCrypto; + + let storage = InMemoryStorage::new(RustCrypto::new()); basic_builder_test(storage); } + #[cfg(feature = "backend-rustcrypto")] #[test] fn file_system_builder_works() { + use crate::hash_provider::RustCrypto; + let storage_root = pathbuf![env!("CARGO_MANIFEST_DIR"), "test", "fs_storage"]; - let mut storage = FileSystemStorage::new(&storage_root).unwrap(); + let mut storage = FileSystemStorage::new(RustCrypto::new(), &storage_root).unwrap(); basic_builder_test(&mut storage); storage.cleanup().unwrap(); } diff --git a/omnibor/src/input_manifest/mod.rs b/omnibor/src/input_manifest/mod.rs new file mode 100644 index 0000000..0d98770 --- /dev/null +++ b/omnibor/src/input_manifest/mod.rs @@ -0,0 +1,8 @@ +pub mod embedding_mode; +pub(crate) mod input_manifest; +pub(crate) mod input_manifest_builder; + +pub use input_manifest::InputManifest; +pub use input_manifest::Relation; +pub use 
input_manifest_builder::InputManifestBuilder; +pub use input_manifest_builder::ShouldStore; diff --git a/omnibor/src/into_artifact_id.rs b/omnibor/src/into_artifact_id.rs deleted file mode 100644 index aff9f8c..0000000 --- a/omnibor/src/into_artifact_id.rs +++ /dev/null @@ -1,42 +0,0 @@ -use crate::supported_hash::SupportedHash; -use crate::ArtifactId; -#[cfg(doc)] -use crate::InputManifestBuilder; -use crate::Result; -use std::fs::File; -use std::io::BufReader; -use std::path::Path; - -/// Types that can produce an [`ArtifactId`]. -/// -/// This is a convenience trait used by [`InputManifestBuilder`] to provide a more -/// ergonomic API for constructing input manifests. -pub trait IntoArtifactId { - /// Produce an [`ArtifactId`] from the current type. - fn into_artifact_id(self) -> Result>; -} - -impl IntoArtifactId for ArtifactId { - fn into_artifact_id(self) -> Result> { - Ok(self) - } -} - -impl IntoArtifactId for &Path { - fn into_artifact_id(self) -> Result> { - File::open(self)?.into_artifact_id() - } -} - -impl IntoArtifactId for File { - fn into_artifact_id(self) -> Result> { - let file = BufReader::new(self); - ArtifactId::id_reader(file) - } -} - -impl IntoArtifactId for &[u8] { - fn into_artifact_id(self) -> Result> { - Ok(ArtifactId::id_bytes(self)) - } -} diff --git a/omnibor/src/lib.rs b/omnibor/src/lib.rs index d68af75..99cbf5d 100644 --- a/omnibor/src/lib.rs +++ b/omnibor/src/lib.rs @@ -5,9 +5,11 @@ //! [OmniBOR][omnibor] is a draft specification which defines two key concepts: //! //! - __Artifact Identifiers__: independently-reproducible identifiers for -//! software artifacts. Use [`ArtifactId`] for these. +//! software artifacts. Use [`ArtifactId`](crate::artifact_id::ArtifactId) +//! for these. //! - __Artifact Input Manifests__: record the IDs of every input used in the -//! build process for an artifact. Use [`InputManifest`] for these. +//! build process for an artifact. Use +//! 
[`InputManifest`](crate::input_manifest::InputManifest) for these. //! //! Artifact IDs enable _anyone_ to identify and cross-reference information for //! software artifacts without a central authority. Unlike [pURL][purl] or [CPE][cpe], @@ -41,50 +43,57 @@ //! [omnibor_spec]: https://github.com/omnibor/spec //! [purl]: https://github.com/package-url/purl-spec -// Make this public within the crate to aid with writing sealed -// traits, a pattern we use repeatedly. -pub(crate) mod sealed; +/*=============================================================================================== + * Lint Configuration + */ -// This is hidden for now, as we are not yet ready to commit to any -// stability guarantees for FFI. -#[doc(hidden)] -pub mod ffi; +#![allow(clippy::module_inception)] -// Keep modules private and just re-export the symbols we care about. -mod artifact_id; -mod embedding_mode; -mod error; -mod input_manifest; -mod input_manifest_builder; -mod into_artifact_id; -pub mod storage; -mod supported_hash; +/*=============================================================================================== + * Compilation Protections + */ + +#[cfg(not(any( + feature = "backend-rustcrypto", + feature = "backend-boringssl", + feature = "backend-openssl" +)))] +compile_error!( + r#"At least one of the "backend-rustcrypto", "backend-boringssl", \n"# + r#"\tor "backend-openssl" features must be enabled"# +); + +/*=============================================================================================== + * Internal Modules + */ +pub(crate) mod gitoid; +pub(crate) mod object_type; +pub(crate) mod util; + +/*=============================================================================================== + * Testing + */ + +#[cfg(feature = "backend-rustcrypto")] #[cfg(test)] mod test; -// Only make this public within the crate, for convenience -// elsewhere since we always expect to be using our own `Error` -// type anyway. 
-pub(crate) use crate::error::Result; +/*=============================================================================================== + * FFI + */ -/// Defines whether data for an [`InputManifest`] is embedded in the artifact itself. -pub mod embedding { - pub use crate::embedding_mode::Embed; - pub use crate::embedding_mode::EmbeddingMode; - pub use crate::embedding_mode::NoEmbed; -} +// Hidden since we don't want to commit to stability. +#[doc(hidden)] +pub mod ffi; -/// Defines the hash algorithms supported for [`ArtifactId`]s. -pub mod hashes { - pub use crate::supported_hash::Sha256; - pub use crate::supported_hash::SupportedHash; -} +/*=============================================================================================== + * Public API + */ -pub use crate::artifact_id::ArtifactId; -pub use crate::error::Error; -pub use crate::input_manifest::InputManifest; -pub use crate::input_manifest::Relation; -pub use crate::input_manifest_builder::InputManifestBuilder; -pub use crate::input_manifest_builder::ShouldStore; -pub use crate::into_artifact_id::IntoArtifactId; +pub mod artifact_id; +pub mod error; +pub mod hash_algorithm; +pub mod hash_provider; +pub mod input_manifest; +pub mod storage; diff --git a/omnibor/src/object_type.rs b/omnibor/src/object_type.rs new file mode 100644 index 0000000..ceaefb9 --- /dev/null +++ b/omnibor/src/object_type.rs @@ -0,0 +1,32 @@ +//! The types of objects for which a `GitOid` can be made. + +use crate::util::sealed::Sealed; + +#[cfg(doc)] +use crate::gitoid::GitOid; + +/// Object types usable to construct a [`GitOid`]. +/// +/// This is a sealed trait to ensure it's only used for hash +/// algorithms which are actually supported by Git. +/// +/// For more information on sealed traits, read Predrag +/// Gruevski's ["A Definitive Guide to Sealed Traits in Rust"][1]. 
+/// +/// [1]: https://predr.ag/blog/definitive-guide-to-sealed-traits-in-rust/ +pub trait ObjectType: Sealed { + #[doc(hidden)] + const NAME: &'static str; +} + +/// A Blob GitOid object. +pub struct Blob { + #[doc(hidden)] + _private: (), +} + +impl Sealed for Blob {} + +impl ObjectType for Blob { + const NAME: &'static str = "blob"; +} diff --git a/omnibor/src/sealed.rs b/omnibor/src/sealed.rs deleted file mode 100644 index 0650b2f..0000000 --- a/omnibor/src/sealed.rs +++ /dev/null @@ -1 +0,0 @@ -pub trait Sealed {} diff --git a/omnibor/src/storage.rs b/omnibor/src/storage.rs index f7d9f48..962970d 100644 --- a/omnibor/src/storage.rs +++ b/omnibor/src/storage.rs @@ -1,34 +1,31 @@ //! Defines how manifests are stored and accessed. -use crate::hashes::SupportedHash; -use crate::supported_hash::Sha256; -use crate::ArtifactId; -use crate::Error; -use crate::InputManifest; -use crate::Result; -use pathbuf::pathbuf; -use std::collections::HashMap; -use std::env::var_os; -use std::fmt::Debug; -use std::fs; -use std::fs::create_dir_all; -use std::fs::write; -use std::fs::File; -use std::io::BufRead as _; -use std::io::BufReader; -use std::io::BufWriter; -use std::io::Write as _; -use std::ops::Not as _; -use std::path::Path; -use std::path::PathBuf; -use std::str::FromStr; -use tracing::debug; -use tracing::info; -use walkdir::DirEntry; -use walkdir::WalkDir; +use { + crate::{ + artifact_id::{ArtifactId, ArtifactIdBuilder}, + error::{Error, Result}, + hash_algorithm::{HashAlgorithm, Sha256}, + hash_provider::HashProvider, + input_manifest::InputManifest, + pathbuf, + }, + std::{ + collections::HashMap, + env::var_os, + fmt::Debug, + fs::{self, create_dir_all, write, File}, + io::{BufRead as _, BufReader, BufWriter, Write as _}, + marker::PhantomData, + ops::Not as _, + path::{Path, PathBuf}, + str::FromStr, + }, + tracing::{debug, info}, + walkdir::{DirEntry, WalkDir}, +}; /// Represents the interface for storing and querying manifests. 
-pub trait Storage { +pub trait Storage { /// Check if we have the manifest for a specific artifact. fn has_manifest_for_artifact(&self, target_aid: ArtifactId) -> bool; @@ -58,7 +55,7 @@ pub trait Storage { fn get_manifests(&self) -> Result>>; } -impl> Storage for &mut S { +impl> Storage for &mut S { fn has_manifest_for_artifact(&self, target_aid: ArtifactId) -> bool { (**self).has_manifest_for_artifact(target_aid) } @@ -96,13 +93,15 @@ impl> Storage for &mut S { /// File system storage for [`InputManifest`]s. #[derive(Debug)] -pub struct FileSystemStorage { +pub struct FileSystemStorage> { + _hash_algorithm: PhantomData, + hash_provider: P, root: PathBuf, } -impl FileSystemStorage { +impl> FileSystemStorage { /// Start building a new [`FileSystemStorage`]. - pub fn new(root: impl AsRef) -> Result { + pub fn new(hash_provider: P, root: impl AsRef) -> Result> { let root = root.as_ref().to_owned(); if root.exists() { @@ -117,15 +116,21 @@ impl FileSystemStorage { .map_err(|e| Error::CantCreateObjectStoreDir(root.display().to_string(), e))?; } - Ok(FileSystemStorage { root }) + Ok(FileSystemStorage { + _hash_algorithm: PhantomData, + hash_provider, + root, + }) } /// Build a [`FileSystemStorage`] with a root set from /// the `OMNIBOR_DIR` environment variable. - pub fn from_env() -> Result { + pub fn from_env(hash_provider: P) -> Result> { var_os("OMNIBOR_DIR") .ok_or(Error::NoStorageRoot) .map(|root| FileSystemStorage { + _hash_algorithm: PhantomData, + hash_provider, root: PathBuf::from(root), }) } @@ -156,7 +161,7 @@ impl FileSystemStorage { } /// Get the path for storing a manifest with this [`ArtifactId`]. 
- fn manifest_path(&self, aid: ArtifactId) -> PathBuf { + fn manifest_path(&self, aid: ArtifactId) -> PathBuf { let kind = format!("gitoid_{}_{}", aid.object_type(), aid.hash_algorithm()); let hash = aid.as_hex(); let (prefix, remainder) = hash.split_at(2); @@ -164,7 +169,7 @@ impl FileSystemStorage { } /// Iterate over the targets of manifests currently in the object store. - fn manifests(&self) -> impl Iterator> + '_ { + fn manifests(&self) -> impl Iterator> + '_ { WalkDir::new(self.manifests_path()) .into_iter() .filter_map(|result| result.ok()) @@ -183,7 +188,7 @@ impl FileSystemStorage { } } -impl Storage for FileSystemStorage { +impl> Storage for FileSystemStorage { fn has_manifest_for_artifact(&self, target_aid: ArtifactId) -> bool { self.manifests() .any(|entry| entry.target_aid == Some(target_aid)) @@ -207,14 +212,17 @@ impl Storage for FileSystemStorage { target_aid: ArtifactId, ) -> Result>> { match self.get_manifest_for_artifact(target_aid) { - Ok(Some(manifest)) => ArtifactId::id_manifest(&manifest).map(Some), + Ok(Some(manifest)) => Ok(Some( + ArtifactIdBuilder::with_provider(self.hash_provider).identify_manifest(&manifest), + )), Ok(None) => Ok(None), Err(e) => Err(e), } } fn write_manifest(&mut self, manifest: &InputManifest) -> Result> { - let manifest_aid = ArtifactId::::id_manifest(manifest)?; + let builder = ArtifactIdBuilder::with_provider(self.hash_provider); + let manifest_aid = builder.identify_manifest(manifest); let path = self.manifest_path(manifest_aid); let parent_dirs = path .parent() @@ -223,7 +231,7 @@ impl Storage for FileSystemStorage { create_dir_all(parent_dirs) .map_err(|e| Error::CantWriteManifestDir(parent_dirs.display().to_string(), e))?; - write(&path, manifest.as_bytes()?) 
+ write(&path, manifest.as_bytes()) .map_err(|e| Error::CantWriteManifest(path.display().to_string(), e))?; info!("wrote manifest '{}' to store", manifest_aid); @@ -250,7 +258,7 @@ impl Storage for FileSystemStorage { } } -fn artifact_id_from_dir_entry(entry: &DirEntry) -> Option> { +fn artifact_id_from_dir_entry(entry: &DirEntry) -> Option> { let gitoid_url = { let path_components = entry .path() @@ -273,7 +281,7 @@ fn artifact_id_from_dir_entry(entry: &DirEntry) -> Option { +struct ManifestsEntry { /// The [`ArtifactId`] of the target artifact. target_aid: Option>, @@ -281,7 +289,7 @@ struct ManifestsEntry { manifest_path: PathBuf, } -impl ManifestsEntry { +impl ManifestsEntry { /// Load the [`InputManifest`] represented by this entry. fn manifest(&self) -> Result> { let mut manifest = InputManifest::from_path(&self.manifest_path)?; @@ -310,7 +318,7 @@ impl TargetIndex { } /// Find an entry for a specific manifest [`ArtifactId`]. - fn find(&self, manifest_aid: ArtifactId) -> Result>> { + fn find(&self, manifest_aid: ArtifactId) -> Result>> { let file = File::open(&self.path) .map_err(|e| Error::CantOpenTargetIndex(self.path.display().to_string(), e))?; @@ -337,19 +345,19 @@ impl TargetIndex { // Begin an "upsert" operation in the [`TargetIndex`]. // // This either updates or inserts, as appropriate, into the index. - fn upsert(&self) -> TargetIndexUpsert { + fn upsert(&self) -> TargetIndexUpsert { let root = self.path.parent().unwrap(); TargetIndexUpsert::new(root) } } -struct TargetIndexUpsert { +struct TargetIndexUpsert { root: PathBuf, manifest_aid: Option>, target_aid: Option>, } -impl TargetIndexUpsert { +impl TargetIndexUpsert { /// Start a new upsert operation. fn new(root: impl AsRef) -> Self { TargetIndexUpsert { @@ -445,16 +453,22 @@ impl TargetIndexUpsert { /// may be useful in other applications where you only care about producing and using /// manifests in the short-term, and not in persisting them to a disk or some other /// durable location. 
-#[derive(Debug, Default)] -pub struct InMemoryStorage { +#[derive(Debug)] +pub struct InMemoryStorage> { + /// The cryptography library providing a hash implementation. + hash_provider: P, + /// Stored SHA-256 [`InputManifest`]s. sha256_manifests: Vec>, } -impl InMemoryStorage { +impl> InMemoryStorage

{ /// Construct a new `InMemoryStorage` instance. - pub fn new() -> Self { - InMemoryStorage::default() + pub fn new(hash_provider: P) -> Self { + Self { + hash_provider, + sha256_manifests: Vec::new(), + } } /// Find the manifest entry that matches the target [`ArtifactId`] @@ -468,7 +482,7 @@ impl InMemoryStorage { } } -impl Storage for InMemoryStorage { +impl> Storage for InMemoryStorage

{ fn has_manifest_for_artifact(&self, target_aid: ArtifactId) -> bool { self.match_by_target_aid(target_aid).is_some() } @@ -492,7 +506,8 @@ impl Storage for InMemoryStorage { } fn write_manifest(&mut self, manifest: &InputManifest) -> Result> { - let manifest_aid = ArtifactId::::id_manifest(manifest)?; + let builder = ArtifactIdBuilder::with_provider(self.hash_provider); + let manifest_aid = builder.identify_manifest(manifest); self.sha256_manifests.push(ManifestEntry { manifest_aid, @@ -525,7 +540,7 @@ impl Storage for InMemoryStorage { } /// An entry in the in-memory manifest storage. -struct ManifestEntry { +struct ManifestEntry { /// The [`ArtifactId`] of the manifest. manifest_aid: ArtifactId, @@ -533,7 +548,7 @@ struct ManifestEntry { manifest: InputManifest, } -impl Debug for ManifestEntry { +impl Debug for ManifestEntry { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("ManifestEntry") .field("manifest_aid", &self.manifest_aid) @@ -542,7 +557,7 @@ impl Debug for ManifestEntry { } } -impl Clone for ManifestEntry { +impl Clone for ManifestEntry { fn clone(&self) -> Self { ManifestEntry { manifest_aid: self.manifest_aid, @@ -554,15 +569,16 @@ impl Clone for ManifestEntry { #[cfg(test)] mod tests { use super::FileSystemStorage; - use crate::hashes::Sha256; - use crate::ArtifactId; - use pathbuf::pathbuf; + use crate::{artifact_id::ArtifactId, hash_algorithm::Sha256, pathbuf}; use std::str::FromStr; + #[cfg(feature = "backend-rustcrypto")] #[test] fn correct_aid_storage_path() { + use crate::hash_provider::RustCrypto; + let root = pathbuf![env!("CARGO_MANIFEST_DIR"), "test", "fs_storage"]; - let storage = FileSystemStorage::new(&root).unwrap(); + let storage = FileSystemStorage::new(RustCrypto::new(), &root).unwrap(); let aid = ArtifactId::::from_str( "gitoid:blob:sha256:9d09789f20162dca6d80d2d884f46af22c824f6409d4f447332d079a2d1e364f", diff --git a/omnibor/src/supported_hash.rs b/omnibor/src/supported_hash.rs deleted file 
mode 100644 index 774acbe..0000000 --- a/omnibor/src/supported_hash.rs +++ /dev/null @@ -1,21 +0,0 @@ -use crate::sealed::Sealed; -#[cfg(doc)] -use crate::ArtifactId; -use gitoid::HashAlgorithm; - -/// Marker trait for hash algorithms supported for constructing [`ArtifactId`]s. -pub trait SupportedHash: Sealed { - type HashAlgorithm: HashAlgorithm; -} - -/// The SHA-256 hashing algorithm. -pub struct Sha256 { - #[doc(hidden)] - _private: (), -} - -impl Sealed for Sha256 {} - -impl SupportedHash for Sha256 { - type HashAlgorithm = gitoid::rustcrypto::Sha256; -} diff --git a/omnibor/src/test.rs b/omnibor/src/test.rs index a49dbb7..9d4f2b0 100644 --- a/omnibor/src/test.rs +++ b/omnibor/src/test.rs @@ -1,33 +1,211 @@ //! Tests against the OmniBOR crate as a whole. -use crate::hashes::Sha256; -use crate::hashes::SupportedHash; -use crate::ArtifactId; -use digest::OutputSizeUser; -use gitoid::HashAlgorithm; -use std::mem::size_of; +use { + crate::{ + artifact_id::{ArtifactId, ArtifactIdBuilder}, + hash_algorithm::Sha256, + }, + anyhow::Result, + serde_test::{assert_tokens, Token}, + std::fs::File, + tokio::{fs::File as AsyncFile, runtime::Runtime}, + url::Url, +}; -/// Get the underlying 'Digest'-implementing type for the Sha256 algorithm. -type Sha256Alg = <::HashAlgorithm as HashAlgorithm>::Alg; +/// SHA-256 hash of a file containing "hello world" +/// +/// Taken from a Git repo as ground truth. +const ARTIFACT_ID_HELLO_WORLD_SHA256: &str = + "gitoid:blob:sha256:fee53a18d32820613c0527aa79be5cb30173c823a9b448fa4817767cc84c6f03"; /// ArtifactID should be exactly 32 bytes, the size of the buffer. 
#[test] fn artifact_id_sha256_size() { - assert_eq!(size_of::>(), Sha256Alg::output_size()); + assert_eq!(size_of::>(), 32); } -#[cfg(feature = "serde")] -mod serde_test { - use crate::hashes::Sha256; - use crate::ArtifactId; - use serde_test::assert_tokens; - use serde_test::Token; +#[test] +fn generate_sha256_artifact_id_from_bytes() { + let input = b"hello world"; + let result = ArtifactIdBuilder::with_rustcrypto().identify_bytes(input); + + assert_eq!(result.to_string(), ARTIFACT_ID_HELLO_WORLD_SHA256); +} + +#[test] +fn generate_sha256_artifact_id_from_buffer() -> Result<()> { + let mut file = File::open("test/data/hello_world.txt")?; + let result = ArtifactIdBuilder::with_rustcrypto().identify_file(&mut file)?; + + assert_eq!(result.to_string(), ARTIFACT_ID_HELLO_WORLD_SHA256); + + Ok(()) +} + +#[test] +fn generate_sha256_artifact_id_from_async_buffer() -> Result<()> { + let runtime = Runtime::new()?; + runtime.block_on(async { + let mut file = AsyncFile::open("test/data/hello_world.txt").await?; + let result = ArtifactIdBuilder::with_rustcrypto() + .identify_async_file(&mut file) + .await?; + + assert_eq!(result.to_string(), ARTIFACT_ID_HELLO_WORLD_SHA256); + + Ok(()) + }) +} + +#[test] +fn newline_normalization_from_file() -> Result<()> { + let mut unix_file = File::open("test/data/unix_line.txt")?; + let mut windows_file = File::open("test/data/windows_line.txt")?; + + let builder = ArtifactIdBuilder::with_rustcrypto(); + + let unix_artifact_id = builder.identify_file(&mut unix_file)?; + let windows_artifact_id = builder.identify_file(&mut windows_file)?; + + assert_eq!( + unix_artifact_id.to_string(), + windows_artifact_id.to_string() + ); + + Ok(()) +} + +#[test] +fn newline_normalization_from_async_file() -> Result<()> { + let runtime = Runtime::new()?; + runtime.block_on(async { + let mut unix_file = AsyncFile::open("test/data/unix_line.txt").await?; + let mut windows_file = AsyncFile::open("test/data/windows_line.txt").await?; + + let builder = 
ArtifactIdBuilder::with_rustcrypto(); + + let unix_artifact_id = builder.identify_async_file(&mut unix_file).await?; + let windows_artifact_id = builder.identify_async_file(&mut windows_file).await?; + + assert_eq!( + unix_artifact_id.to_string(), + windows_artifact_id.to_string() + ); + + Ok(()) + }) +} + +#[test] +fn newline_normalization_in_memory() -> Result<()> { + let with_crlf = b"some\r\nstring\r\n"; + let wout_crlf = b"some\nstring\n"; + + let builder = ArtifactIdBuilder::with_rustcrypto(); + + let with_crlf_artifact_id = builder.identify_bytes(&with_crlf[..]); + let wout_crlf_artifact_id = builder.identify_bytes(&wout_crlf[..]); + + assert_eq!( + with_crlf_artifact_id.to_string(), + wout_crlf_artifact_id.to_string() + ); + + Ok(()) +} + +#[test] +fn validate_uri() -> Result<()> { + let content = b"hello world"; + let artifact_id = ArtifactIdBuilder::with_rustcrypto().identify_bytes(content); - #[test] - fn valid_artifact_id_ser_de() { - let id = ArtifactId::::id_str("hello, world"); + assert_eq!( + artifact_id.url().to_string(), + ARTIFACT_ID_HELLO_WORLD_SHA256 + ); - // This validates both serialization and deserialization. 
- assert_tokens(&id, &[Token::Str("gitoid:blob:sha256:7d0be525d6521168c74051e5ab1b99e3b6d1c962fba763818f1954ab9e1c821a")]); + Ok(()) +} + +#[test] +fn try_from_url_bad_scheme() { + let url = Url::parse("gitiod:blob:sha1:95d09f2b10159347eece71399a7e2e907ea3df4f").unwrap(); + + match ArtifactId::::try_from_url(url) { + Ok(_) => panic!("parsing should fail"), + Err(e) => assert_eq!( + e.to_string(), + "invalid scheme in URL 'gitiod:blob:sha1:95d09f2b10159347eece71399a7e2e907ea3df4f'" + ), + } +} + +#[test] +fn try_from_url_missing_object_type() { + let url = Url::parse("gitoid:").unwrap(); + + match ArtifactId::::try_from_url(url) { + Ok(_) => panic!("parsing should fail"), + Err(e) => assert_eq!(e.to_string(), "missing object type in URL 'gitoid:'"), } } + +#[test] +fn try_from_url_bad_object_type() { + let url = Url::parse("gitoid:whatever").unwrap(); + + match ArtifactId::::try_from_url(url) { + Ok(_) => panic!("parsing should fail"), + Err(e) => assert_eq!(e.to_string(), "mismatched object type; expected 'blob'"), + } +} + +#[test] +fn try_from_url_missing_hash_algorithm() { + let url = Url::parse("gitoid:blob:").unwrap(); + + match ArtifactId::::try_from_url(url) { + Ok(_) => panic!("parsing should fail"), + Err(e) => assert_eq!( + e.to_string(), + "missing hash algorithm in URL 'gitoid:blob:'" + ), + } +} + +#[test] +fn try_from_url_bad_hash_algorithm() { + let url = Url::parse("gitoid:blob:sha10000").unwrap(); + + match ArtifactId::::try_from_url(url) { + Ok(_) => panic!("parsing should fail"), + Err(e) => assert_eq!( + e.to_string(), + "mismatched hash algorithm; expected 'sha256'" + ), + } +} + +#[test] +fn try_from_url_missing_hash() { + let url = Url::parse("gitoid:blob:sha256:").unwrap(); + + match ArtifactId::::try_from_url(url) { + Ok(_) => panic!("parsing should fail"), + Err(e) => assert_eq!(e.to_string(), "missing hash in URL 'gitoid:blob:sha256:'"), + } +} + +#[test] +fn try_url_roundtrip() { + let url = 
Url::parse(ARTIFACT_ID_HELLO_WORLD_SHA256).unwrap(); + let artifact_id = ArtifactId::::try_from_url(url.clone()).unwrap(); + let output = artifact_id.url(); + assert_eq!(url, output); +} + +#[test] +fn valid_artifact_id_ser_de() { + let id = ArtifactIdBuilder::with_rustcrypto().identify_string("hello world"); + assert_tokens(&id, &[Token::Str(ARTIFACT_ID_HELLO_WORLD_SHA256)]); +} diff --git a/gitoid/src/util/for_each_buf_fill.rs b/omnibor/src/util/for_each_buf_fill.rs similarity index 88% rename from gitoid/src/util/for_each_buf_fill.rs rename to omnibor/src/util/for_each_buf_fill.rs index 12e0cde..0bfad58 100644 --- a/gitoid/src/util/for_each_buf_fill.rs +++ b/omnibor/src/util/for_each_buf_fill.rs @@ -1,9 +1,5 @@ -use crate::Result; +use {crate::error::Result, std::io::BufRead}; -#[cfg(feature = "std")] -use std::io::BufRead; - -#[cfg(feature = "std")] /// Helper extension trait to give a convenient way to iterate over /// chunks sized to the size of the internal buffer of the reader. pub(crate) trait ForEachBufFill: BufRead { @@ -12,7 +8,6 @@ pub(crate) trait ForEachBufFill: BufRead { fn for_each_buf_fill(&mut self, f: impl FnMut(&[u8])) -> Result; } -#[cfg(feature = "std")] impl ForEachBufFill for R { fn for_each_buf_fill(&mut self, mut f: impl FnMut(&[u8])) -> Result { let mut total_read = 0; diff --git a/omnibor/src/util/mod.rs b/omnibor/src/util/mod.rs new file mode 100644 index 0000000..9201eaa --- /dev/null +++ b/omnibor/src/util/mod.rs @@ -0,0 +1,4 @@ +pub mod for_each_buf_fill; +pub mod pathbuf; +pub mod sealed; +pub mod stream_len; diff --git a/omnibor/src/util/pathbuf.rs b/omnibor/src/util/pathbuf.rs new file mode 100644 index 0000000..166ef9d --- /dev/null +++ b/omnibor/src/util/pathbuf.rs @@ -0,0 +1,17 @@ +#[macro_export] +#[doc(hidden)] +macro_rules! 
pathbuf { + ( $( $part:expr ),* ) => {{ + use std::path::PathBuf; + + let mut temp = PathBuf::new(); + + $( + temp.push($part); + )* + + temp + }}; + + ($( $part:expr, )*) => ($crate::pathbuf![$($part),*]) +} diff --git a/gitoid/src/sealed.rs b/omnibor/src/util/sealed.rs similarity index 100% rename from gitoid/src/sealed.rs rename to omnibor/src/util/sealed.rs diff --git a/gitoid/src/util/stream_len.rs b/omnibor/src/util/stream_len.rs similarity index 91% rename from gitoid/src/util/stream_len.rs rename to omnibor/src/util/stream_len.rs index 993cbb3..9bebf60 100644 --- a/gitoid/src/util/stream_len.rs +++ b/omnibor/src/util/stream_len.rs @@ -1,10 +1,8 @@ -use crate::Result; - -#[cfg(feature = "async")] -use tokio::io::{AsyncSeek, AsyncSeekExt as _}; - -#[cfg(feature = "std")] -use std::io::{Seek, SeekFrom}; +use { + crate::error::Result, + std::io::{Seek, SeekFrom}, + tokio::io::{AsyncSeek, AsyncSeekExt as _}, +}; // Adapted from the Rust standard library's unstable implementation // of `Seek::stream_len`. @@ -36,7 +34,6 @@ use std::io::{Seek, SeekFrom}; // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR // IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER // DEALINGS IN THE SOFTWARE. -#[cfg(feature = "std")] pub(crate) fn stream_len(mut stream: R) -> Result where R: Seek, @@ -54,7 +51,6 @@ where } /// An async equivalent of `stream_len`. -#[cfg(feature = "async")] pub(crate) async fn async_stream_len(mut stream: R) -> Result where R: AsyncSeek + Unpin, diff --git a/gitoid/test/data/unix_line.txt b/omnibor/test/data/unix_line.txt similarity index 100% rename from gitoid/test/data/unix_line.txt rename to omnibor/test/data/unix_line.txt diff --git a/gitoid/test/data/windows_line.txt b/omnibor/test/data/windows_line.txt similarity index 100% rename from gitoid/test/data/windows_line.txt rename to omnibor/test/data/windows_line.txt