diff --git a/.github/workflows/mirror.yml b/.github/workflows/mirror.yml index 22a4a112..849195e9 100644 --- a/.github/workflows/mirror.yml +++ b/.github/workflows/mirror.yml @@ -26,8 +26,14 @@ jobs: - name: Generate backend files run: | - mkdir tmp/ - cargo run --locked --release --bin upki-mirror -- tmp/ production --manifest-comment="$GITHUB_REPOSITORY run $GITHUB_RUN_ID" + mkdir -p tmp/revocation/ tmp/intermediates + cargo run --locked --release --bin mozilla-crlite -- tmp/revocation/ production --manifest-comment="$GITHUB_REPOSITORY run $GITHUB_RUN_ID" + # backwards compatible location & manifest + cp tmp/revocation/* tmp/ + jq '. + {filters: .files} | del(.files)' tmp/manifest.json > tmp/manifest.json.new + mv tmp/manifest.json.new tmp/manifest.json + + cargo run --locked --release --bin intermediates -- tmp/intermediates/ --manifest-comment="$GITHUB_REPOSITORY run $GITHUB_RUN_ID" - name: Package and upload artifact uses: actions/upload-pages-artifact@v4 diff --git a/Cargo.lock b/Cargo.lock index 651c4659..4a819fc0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2695,9 +2695,11 @@ version = "0.1.0" dependencies = [ "aws-lc-rs", "clap", + "csv", "eyre", "hex", "reqwest", + "rustls-pki-types", "serde", "serde_json", "tokio", diff --git a/Cargo.toml b/Cargo.toml index 4c03e85f..76e21d33 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ chrono = { version = "0.4.42", features = ["alloc"], default-features = false } clap = { version = "4.5", features = ["derive"] } clubcard-crlite = "0.3.2" criterion = "0.8" +csv = "1.4" directories = "6" eyre = "0.6" hex = { version = "0.4", features = ["serde"] } diff --git a/upki-mirror/Cargo.toml b/upki-mirror/Cargo.toml index 85158d21..c35c810d 100644 --- a/upki-mirror/Cargo.toml +++ b/upki-mirror/Cargo.toml @@ -9,9 +9,11 @@ repository.workspace = true [dependencies] aws-lc-rs.workspace = true clap.workspace = true +csv.workspace = true eyre.workspace = true hex.workspace = true reqwest.workspace = true 
+rustls-pki-types.workspace = true serde.workspace = true serde_json.workspace = true tokio.workspace = true diff --git a/upki-mirror/src/bin/intermediates.rs b/upki-mirror/src/bin/intermediates.rs new file mode 100644 index 00000000..d4cf38fa --- /dev/null +++ b/upki-mirror/src/bin/intermediates.rs @@ -0,0 +1,154 @@ +use core::time::Duration; +use std::collections::HashMap; +use std::fs; +use std::path::PathBuf; +use std::time::SystemTime; + +use aws_lc_rs::digest::{SHA256, digest}; +use clap::Parser; +use eyre::{Context, Report, anyhow}; +use rustls_pki_types::CertificateDer; +use rustls_pki_types::pem::PemObject; +use serde::Deserialize; +use upki::revocation::{Manifest, ManifestFile}; + +#[tokio::main(flavor = "current_thread")] +async fn main() -> Result<(), Report> { + let opts = Opts::try_parse()?; + + let client = reqwest::Client::builder() + .use_rustls_tls() + .timeout(Duration::from_secs(opts.http_timeout_secs)) + .user_agent(format!( + "{}/{} ({})", + env!("CARGO_PKG_NAME"), + env!("CARGO_PKG_VERSION"), + env!("CARGO_PKG_REPOSITORY") + )) + .build() + .wrap_err("failed to create HTTP client")?; + + let response = client + .get("https://ccadb.my.salesforce-sites.com/mozilla/MozillaIntermediateCertsCSVReport") + .send() + .await + .wrap_err("records request failed")?; + + if !response.status().is_success() { + return Err(anyhow!( + "HTTP error for records request: {}", + response.status() + )); + } + + let csv_bytes = response + .bytes() + .await + .wrap_err("failed to receive CSV body")?; + + let intermediates = csv::ReaderBuilder::new() + .has_headers(true) + .from_reader(&mut csv_bytes.as_ref()) + .into_deserialize::<IntermediateData>() + .collect::<Result<Vec<_>, _>>() + .wrap_err("failed to parse CSV")?; + + println!("we have {} intermediates", intermediates.len()); + + // we bucket intermediates into up to 256 files, by the first byte of the + // sha256-hash of their DER value. + // + // that means the manifest contains up to 256 items, and the filenames are small.
+ let mut buckets: HashMap<u8, Vec<IntermediateData>> = HashMap::new(); + + for i in intermediates { + let der = CertificateDer::from_pem_slice(i.pem.as_bytes()).wrap_err("cannot parse PEM")?; + + // check hash matches + let actual_hash = digest(&SHA256, &der); + if i.sha256 != actual_hash.as_ref() { + return Err(anyhow!("cert {i:?} does not have correct hash")); + } + + let bucket = i.sha256[0]; + buckets + .entry(bucket) + .or_default() + .push(i); + } + + let mut files = Vec::new(); + for (bucket, certs) in buckets { + let filename = format!("{bucket:02x}.pem"); + + let mut contents = String::new(); + for inter in certs { + contents.push_str(&inter.pem); + contents.push('\n'); + } + + fs::write(opts.output_dir.join(&filename), &contents).wrap_err("cannot write PEM file")?; + let hash = digest(&SHA256, contents.as_bytes()); + + files.push(ManifestFile { + filename, + size: contents.len(), + hash: hash.as_ref().to_vec(), + }); + } + + let manifest = Manifest { + generated_at: SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs(), + comment: opts.manifest_comment.clone(), + files, + }; + let output_filename = opts.output_dir.join("manifest.json"); + fs::write( + &output_filename, + serde_json::to_string(&manifest) + .wrap_err("cannot encode JSON manifest")? + .as_bytes(), + ) + .wrap_err_with(|| format!("cannot write manifest to {output_filename:?}"))?; + + Ok(()) +} + +#[derive(Debug, Parser)] +struct Opts { + /// Where to write output files. This must exist. + output_dir: PathBuf, + + /// Timeout in seconds for all HTTP requests. + #[clap(long, default_value_t = 10)] + http_timeout_secs: u64, + + /// Comment included in output manifest.
+ #[clap(long, default_value = "")] + manifest_comment: String, +} + +#[non_exhaustive] +#[derive(Debug, Clone, Hash, Eq, PartialEq, Deserialize)] +struct IntermediateData { + #[serde(rename = "Subject")] + subject: String, + + #[serde(rename = "Issuer")] + issuer: String, + + #[serde(rename = "SHA256", with = "hex::serde")] + sha256: [u8; 32], + + #[serde(rename = "Full CRL Issued By This CA")] + full_crl: String, + + #[serde(rename = "PEM")] + pem: String, + + #[serde(rename = "JSON Array of Partitioned CRLs")] + json_crls: String, +} diff --git a/upki-mirror/src/main.rs b/upki-mirror/src/bin/mozilla-crlite.rs similarity index 82% rename from upki-mirror/src/main.rs rename to upki-mirror/src/bin/mozilla-crlite.rs index dce19c99..dca4619f 100644 --- a/upki-mirror/src/main.rs +++ b/upki-mirror/src/bin/mozilla-crlite.rs @@ -7,9 +7,7 @@ use std::time::SystemTime; use aws_lc_rs::digest::{SHA256, digest}; use clap::{Parser, ValueEnum}; use eyre::{Context, Report, anyhow}; -use upki::revocation::{Filter, Manifest}; - -mod mozilla; +use upki::revocation::{Manifest, ManifestFile}; #[tokio::main(flavor = "current_thread")] async fn main() -> Result<(), Report> { @@ -20,7 +18,7 @@ async fn main() -> Result<(), Report> { .use_rustls_tls() .timeout(Duration::from_secs(opts.http_timeout_secs)) .user_agent(format!( - "{} v{} ({})", + "{}/{} ({})", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"), env!("CARGO_PKG_REPOSITORY") @@ -68,7 +66,7 @@ async fn main() -> Result<(), Report> { download_plan.push(item); } - let mut filters = Vec::new(); + let mut files = Vec::new(); for p in download_plan { let attachment_url = source.attachment_host.to_string() + &p.attachment.location; @@ -100,7 +98,7 @@ async fn main() -> Result<(), Report> { fs::write(&output_filename, bytes) .wrap_err_with(|| format!("cannot write filter data to {output_filename:?}",))?; - filters.push(Filter { + files.push(ManifestFile { filename: p.attachment.filename.clone(), size: p.attachment.size, hash: 
p.attachment.hash.clone(), @@ -113,7 +111,7 @@ .unwrap() .as_secs(), comment: opts.manifest_comment.clone(), - filters, + files, }; let output_filename = opts.output_dir.join("manifest.json"); fs::write( @@ -168,3 +166,38 @@ const MOZILLA_PROD: Source = Source { records_url: "https://firefox.settings.services.mozilla.com/v1/buckets/security-state/collections/cert-revocations/records", attachment_host: "https://firefox-settings-attachments.cdn.mozilla.net/", }; + +/// JSON structures used in the Mozilla preferences service. +mod mozilla { + use serde::Deserialize; + + #[derive(Debug, Deserialize)] + pub(crate) struct Manifest { + pub(crate) data: Vec<Item>, + } + + #[derive(Clone, Debug, Deserialize)] + pub(crate) struct Item { + pub(crate) attachment: Attachment, + pub(crate) channel: Channel, + pub(crate) id: String, + pub(crate) incremental: bool, + pub(crate) parent: Option<String>, + } + + #[derive(Clone, Debug, Deserialize, PartialEq)] + #[serde(rename_all = "snake_case")] + pub(crate) enum Channel { + Default, + Compat, + } + + #[derive(Clone, Debug, Deserialize)] + pub(crate) struct Attachment { + #[serde(with = "hex::serde")] + pub hash: Vec<u8>, + pub size: usize, + pub filename: String, + pub location: String, + } +} diff --git a/upki-mirror/src/mozilla.rs b/upki-mirror/src/mozilla.rs deleted file mode 100644 index a05ccdc3..00000000 --- a/upki-mirror/src/mozilla.rs +++ /dev/null @@ -1,33 +0,0 @@ -//! JSON structures used in the Mozilla preferences service.
- -use serde::Deserialize; - -#[derive(Debug, Deserialize)] -pub(crate) struct Manifest { - pub(crate) data: Vec<Item>, -} - -#[derive(Clone, Debug, Deserialize)] -pub(crate) struct Item { - pub(crate) attachment: Attachment, - pub(crate) channel: Channel, - pub(crate) id: String, - pub(crate) incremental: bool, - pub(crate) parent: Option<String>, -} - -#[derive(Clone, Debug, Deserialize, PartialEq)] -#[serde(rename_all = "snake_case")] -pub(crate) enum Channel { - Default, - Compat, -} - -#[derive(Clone, Debug, Deserialize)] -pub(crate) struct Attachment { - #[serde(with = "hex::serde")] - pub hash: Vec<u8>, - pub size: usize, - pub filename: String, - pub location: String, -} diff --git a/upki/src/revocation/fetch.rs b/upki/src/revocation/fetch.rs index 3e672c60..1ede7a0b 100644 --- a/upki/src/revocation/fetch.rs +++ b/upki/src/revocation/fetch.rs @@ -20,7 +20,7 @@ use std::process::ExitCode; use aws_lc_rs::digest; use tracing::{debug, info}; -use super::{Error, Filter, Manifest}; +use super::{Error, Manifest, ManifestFile}; use crate::Config; /// Update the local revocation cache by fetching updates over the network.
@@ -40,7 +40,7 @@ pub async fn fetch(dry_run: bool, config: &Config) -> Result { .use_rustls_tls() .timeout(Duration::from_secs(REQUEST_TIMEOUT)) .user_agent(format!( - "{} v{} ({})", + "{}/{} ({})", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"), env!("CARGO_PKG_REPOSITORY") @@ -145,16 +145,16 @@ impl Plan { steps.push(PlanStep::CreateDir(local.to_owned())); } - for filter in &manifest.filters { - unwanted_files.remove(Path::new(&filter.filename)); + for file in &manifest.files { + unwanted_files.remove(Path::new(&file.filename)); - let path = local.join(&filter.filename); + let path = local.join(&file.filename); match hash_file(&path) { - Ok(digest) if digest.as_ref() == filter.hash => continue, + Ok(digest) if digest.as_ref() == file.hash => continue, _ => {} } - steps.push(PlanStep::download(filter, remote_url, local)); + steps.push(PlanStep::download(file, remote_url, local)); } steps.push(PlanStep::SaveManifest { @@ -174,7 +174,7 @@ impl Plan { self.steps .iter() .filter_map(|s| match s { - PlanStep::Download { filter, .. } => Some(filter.size), + PlanStep::Download { file, .. } => Some(file.size), _ => None, }) .sum() @@ -185,9 +185,9 @@ impl Plan { enum PlanStep { CreateDir(PathBuf), - /// Download `filter` from `remote` to `local` + /// Download `file` from `remote` to `local` Download { - filter: Filter, + file: ManifestFile, /// URL. remote_url: String, /// Full path to output file. @@ -211,11 +211,11 @@ impl PlanStep { fs::create_dir_all(&path).map_err(|error| Error::CreateDirectory { error, path })? 
} Self::Download { - filter, + file, remote_url, local, } => { - debug!("downloading {:?}", filter); + debug!("downloading {:?}", file); let response = client .get(&remote_url) @@ -245,7 +245,7 @@ })?; match hash_file(&local) { - Ok(digest) if digest.as_ref() == filter.hash => {} + Ok(digest) if digest.as_ref() == file.hash => {} Ok(_) => return Err(Error::HashMismatch(local)), Err(error) => { return Err(Error::FileRead { @@ -282,11 +282,11 @@ Ok(()) } - fn download(filter: &Filter, remote_url: &str, local: &Path) -> Self { + fn download(file: &ManifestFile, remote_url: &str, local: &Path) -> Self { Self::Download { - filter: filter.clone(), - remote_url: format!("{remote_url}{}", filter.filename), - local: local.join(&filter.filename), + file: file.clone(), + remote_url: format!("{remote_url}{}", file.filename), + local: local.join(&file.filename), } } } @@ -296,13 +296,13 @@ impl fmt::Display for PlanStep { match self { Self::CreateDir(path) => write!(f, "create directory {path:?}"), Self::Download { - filter, + file, remote_url, local, } => write!( f, "download {} bytes from {remote_url} to {local:?}", - filter.size + file.size ), Self::Delete(path) => write!(f, "delete stale file {path:?}"), Self::SaveManifest { local_dir, .. } => { diff --git a/upki/src/revocation/mod.rs b/upki/src/revocation/mod.rs index 90c9704e..e929b52e 100644 --- a/upki/src/revocation/mod.rs +++ b/upki/src/revocation/mod.rs @@ -32,8 +32,9 @@ pub struct Manifest { /// Some human-readable text. pub comment: String, - /// List of filter files. - pub filters: Vec<Filter>, + /// List of required files.
+ #[serde(alias = "filters")] + pub files: Vec<ManifestFile>, } impl Manifest { @@ -72,7 +73,7 @@ ) -> Result { let key = input.key(); let cache_dir = config.revocation_cache_dir(); - for f in &self.filters { + for f in &self.files { let path = cache_dir.join(&f.filename); let bytes = match fs::read(&path) { Ok(bytes) => bytes, @@ -136,7 +137,7 @@ /// Manifest data for a single crlite filter file. #[derive(Clone, Debug, Deserialize, Serialize)] -pub struct Filter { +pub struct ManifestFile { /// Relative filename. /// /// This is also the suggested local filename. diff --git a/upki/tests/data/evolution/revocation/manifest.json b/upki/tests/data/evolution/revocation/manifest.json index 9dacf444..7e17558d 100644 --- a/upki/tests/data/evolution/revocation/manifest.json +++ b/upki/tests/data/evolution/revocation/manifest.json @@ -1,7 +1,7 @@ { "generated_at": 1765446031, "comment": "evolution from typical test manifest", - "filters": [ + "files": [ { "filename": "filter1.filter", "size": 11, diff --git a/upki/tests/data/typical/revocation/manifest.json b/upki/tests/data/typical/revocation/manifest.json index 2841a860..6f99231e 100644 --- a/upki/tests/data/typical/revocation/manifest.json +++ b/upki/tests/data/typical/revocation/manifest.json @@ -1,7 +1,7 @@ { "generated_at": 1765445031, "comment": "typical test manifest", - "filters": [ + "files": [ { "filename": "filter1.filter", "size": 11, diff --git a/upki/tests/data/verify_of_empty_manifest/revocation/manifest.json b/upki/tests/data/verify_of_empty_manifest/revocation/manifest.json index a7c4c565..04372747 100644 --- a/upki/tests/data/verify_of_empty_manifest/revocation/manifest.json +++ b/upki/tests/data/verify_of_empty_manifest/revocation/manifest.json @@ -1,5 +1,5 @@ { "generated_at": 1765445031, "comment": "empty manifest", - "filters": [] + "files": [] } diff --git a/upki/tests/integration.rs b/upki/tests/integration.rs index 7fdbbf5f..fce25b61 100644 --- a/upki/tests/integration.rs +++
b/upki/tests/integration.rs @@ -157,7 +157,7 @@ fn fetch_of_empty_manifest() { "); assert_snapshot!( server.into_log(), - @"GET /manifest.json -> 200 OK (81 bytes)" + @"GET /manifest.json -> 200 OK (79 bytes)" ); assert_eq!( list_dir(&temp.path().join("revocation")), @@ -186,7 +186,7 @@ fn full_fetch() { assert_snapshot!( server.into_log(), @r" - GET /manifest.json -> 200 OK (532 bytes) + GET /manifest.json -> 200 OK (530 bytes) GET /filter1.filter -> 200 OK (11 bytes) GET /filter2.delta -> 200 OK (14 bytes) GET /filter3.delta -> 200 OK (10 bytes) @@ -223,7 +223,7 @@ fn full_fetch_and_incremental_update() { assert_snapshot!( server.into_log(), @r" - GET /manifest.json -> 200 OK (532 bytes) + GET /manifest.json -> 200 OK (530 bytes) GET /filter1.filter -> 200 OK (11 bytes) GET /filter2.delta -> 200 OK (14 bytes) GET /filter3.delta -> 200 OK (10 bytes) @@ -257,7 +257,7 @@ fn full_fetch_and_incremental_update() { assert_snapshot!( server.into_log(), @r" - GET /manifest.json -> 200 OK (547 bytes) + GET /manifest.json -> 200 OK (545 bytes) GET /filter4.delta -> 200 OK (3 bytes) "); // filter2 is deleted (stale), filter4 is new @@ -314,7 +314,7 @@ fn typical_incremental_fetch() { assert_snapshot!( server.into_log(), @r" - GET /manifest.json -> 200 OK (532 bytes) + GET /manifest.json -> 200 OK (530 bytes) GET /filter2.delta -> 200 OK (14 bytes) ");