diff --git a/Cargo.lock b/Cargo.lock index ce7d1d682f4..ff4237097c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2018,6 +2018,7 @@ dependencies = [ "dropshot", "expectorate", "futures", + "http", "ipnetwork", "mockall", "nexus-client", diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 0a14e8f2cfc..99f3c6277d4 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -569,6 +569,7 @@ pub enum ResourceType { Project, Dataset, Disk, + DownloadArtifact, Instance, NetworkInterface, Rack, diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 92706dc279f..1954c3328c5 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -2,6 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use crate::nexus::BASE_ARTIFACT_DIR; /** * Handler functions (entrypoints) for HTTP APIs internal to the control plane */ @@ -19,6 +20,8 @@ use dropshot::HttpResponseUpdatedNoContent; use dropshot::Path; use dropshot::RequestContext; use dropshot::TypedBody; +use http::{Response, StatusCode}; +use hyper::Body; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::InstanceRuntimeState; use omicron_common::api::internal::nexus::ProducerEndpoint; @@ -26,6 +29,7 @@ use oximeter::types::ProducerResults; use oximeter_producer::{collect, ProducerIdPathParams}; use schemars::JsonSchema; use serde::Deserialize; +use std::path::PathBuf; use std::sync::Arc; use uuid::Uuid; @@ -44,6 +48,7 @@ pub fn internal_api() -> NexusApiDescription { api.register(cpapi_producers_post)?; api.register(cpapi_collectors_post)?; api.register(cpapi_metrics_collect)?; + api.register(cpapi_artifact_download)?; Ok(()) } @@ -280,3 +285,76 @@ async fn cpapi_metrics_collect( .instrument_dropshot_handler(&request_context, handler) .await } + +#[derive(Deserialize, JsonSchema)] +struct AllPath { + path: String, +} + +/// Endpoint used by Sled Agents to download cached artifacts. +#[endpoint { + method = GET, + path = "/artifacts/{path}", +}] +async fn cpapi_artifact_download( + request_context: Arc>>, + path: Path, +) -> Result, HttpError> { + let context = request_context.context(); + let nexus = &context.nexus; + let mut entry = PathBuf::from(BASE_ARTIFACT_DIR); + + // TODO: Most of the below code is ready to accept a multi-component path, + // such as in: + // https://github.com/oxidecomputer/dropshot/blob/78be3deda556a9339ea09f3a9961fd91389f8757/dropshot/examples/file_server.rs#L86-L89 + // + // However, openapi does *not* like that currently, so we limit the endpoint + // to only accepting single-component paths. + let path = vec![path.into_inner().path]; + + for component in &path { + // Dropshot should not provide "." and ".." components. + assert_ne!(component, "."); + assert_ne!(component, ".."); + entry.push(component); + + if entry.exists() { + // We explicitly prohibit consumers from following symlinks to prevent + // showing data outside of the intended directory. + let m = entry.symlink_metadata().map_err(|e| { + HttpError::for_bad_request( + None, + format!("Failed to query file metadata: {}", e), + ) + })?; + if m.file_type().is_symlink() { + return Err(HttpError::for_bad_request( + None, + "Cannot traverse symlinks".to_string(), + )); + } + } + } + + // Note - at this point, "entry" may or may not actually exist. + // We try to avoid creating intermediate artifacts until we know there + // is something "real" to download, as this would let malformed paths + // create defunct intermediate directories. + if entry.is_dir() { + return Err(HttpError::for_bad_request( + None, + "Directory download not supported".to_string(), + )); + } + let body = nexus.download_artifact(&entry).await?; + + // Derive the MIME type from the file name + let content_type = mime_guess::from_path(&entry) + .first() + .map_or_else(|| "text/plain".to_string(), |m| m.to_string()); + + Ok(Response::builder() + .status(StatusCode::OK) + .header(http::header::CONTENT_TYPE, content_type) + .body(body.into())?) +} diff --git a/nexus/src/nexus.rs b/nexus/src/nexus.rs index 83322eac401..79388e53805 100644 --- a/nexus/src/nexus.rs +++ b/nexus/src/nexus.rs @@ -67,6 +67,7 @@ use sled_agent_client::Client as SledAgentClient; use slog::Logger; use std::convert::TryInto; use std::net::SocketAddr; +use std::path::Path; use std::sync::Arc; use std::time::Duration; use steno::SagaId; @@ -107,6 +108,8 @@ pub trait TestInterfaces { ) -> CreateResult; } +pub static BASE_ARTIFACT_DIR: &str = "/var/tmp/oxide_artifacts"; + /** * Manages an Oxide fleet -- the heart of the control plane */ @@ -2375,6 +2378,90 @@ impl Nexus { pub async fn session_hard_delete(&self, token: String) -> DeleteResult { self.db_datastore.session_hard_delete(token).await } + + /// Downloads a file from within [`BASE_ARTIFACT_DIR`]. + pub async fn download_artifact>( + &self, + path: P, + ) -> Result, Error> { + let path = path.as_ref(); + if !path.starts_with(BASE_ARTIFACT_DIR) { + return Err(Error::internal_error( + "Cannot access path outside artifact directory", + )); + } + + if !path.exists() { + info!( + self.log, + "Accessing {} - needs to be downloaded", + path.display() + ); + // If the artifact doesn't exist, we should download it. + // + // TODO: There also exists the question of "when should we *remove* + // things from BASE_ARTIFACT_DIR", which we should also resolve. + // Demo-quality solution could be "destroy it on boot" or something? + // (we aren't doing that yet). + + let file_name = path.strip_prefix(BASE_ARTIFACT_DIR).unwrap(); + match file_name.to_str().unwrap() { + // TODO: iliana if you're reading this, + // 1. I'm sorry + // 2. We should probably do something less bad here + // + // At the moment, the only file we "know" how to download is a + // testfile, which is pulled out of thin air. Realistically, we + // should pull this from the DB + query an external server. + // Happy to delete this as soon as we can. + "testfile" => { + // We should only create the intermediate directories + // after validating that this is a real artifact that + // can (and should) be downloaded. + if let Some(parent) = path.parent() { + tokio::fs::create_dir_all(parent).await.map_err(|e| { + Error::internal_error( + &format!("Failed to create intermediate directory: {}", e) + ) + })?; + } + tokio::fs::write(path, "testfile contents").await.map_err( + |e| { + Error::internal_error(&format!( + "Failed to write file: {}", + e + )) + }, + )?; + } + _ => { + return Err(Error::not_found_other( + ResourceType::DownloadArtifact, + file_name.display().to_string(), + )); + } + } + } else { + info!(self.log, "Accessing {} - already exists", path.display()); + } + + // TODO: These artifacts could be quite large - we should figure out how to + // stream this file back instead of holding it entirely in-memory in a + // Vec. + // + // Options: + // - RFC 7233 - "Range Requests" (is this HTTP/1.1 only?) + // https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests + // - "Roll our own". See: + // https://stackoverflow.com/questions/20969331/standard-method-for-http-partial-upload-resume-upload + let body = tokio::fs::read(&path).await.map_err(|e| { + Error::internal_error(&format!( + "Cannot read artifact from filesystem: {}", + e + )) + })?; + Ok(body) + } } fn generate_session_token() -> String { diff --git a/nexus/tests/integration_tests/artifact_download.rs b/nexus/tests/integration_tests/artifact_download.rs new file mode 100644 index 00000000000..3b790756580 --- /dev/null +++ b/nexus/tests/integration_tests/artifact_download.rs @@ -0,0 +1,73 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use http::method::Method; +use http::StatusCode; +use nexus_test_utils::test_setup; + +// Tests the "normal" case of downloading an artifact. +// +// This will typically be invoked by the Sled Agent, after instructed +// to access an artifact. +#[tokio::test] +async fn test_download_known_artifact_returns_ok() { + let cptestctx = test_setup("test_download_known_artifact_returns_ok").await; + let client = &cptestctx.internal_client; + + // TODO: Can we replace this with a "real" small file that must be + // downloaded, instead of synthetically created? + let filename = "testfile"; + let artifact_get_url = format!("/artifacts/{}", filename); + + let response = client + .make_request_no_body(Method::GET, &artifact_get_url, StatusCode::OK) + .await + .unwrap(); + + assert_eq!( + hyper::body::to_bytes(response.into_body()).await.unwrap(), + "testfile contents" + ); + cptestctx.teardown().await; +} + +// Tests that missing artifacts return "NOT_FOUND". +#[tokio::test] +async fn test_download_bad_artifact_not_found() { + let cptestctx = test_setup("test_download_bad_artifact_not_found").await; + let client = &cptestctx.internal_client; + + let filename = "not_a_real_artifact"; + let artifact_get_url = format!("/artifacts/{}", filename); + + client + .make_request_error( + Method::GET, + &artifact_get_url, + StatusCode::NOT_FOUND, + ) + .await; + + cptestctx.teardown().await; +} + +// Tests that ".." paths are disallowed by dropshot. +#[tokio::test] +async fn test_download_with_dots_fails() { + let cptestctx = test_setup("test_download_with_dots_fails").await; + let client = &cptestctx.internal_client; + + let filename = "hey/can/you/look/../../../../up/the/directory/tree"; + let artifact_get_url = format!("/artifacts/{}", filename); + + client + .make_request_error( + Method::GET, + &artifact_get_url, + StatusCode::BAD_REQUEST, + ) + .await; + + cptestctx.teardown().await; +} diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index 1c581dca5c7..1de7f818c04 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -3,6 +3,7 @@ //! See the driver in the parent directory for how and why this is structured //! the way it is. +mod artifact_download; mod authn_http; mod authz; mod basic; diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 7c300bd12d1..b40e13d1284 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -10,6 +10,24 @@ "version": "0.0.1" }, "paths": { + "/artifacts/{path}": { + "get": { + "description": "Endpoint used by Sled Agents to download cached artifacts.", + "operationId": "cpapi_artifact_download", + "parameters": [ + { + "in": "path", + "name": "path", + "required": true, + "schema": { + "type": "string" + }, + "style": "simple" + } + ], + "responses": {} + } + }, "/disks/{disk_id}": { "put": { "description": "Report updated state for a disk.", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index ec525b2b23c..bff1c530d26 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -87,6 +87,26 @@ } } } + }, + "/update": { + "post": { + "operationId": "update_artifact", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateArtifact" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation" + } + } + } } }, "components": { @@ -611,6 +631,32 @@ "subnet_id", "vpc_id" ] + }, + "UpdateArtifact": { + "type": "object", + "properties": { + "kind": { + "$ref": "#/components/schemas/UpdateArtifactKind" + }, + "name": { + "type": "string" + }, + "version": { + "type": "integer", + "format": "int64" + } + }, + "required": [ + "kind", + "name", + "version" + ] + }, + "UpdateArtifactKind": { + "type": "string", + "enum": [ + "Zone" + ] } } } diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index ea2b2c77299..24d4aa708f1 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -43,6 +43,7 @@ zone = "0.1" [dev-dependencies] expectorate = "1.0.4" +http = "0.2.5" mockall = "0.11" omicron-test-utils = { path = "../test-utils" } openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index d7b433118c9..71d86665122 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -5,13 +5,11 @@ //! HTTP entrypoint functions for the sled agent's exposed API use super::params::DiskEnsureBody; -use dropshot::endpoint; -use dropshot::ApiDescription; -use dropshot::HttpError; -use dropshot::HttpResponseOk; -use dropshot::Path; -use dropshot::RequestContext; -use dropshot::TypedBody; +use super::updates::UpdateArtifact; +use dropshot::{ + endpoint, ApiDescription, HttpError, HttpResponseOk, Path, RequestContext, + TypedBody, +}; use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::InstanceRuntimeState; @@ -30,6 +28,7 @@ pub fn api() -> SledApiDescription { fn register_endpoints(api: &mut SledApiDescription) -> Result<(), String> { api.register(instance_put)?; api.register(disk_put)?; + api.register(update_artifact)?; Ok(()) } @@ -93,3 +92,19 @@ async fn disk_put( .map_err(|e| Error::from(e))?, )) } + +#[endpoint { + method = POST, + path = "/update" +}] +async fn update_artifact( + rqctx: Arc>, + artifact: TypedBody, +) -> Result, HttpError> { + let sa = rqctx.context(); + let artifact = artifact.into_inner(); + + Ok(HttpResponseOk( + sa.update_artifact(&artifact).await.map_err(|e| Error::from(e))?, + )) +} diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index c31e977cc8f..f26b879e665 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -28,6 +28,7 @@ mod running_zone; pub mod server; mod sled_agent; mod storage_manager; +mod updates; mod vnic; #[cfg(test)] diff --git a/sled-agent/src/mocks/mod.rs b/sled-agent/src/mocks/mod.rs index 438a83f4553..d7e2a0063c1 100644 --- a/sled-agent/src/mocks/mod.rs +++ b/sled-agent/src/mocks/mod.rs @@ -10,6 +10,7 @@ use nexus_client::types::{ DatasetPutRequest, DatasetPutResponse, InstanceRuntimeState, SledAgentStartupInfo, ZpoolPutRequest, ZpoolPutResponse, }; +use reqwest::Response; use slog::Logger; use uuid::Uuid; @@ -26,6 +27,10 @@ mock! { id: &Uuid, new_runtime_state: &InstanceRuntimeState, ) -> Result<(), Error>; + pub async fn cpapi_artifact_download( + &self, + name: &str + ) -> Result; pub async fn zpool_put( &self, sled_id: &Uuid, diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 1ee6680ede7..33e9d88b30a 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -11,6 +11,7 @@ use crate::illumos::zfs::{ use crate::instance_manager::InstanceManager; use crate::params::DiskStateRequested; use crate::storage_manager::StorageManager; +use crate::updates; use omicron_common::api::{ internal::nexus::DiskRuntimeState, internal::nexus::InstanceRuntimeState, internal::sled_agent::InstanceHardware, @@ -50,6 +51,9 @@ pub enum Error { #[error("Error managing storage: {0}")] Storage(#[from] crate::storage_manager::Error), + + #[error("Error updating: {0}")] + Download(#[from] crate::updates::Error), } impl From for omicron_common::api::external::Error { @@ -64,6 +68,7 @@ impl From for omicron_common::api::external::Error { /// /// Contains both a connection to the Nexus, as well as managed instances. pub struct SledAgent { + nexus_client: Arc, _storage: StorageManager, instances: InstanceManager, } @@ -122,7 +127,7 @@ impl SledAgent { } let instances = InstanceManager::new(log, vlan, nexus_client.clone())?; - Ok(SledAgent { _storage: storage, instances }) + Ok(SledAgent { nexus_client, _storage: storage, instances }) } /// Idempotently ensures that a given Instance is running on the sled. @@ -150,4 +155,15 @@ impl SledAgent { ) -> Result { todo!("Disk attachment not yet implemented"); } + + /// Downloads and applies an artifact. + pub async fn update_artifact( + &self, + artifact: &updates::UpdateArtifact, + ) -> Result<(), Error> { + artifact + .download(self.nexus_client.as_ref()) + .await + .map_err(|e| Error::Download(e)) + } } diff --git a/sled-agent/src/updates.rs b/sled-agent/src/updates.rs new file mode 100644 index 00000000000..e21cc070412 --- /dev/null +++ b/sled-agent/src/updates.rs @@ -0,0 +1,136 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Management of per-sled updates + +use schemars::JsonSchema; +use serde::Deserialize; +use std::path::Path; + +#[cfg(test)] +use crate::mocks::MockNexusClient as NexusClient; +#[cfg(not(test))] +use nexus_client::Client as NexusClient; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("I/O Error: {0}")] + Io(#[from] std::io::Error), + + #[error("Failed to contact nexus: {0}")] + Nexus(anyhow::Error), + + #[error("Failed to read response from Nexus: {0}")] + Response(reqwest::Error), +} + +// TODO: De-duplicate this struct with the one in iliana's PR? +// +// This should likely be a wrapper around that type. +#[derive(Clone, Debug, Deserialize, JsonSchema)] +pub struct UpdateArtifact { + pub name: String, + pub version: i64, + pub kind: UpdateArtifactKind, +} + +// TODO: De-dup me too. +#[derive(Clone, Debug, Deserialize, JsonSchema)] +pub enum UpdateArtifactKind { + Zone, +} + +impl UpdateArtifact { + fn artifact_directory(&self) -> &'static Path { + match self.kind { + UpdateArtifactKind::Zone => Path::new("/var/tmp/zones"), + } + } + + /// Downloads an update artifact. + /// + /// The artifact is eventually stored in the path: + /// / + /// + /// Such as: + /// /var/tmp/zones/myzone + /// + /// While being downloaded, it is stored in a path also containing the + /// version: + /// / - + /// + /// Such as: + /// /var/tmp/zones/myzone-3 + pub async fn download(&self, nexus: &NexusClient) -> Result<(), Error> { + let file_name = format!("{}-{}", self.name, self.version); + let response = nexus + .cpapi_artifact_download(&file_name) + .await + .map_err(|e| Error::Nexus(e))?; + + let mut path = self.artifact_directory().to_path_buf(); + tokio::fs::create_dir_all(&path).await?; + + // We download the file to a location named "-". + // We then rename it to "" after it has successfully + // downloaded, to signify that it is ready for usage. + let mut tmp_path = path.clone(); + tmp_path.push(file_name); + path.push(&self.name); + + // Write the file in its entirety, replacing it if it exists. + // TODO: Would love to stream this instead. + let contents = + response.bytes().await.map_err(|e| Error::Response(e))?; + tokio::fs::write(&tmp_path, contents).await?; + tokio::fs::rename(&tmp_path, &path).await?; + + Ok(()) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::mocks::MockNexusClient; + use http::{Response, StatusCode}; + + #[tokio::test] + #[serial_test::serial] + async fn test_write_artifact_to_filesystem() { + // The (completely fabricated) artifact we'd like to download. + let expected_name = "test_artifact"; + let expected_contents = "test_artifact contents"; + let artifact = UpdateArtifact { + name: expected_name.to_string(), + version: 3, + kind: UpdateArtifactKind::Zone, + }; + let expected_path = artifact.artifact_directory().join(expected_name); + + // Remove the file if it already exists. + let _ = tokio::fs::remove_file(&expected_path).await; + + // Let's pretend this is an artifact Nexus can actually give us. + let mut nexus_client = MockNexusClient::default(); + nexus_client.expect_cpapi_artifact_download().times(1).return_once( + move |name| { + assert_eq!(name, "test_artifact-3"); + let response = Response::builder() + .status(StatusCode::OK) + .body(expected_contents) + .unwrap(); + Ok(response.into()) + }, + ); + + // This should download the file to our local filesystem. + artifact.download(&nexus_client).await.unwrap(); + + // Confirm the download succeeded. + assert!(expected_path.exists()); + let contents = tokio::fs::read(&expected_path).await.unwrap(); + assert_eq!(std::str::from_utf8(&contents).unwrap(), expected_contents); + } +}