diff --git a/.gitmodules b/.gitmodules index fef8932cfba..2b2b66abf36 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,10 +2,6 @@ path = deps/rust-sgx-sdk url = https://github.com/apache/incubator-teaclave-sgx-sdk branch = master -[submodule "deps/xmas-elf"] - path = deps/xmas-elf - url = https://github.com/occlum/xmas-elf - branch = master [submodule "deps/sefs"] path = deps/sefs url = https://github.com/occlum/sefs.git @@ -26,3 +22,7 @@ [submodule "deps/ringbuf"] path = deps/ringbuf url = https://github.com/agerasev/ringbuf.git +[submodule "deps/goblin"] + path = deps/goblin + url = https://github.com/jessehui/goblin.git + branch = 0.3.0-for-occlum diff --git a/deps/goblin b/deps/goblin new file mode 160000 index 00000000000..26c9e42279a --- /dev/null +++ b/deps/goblin @@ -0,0 +1 @@ +Subproject commit 26c9e42279a71db64f1e4fbea67f1556844bdc05 diff --git a/deps/sefs b/deps/sefs index f9492a82f54..5f3a66904ae 160000 --- a/deps/sefs +++ b/deps/sefs @@ -1 +1 @@ -Subproject commit f9492a82f5453a4cfaeeebe0fbd20e65e96ef8f4 +Subproject commit 5f3a66904ae24c1b021b4425775a990bd7dc2732 diff --git a/deps/xmas-elf b/deps/xmas-elf deleted file mode 160000 index 792105500ef..00000000000 --- a/deps/xmas-elf +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 792105500ef80bdfe6f45ca6f64865121e4699c5 diff --git a/src/libos/Cargo.lock b/src/libos/Cargo.lock index e1cecd952b1..b26d257ce1d 100644 --- a/src/libos/Cargo.lock +++ b/src/libos/Cargo.lock @@ -9,6 +9,7 @@ dependencies = [ "bitflags", "bitvec", "derive_builder", + "goblin", "lazy_static", "log", "memoffset", @@ -19,6 +20,7 @@ dependencies = [ "rcore-fs-sefs", "rcore-fs-unionfs", "ringbuf", + "scroll", "serde", "serde_json", "sgx_cov", @@ -27,7 +29,6 @@ dependencies = [ "sgx_tse", "sgx_tstd", "sgx_types", - "xmas-elf", ] [[package]] @@ -204,6 +205,15 @@ dependencies = [ "typenum", ] +[[package]] +name = "goblin" +version = "0.3.0" +dependencies = [ + "log", + "plain", + "scroll", +] + [[package]] name = "hashbrown_tstd" version = "0.9.0" @@ -254,6 +264,12 @@ dependencies = [ "autocfg 1.0.1", ] +[[package]] +name = "plain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + [[package]] name = "proc-macro2" version = "1.0.19" @@ -466,6 +482,26 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +[[package]] +name = "scroll" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fda28d4b4830b807a8b43f7b0e6b5df875311b3e7621d84577188c175b6ec1ec" +dependencies = [ + "scroll_derive", +] + +[[package]] +name = "scroll_derive" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b12bd20b94c7cdfda8c7ba9b92ad0d9a56e3fa018c25fca83b51aa664c9b4c0d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "serde" version = "1.0.104" @@ -517,7 +553,6 @@ dependencies = [ "lazy_static", "profiler_builtins", "sgx_rand", - "sgx_trts", "sgx_tstd", "sgx_types", ] @@ -675,16 +710,3 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "xmas-elf" -version = "0.6.2" -dependencies = [ - "zero", -] - -[[package]] -name = "zero" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f1bc8a6b2005884962297587045002d8cfb8dcec9db332f4ca216ddc5de82c5" diff --git a/src/libos/Cargo.toml b/src/libos/Cargo.toml index 88e27bccd52..a42645577c4 100644 --- a/src/libos/Cargo.toml +++ b/src/libos/Cargo.toml @@ -25,6 +25,7 @@ rcore-fs-devfs = { path = "../../deps/sefs/rcore-fs-devfs" } serde = { path = "../../deps/serde-sgx/serde", features = ["derive"] } serde_json = { path = "../../deps/serde-json-sgx" } memoffset = "0.6.1" +scroll = { version = "0.10.2", default-features = false } [patch.'https://github.com/apache/teaclave-sgx-sdk.git'] sgx_tstd = { path = "../../deps/rust-sgx-sdk/sgx_tstd" } @@ -39,10 +40,10 @@ dcap = [] # DCAP support. The compilation relies on DCAP package. cov = ["sgx_cov"] # Enable coverage colletcion. [target.'cfg(not(target_env = "sgx"))'.dependencies] -xmas-elf = { path = "../../deps/xmas-elf" } sgx_types = { path = "../../deps/rust-sgx-sdk/sgx_types" } sgx_tstd = { path = "../../deps/rust-sgx-sdk/sgx_tstd", features = ["backtrace"] } sgx_trts = { path = "../../deps/rust-sgx-sdk/sgx_trts" } sgx_tse = { path = "../../deps/rust-sgx-sdk/sgx_tse" } sgx_tcrypto = { path = "../../deps/rust-sgx-sdk/sgx_tcrypto" } sgx_cov = { path = "../../deps/rust-sgx-sdk/sgx_cov", optional = true } +goblin = { path = "../../deps/goblin", default-features = false, features = ["elf64", "elf32", "endian_fd"] } diff --git a/src/libos/src/lib.rs b/src/libos/src/lib.rs index 5c42189bb84..f25891144ce 100644 --- a/src/libos/src/lib.rs +++ b/src/libos/src/lib.rs @@ -29,10 +29,11 @@ extern crate sgx_types; #[cfg(not(target_env = "sgx"))] #[macro_use] extern crate sgx_tstd as std; +extern crate goblin; +extern crate scroll; extern crate sgx_tcrypto; extern crate sgx_trts; extern crate sgx_tse; -extern crate xmas_elf; #[macro_use] extern crate lazy_static; #[macro_use] diff --git a/src/libos/src/process/do_spawn/exec_loader.rs b/src/libos/src/process/do_spawn/exec_loader.rs index 0bae60a5a63..303c58b37b3 100644 --- a/src/libos/src/process/do_spawn/exec_loader.rs +++ b/src/libos/src/process/do_spawn/exec_loader.rs @@ -1,21 +1,27 @@ +use super::super::elf_file::*; use super::ThreadRef; use crate::fs::{FileMode, INodeExt}; use crate::prelude::*; use rcore_fs::vfs::INode; use std::ffi::CString; -/// Load an ELF file itself or a script's interpreter into a vector. +/// Load an ELF file header or a script's interpreter header into a vector. /// -/// If the file is an executable binary, then just load this file. +/// If the file is an executable binary, then just load this file's header. /// If the file is an script text, then parse the shebang and load -/// the interpreter. -pub fn load_exec_file_to_vec( +/// the interpreter header. +pub fn load_exec_file_hdr_to_vec( file_path: &str, current_ref: &ThreadRef, -) -> Result<(Option, Vec)> { - let file_buf = load_file_to_vec(&file_path, current_ref)?; - let is_script = is_script_file(&file_buf); - if is_script { +) -> Result<(Option, Arc, Vec, ElfHeader)> { + let (inode, file_buf, elf_hdr) = load_file_hdr_to_vec(&file_path, current_ref)?; + if elf_hdr.is_some() { + Ok((None, inode, file_buf, elf_hdr.unwrap())) + } else { + // loaded file is not Elf format, try script file + if !is_script_file(&file_buf) { + return_errno!(ENOEXEC, "unknown executable file format"); + } // load interpreter let interpreter_path = parse_script_interpreter(&file_buf)?; if interpreter_path.starts_with("/host/") { @@ -24,10 +30,14 @@ pub fn load_exec_file_to_vec( "libos doesn't support executing binaries from \"/host\" directory" ); } - let elf_buf = load_file_to_vec(&interpreter_path, current_ref)?; - Ok((Some(interpreter_path), elf_buf)) - } else { - Ok((None, file_buf)) + let (interp_inode, interp_buf, interp_hdr) = + load_file_hdr_to_vec(&interpreter_path, current_ref)?; + let interp_hdr = if interp_hdr.is_none() { + return_errno!(ENOEXEC, "scrip interpreter is not ELF format"); + } else { + interp_hdr.unwrap() + }; + Ok((Some(interpreter_path), interp_inode, interp_buf, interp_hdr)) } } @@ -59,7 +69,10 @@ fn parse_script_interpreter(file_buf: &Vec) -> Result { Ok(interpreter.to_owned()) } -pub fn load_file_to_vec(file_path: &str, current_ref: &ThreadRef) -> Result> { +pub fn load_file_hdr_to_vec( + file_path: &str, + current_ref: &ThreadRef, +) -> Result<(Arc, Vec, Option)> { let inode = current_ref .fs() .lock() @@ -79,7 +92,16 @@ pub fn load_file_to_vec(file_path: &str, current_ref: &ThreadRef) -> Result Result { let mut argv = argv.clone().to_vec(); - let (is_script, elf_buf) = load_exec_file_to_vec(file_path, current_ref)?; + let (is_script, elf_inode, mut elf_buf, elf_header) = + load_exec_file_hdr_to_vec(file_path, current_ref)?; // elf_path might be different from file_path because file_path could lead to a script text file. // And intepreter will be the loaded ELF. @@ -117,30 +118,28 @@ fn new_process( file_path.to_string() }; - let exec_elf_file = - ElfFile::new(&elf_buf).cause_err(|e| errno!(e.errno(), "invalid executable"))?; - // Get the ldso_path of the executable - let exec_interp_segment = exec_elf_file - .program_headers() - .find(|segment| segment.is_interpreter()) + let exec_elf_hdr = ElfFile::new(&elf_inode, &mut elf_buf, elf_header) + .cause_err(|e| errno!(e.errno(), "invalid executable"))?; + let ldso_path = exec_elf_hdr + .elf_interpreter() .ok_or_else(|| errno!(EINVAL, "cannot find the interpreter segment"))?; - let ldso_path = match exec_interp_segment.get_content(&exec_elf_file) { - SegmentData::Undefined(bytes) => std::ffi::CStr::from_bytes_with_nul(bytes) - .unwrap() - .to_str() - .unwrap(), - _ => return_errno!(EINVAL, "cannot get ldso_path from executable"), + trace!("ldso_path = {:?}", ldso_path); + let (ldso_inode, mut ldso_elf_hdr_buf, ldso_elf_header) = + load_file_hdr_to_vec(ldso_path, current_ref) + .cause_err(|e| errno!(e.errno(), "cannot load ld.so"))?; + let ldso_elf_header = if ldso_elf_header.is_none() { + return_errno!(ENOEXEC, "ldso header is not ELF format"); + } else { + ldso_elf_header.unwrap() }; - let ldso_elf_buf = load_file_to_vec(ldso_path, current_ref) - .cause_err(|e| errno!(e.errno(), "cannot load ld.so"))?; - let ldso_elf_file = - ElfFile::new(&ldso_elf_buf).cause_err(|e| errno!(e.errno(), "invalid ld.so"))?; + let ldso_elf_hdr = ElfFile::new(&ldso_inode, &mut ldso_elf_hdr_buf, ldso_elf_header) + .cause_err(|e| errno!(e.errno(), "invalid ld.so"))?; let new_process_ref = { let process_ref = current_ref.process().clone(); - let vm = init_vm::do_init(&exec_elf_file, &ldso_elf_file)?; - let mut auxvec = init_auxvec(&vm, &exec_elf_file)?; + let vm = init_vm::do_init(&exec_elf_hdr, &ldso_elf_hdr)?; + let mut auxvec = init_auxvec(&vm, &exec_elf_hdr)?; // Notify debugger to load the symbols from elf file let ldso_elf_base = vm.get_elf_ranges()[1].start() as u64; @@ -163,8 +162,7 @@ fn new_process( let task = { let ldso_entry = { let ldso_range = vm.get_elf_ranges()[1]; - let ldso_entry = - ldso_range.start() + ldso_elf_file.elf_header().entry_point() as usize; + let ldso_entry = ldso_range.start() + ldso_elf_hdr.elf_header().e_entry as usize; if !ldso_range.contains(ldso_entry) { return_errno!(EINVAL, "Invalid program entry"); } @@ -309,13 +307,10 @@ fn init_auxvec(process_vm: &ProcessVM, exec_elf_file: &ElfFile) -> Result { elf_buf: &'a [u8], - elf_inner: xmas_elf::ElfFile<'a>, + elf_inner: Elf<'a>, + file_inode: &'a Arc, +} + +impl<'a> Debug for ElfFile<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "ElfFile {{ inode: ???, elf_buf: {:?}, elf_inner: {:?} }}", + self.elf_buf, self.elf_inner, + ) + } } impl<'a> ElfFile<'a> { - pub fn new(elf_buf: &'a [u8]) -> Result { - let elf_inner = - xmas_elf::ElfFile::new(elf_buf).map_err(|e| errno!(ENOEXEC, "invalid ELF header"))?; - Self::validate(&elf_inner)?; + pub fn new( + file_inode: &'a Arc, + mut elf_buf: &'a mut [u8], + header: ElfHeader, + ) -> Result> { + let ctx = Ctx { + le: scroll::Endian::Little, + container: Container::Big, + }; + let program_headers = ProgramHeader::parse( + elf_buf, + header.e_phoff as usize, + header.e_phnum as usize, + ctx, + ) + .map_err(|e| errno!(ENOEXEC, "invalid program headers"))?; - Ok(ElfFile { elf_buf, elf_inner }) + // read interpreter path + let mut count = 0; + let mut offset = 0; + let mut intepreter_count = 0; + let mut intepreter_offset = 0; + for ph in &program_headers { + ph.validate()?; + if ph.p_type == program_header::PT_INTERP && ph.p_filesz != 0 { + intepreter_count = ph.p_filesz as usize; + intepreter_offset = ph.p_offset as usize; + trace!( + "PT_INTERP offset = {}, count = {}", + intepreter_offset, + intepreter_count + ); + file_inode.read_at( + intepreter_offset, + &mut elf_buf[intepreter_offset..intepreter_offset + intepreter_count], + ); + break; + } + } + + let interpreter = if intepreter_count == 0 { + None + } else { + let cstr: &CStr = CStr::from_bytes_with_nul( + &elf_buf[intepreter_offset..intepreter_offset + intepreter_count], + ) + .map_err(|e| errno!(ENOEXEC, "invalid interpreter path"))?; + cstr.to_str().ok() + }; + trace!("interpreter = {:?}", interpreter); + let elf_inner = goblin::elf::Elf::lazy_parse(header, program_headers, interpreter) + .map_err(|e| errno!(ENOEXEC, "invalid ELF format"))?; + Ok(ElfFile { + elf_buf, + elf_inner, + file_inode, + }) } - pub fn program_headers<'b>(&'b self) -> ProgramIter<'b, 'a> { - self.elf_inner.program_iter() + pub fn program_headers<'b>(&'b self) -> impl Iterator { + self.elf_inner.program_headers.iter() } pub fn elf_header(&self) -> &ElfHeader { - &self.elf_inner.header.pt2 + &self.elf_inner.header + } + + pub fn elf_interpreter(&self) -> Option<&'a str> { + self.elf_inner.interpreter } pub fn as_slice(&self) -> &[u8] { self.elf_buf } - fn validate(elf_inner: &xmas_elf::ElfFile) -> Result<()> { - // Validate the ELF header - xmas_elf::header::sanity_check(elf_inner) - .map_err(|e| errno!(ENOEXEC, "invalid ELF header"))?; - // Validate ELF type - if elf_inner.header.pt2.type_().as_type() != xmas_elf::header::Type::SharedObject { + pub fn file_inode(&self) -> &Arc { + self.file_inode + } + + pub fn parse_elf_hdr(inode: &Arc, elf_buf: &mut Vec) -> Result { + // TODO: Sanity check the number of program headers.. + let mut phdr_start = 0; + let mut phdr_end = 0; + + let hdr_size = ELF64_HDR_SIZE; + let elf_hdr = + Elf::parse_elf_hdr(&elf_buf).map_err(|e| errno!(ENOEXEC, "invalid ELF header"))?; + + // executables built with -fPIE are type ET_DYN (shared object file) + if elf_hdr.e_type != ET_DYN { return_errno!(ENOEXEC, "ELF is not position-independent"); } - // Validate the segments - for segment in elf_inner.program_iter() { - segment.validate()?; + + if elf_hdr.e_phnum == 0 { + return_errno!(ENOEXEC, "ELF doesn't have any program segments"); } - Ok(()) + + let program_hdr_table_size = elf_hdr.e_phnum * elf_hdr.e_phentsize; + inode.read_at( + elf_hdr.e_phoff as usize, + &mut elf_buf[hdr_size..hdr_size + (program_hdr_table_size as usize)], + )?; + Ok(elf_hdr) } } @@ -53,43 +139,33 @@ pub trait ProgramHeaderExt<'a> { fn loadable(&self) -> bool; fn is_interpreter(&self) -> bool; fn validate(&self) -> Result<()>; - fn get_content(&self, elf_file: &ElfFile<'a>) -> SegmentData<'a>; + fn get_content(&self, elf_file: &ElfFile<'a>) -> &'a [u8]; } -impl<'a> ProgramHeaderExt<'a> for ProgramHeader<'a> { +impl<'a> ProgramHeaderExt<'a> for ProgramHeader { /// Is the segment loadable? fn loadable(&self) -> bool { - let type_ = self.get_type().unwrap(); - type_ == xmas_elf::program::Type::Load + let type_ = self.p_type; + type_ == goblin::elf::program_header::PT_LOAD } fn is_interpreter(&self) -> bool { - let type_ = self.get_type().unwrap(); - type_ == xmas_elf::program::Type::Interp + let type_ = self.p_type; + type_ == goblin::elf::program_header::PT_INTERP } - fn get_content(&self, elf_file: &ElfFile<'a>) -> SegmentData<'a> { - self.get_data(&elf_file.elf_inner).unwrap() + fn get_content(&self, elf_file: &ElfFile<'a>) -> &'a [u8] { + let file_range = self.file_range(); + &elf_file.elf_buf[file_range.start..file_range.end] } /// Do some basic sanity checks in case the ELF is corrupted somehow fn validate(&self) -> Result<()> { - let ph64 = match self { - ProgramHeader::Ph32(ph) => { - return_errno!(ENOEXEC, "not support 32-bit ELF"); - } - ProgramHeader::Ph64(ph64) => ph64, - }; - if !ph64.align.is_power_of_two() { + if !self.p_align.is_power_of_two() { return_errno!(EINVAL, "invalid memory alignment"); } - if (ph64.offset % ph64.align) != (ph64.virtual_addr % ph64.align) { - return_errno!( - EINVAL, - "memory address and file offset is not equal, per modulo" - ); - } - if ph64.mem_size < ph64.file_size { + + if self.p_memsz < self.p_filesz { return_errno!(EINVAL, "memory size must be no less than file size"); } Ok(()) diff --git a/src/libos/src/vm/process_vm.rs b/src/libos/src/vm/process_vm.rs index 6563fa7c4f8..dd8591e3da3 100644 --- a/src/libos/src/vm/process_vm.rs +++ b/src/libos/src/vm/process_vm.rs @@ -65,8 +65,8 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> { elf.program_headers() .filter(|segment| segment.loadable()) .fold(VMLayout::new_empty(), |mut elf_layout, segment| { - let segment_size = (segment.virtual_addr() + segment.mem_size()) as usize; - let segment_align = segment.align() as usize; + let segment_size = (segment.p_vaddr + segment.p_memsz) as usize; + let segment_align = segment.p_align as usize; let segment_layout = VMLayout::new(segment_size, segment_align).unwrap(); elf_layout.extend(&segment_layout); elf_layout @@ -172,10 +172,10 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> { fn init_elf_memory(elf_range: &VMRange, elf_file: &ElfFile) -> Result<()> { // Destination buffer: ELF appeared in the process let elf_proc_buf = unsafe { elf_range.as_slice_mut() }; - // Set zero for the buffer - for b in &mut elf_proc_buf[..] { - *b = 0; - } + let mut empty_offset_vec: Vec<(usize, usize)> = Vec::with_capacity(3); // usally two loadable segments + let mut empty_start_offset = 0; + let mut empty_end_offset = 0; + // Source buffer: ELF stored in the ELF file let elf_file_buf = elf_file.as_slice(); // Init all loadable segements @@ -183,17 +183,35 @@ impl<'a, 'b> ProcessVMBuilder<'a, 'b> { .program_headers() .filter(|segment| segment.loadable()) .for_each(|segment| { - let file_size = segment.file_size() as usize; - let file_offset = segment.offset() as usize; - let mem_addr = segment.virtual_addr() as usize; - let mem_size = segment.mem_size() as usize; + let file_size = segment.p_filesz as usize; + let file_offset = segment.p_offset as usize; + let mem_addr = segment.p_vaddr as usize; + let mem_size = segment.p_memsz as usize; debug_assert!(file_size <= mem_size); // The first file_size bytes are loaded from the ELF file, // the remaining (mem_size - file_size) bytes are zeros. - elf_proc_buf[mem_addr..mem_addr + file_size] - .copy_from_slice(&elf_file_buf[file_offset..file_offset + file_size]); + elf_file.file_inode().read_at( + file_offset, + &mut elf_proc_buf[mem_addr..mem_addr + file_size], + ); + + empty_end_offset = mem_addr; + empty_offset_vec.push((empty_start_offset, empty_end_offset)); + empty_start_offset = empty_end_offset + file_size; + }); + + empty_offset_vec.push((empty_start_offset, elf_proc_buf.len() - 1)); + + // Set zero for the remain part of the buffer + empty_offset_vec + .iter() + .for_each(|(start_offset, end_offset)| { + for b in &mut elf_proc_buf[*start_offset..*end_offset] { + *b = 0; + } }); + Ok(()) } }