Skip to content

Commit

Permalink
strtab: Parse Strtab only once on Creation (#275)
Browse files Browse the repository at this point in the history
* Parse Strtab immediately when created,
instead of on demand when entries are requested.
* Renames from_slice_unsafe => from_slice_unparsed
and updates comments.
  • Loading branch information
Lichtso committed Jul 2, 2021
1 parent ef33a75 commit 8febb72
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 69 deletions.
8 changes: 4 additions & 4 deletions src/archive/mod.rs
Expand Up @@ -282,8 +282,8 @@ impl<'a> Index<'a> {
let string_offset: u32 = buffer.pread_with(i * 8 + 4, scroll::LE)?;
let archive_member: u32 = buffer.pread_with(i * 8 + 8, scroll::LE)?;

let string = match strtab.get(string_offset as usize) {
Some(result) => result,
let string = match strtab.get_at(string_offset as usize) {
Some(result) => Ok(result),
None => Err(Error::Malformed(format!(
"{} entry {} has string offset {}, which is out of bounds",
BSD_SYMDEF_NAME, i, string_offset
Expand Down Expand Up @@ -353,8 +353,8 @@ impl<'a> NameIndex<'a> {
let idx = name.trim_start_matches('/').trim_end();
match usize::from_str_radix(idx, 10) {
Ok(idx) => {
let name = match self.strtab.get(idx + 1) {
Some(result) => result,
let name = match self.strtab.get_at(idx + 1) {
Some(result) => Ok(result),
None => Err(Error::Malformed(format!(
"Name {} is out of range in archive NameIndex",
name
Expand Down
2 changes: 1 addition & 1 deletion src/elf/dynamic.rs
Expand Up @@ -453,7 +453,7 @@ if_alloc! {
let mut needed = Vec::with_capacity(count);
for dynamic in &self.dyns {
if dynamic.d_tag as u64 == DT_NEEDED {
if let Some(Ok(lib)) = strtab.get(dynamic.d_val as usize) {
if let Some(lib) = strtab.get_at(dynamic.d_val as usize) {
needed.push(lib)
} else {
warn!("Invalid DT_NEEDED {}", dynamic.d_val)
Expand Down
6 changes: 2 additions & 4 deletions src/elf/mod.rs
Expand Up @@ -174,9 +174,7 @@ if_sylvan! {
continue;
}

if section_name.is_some() && !self.shdr_strtab
.get(sect.sh_name)
.map_or(false, |r| r.ok() == section_name) {
if section_name.is_some() && self.shdr_strtab.get_at(sect.sh_name) != section_name {
continue;
}

Expand Down Expand Up @@ -298,7 +296,7 @@ if_sylvan! {

if dyn_info.soname != 0 {
// FIXME: warn! here
soname = match dynstrtab.get(dyn_info.soname) { Some(Ok(soname)) => Some(soname), _ => None };
soname = dynstrtab.get_at(dyn_info.soname);
}
if dyn_info.needed_count > 0 {
libraries = dynamic.get_libraries(&dynstrtab);
Expand Down
7 changes: 2 additions & 5 deletions src/pe/symbol.rs
Expand Up @@ -218,11 +218,8 @@ impl Symbol {
/// a strtab entry.
pub fn name<'a>(&'a self, strtab: &'a strtab::Strtab) -> error::Result<&'a str> {
if let Some(offset) = self.name_offset() {
strtab.get(offset as usize).unwrap_or_else(|| {
Err(error::Error::Malformed(format!(
"Invalid Symbol name offset {:#x}",
offset
)))
strtab.get_at(offset as usize).ok_or_else(|| {
error::Error::Malformed(format!("Invalid Symbol name offset {:#x}", offset))
})
} else {
Ok(self.name.pread(0)?)
Expand Down
176 changes: 122 additions & 54 deletions src/strtab.rs
Expand Up @@ -3,7 +3,6 @@

use core::fmt;
use core::ops::Index;
use core::slice;
use core::str;
use scroll::{ctx, Pread};
if_alloc! {
Expand All @@ -15,8 +14,10 @@ if_alloc! {
/// member index). Constructed using [`parse`](#method.parse)
/// with your choice of delimiter. Please be careful.
pub struct Strtab<'a> {
bytes: &'a [u8],
delim: ctx::StrCtx,
bytes: &'a [u8],
#[cfg(feature = "alloc")]
strings: Vec<(usize, &'a str)>,
}

#[inline(always)]
Expand All @@ -25,31 +26,34 @@ fn get_str(offset: usize, bytes: &[u8], delim: ctx::StrCtx) -> scroll::Result<&s
}

impl<'a> Strtab<'a> {
/// Construct a new strtab with `bytes` as the backing string table, using `delim` as the delimiter between entries
pub fn new(bytes: &'a [u8], delim: u8) -> Self {
Strtab {
/// Creates a `Strtab` directly without bounds check and without parsing it.
///
/// This is potentially unsafe and should only be used if `feature = "alloc"` is disabled.
pub fn from_slice_unparsed(bytes: &'a [u8], offset: usize, len: usize, delim: u8) -> Self {
Self {
delim: ctx::StrCtx::Delimiter(delim),
bytes,
bytes: &bytes[offset..offset + len],
#[cfg(feature = "alloc")]
strings: Vec::new(),
}
}
/// Construct a strtab from a `ptr`, and a `size`, using `delim` as the delimiter
/// # Safety
/// Gets a str reference from the backing bytes starting at byte `offset`.
///
/// This function creates a `Strtab` directly from a raw pointer and size
pub unsafe fn from_raw(ptr: *const u8, size: usize, delim: u8) -> Strtab<'a> {
Strtab {
delim: ctx::StrCtx::Delimiter(delim),
bytes: slice::from_raw_parts(ptr, size),
/// If the index is out of bounds, `None` is returned. Panics if bytes are invalid UTF-8.
/// Use this method if the `Strtab` was created using `from_slice_unparsed()`.
pub fn get_unsafe(&self, offset: usize) -> Option<&'a str> {
if offset >= self.bytes.len() {
None
} else {
Some(get_str(offset, self.bytes, self.delim).unwrap())
}
}
#[cfg(feature = "alloc")]
/// Parses a strtab from `bytes` at `offset` with `len` size as the backing string table, using `delim` as the delimiter
pub fn parse(
bytes: &'a [u8],
offset: usize,
len: usize,
delim: u8,
) -> error::Result<Strtab<'a>> {
/// Parses a `Strtab` from `bytes` at `offset` with `len` size as the backing string table, using `delim` as the delimiter.
///
/// Errors if bytes are invalid UTF-8.
/// Requires `feature = "alloc"`
pub fn parse(bytes: &'a [u8], offset: usize, len: usize, delim: u8) -> error::Result<Self> {
let (end, overflow) = offset.overflowing_add(len);
if overflow || end > bytes.len() {
return Err(error::Error::Malformed(format!(
Expand All @@ -60,44 +64,81 @@ impl<'a> Strtab<'a> {
overflow
)));
}
Ok(Strtab {
bytes: &bytes[offset..end],
delim: ctx::StrCtx::Delimiter(delim),
})
let mut result = Self::from_slice_unparsed(bytes, offset, len, delim);
let mut i = 0;
while i < result.bytes.len() {
let string = get_str(i, result.bytes, result.delim)?;
result.strings.push((i, string));
i += string.len() + 1;
}
Ok(result)
}
#[cfg(feature = "alloc")]
/// Parses a `Strtab` with `bytes` as the backing string table, using `delim` as the delimiter between entries.
///
/// Requires `feature = "alloc"`
pub fn new(bytes: &'a [u8], delim: u8) -> error::Result<Self> {
Self::parse(bytes, 0, bytes.len(), delim)
}
#[cfg(feature = "alloc")]
/// Converts the string table to a vector, with the original `delim` used to separate the strings
/// Converts the string table to a vector of parsed strings.
///
/// Requires `feature = "alloc"`
pub fn to_vec(&self) -> error::Result<Vec<&'a str>> {
let len = self.bytes.len();
let mut strings = Vec::with_capacity(len);
let mut i = 0;
while i < len {
let string = self.get(i).unwrap()?;
i = i + string.len() + 1;
strings.push(string);
// Fallback in case `Strtab` was created using `from_slice_unparsed()`.
if self.strings.is_empty() {
let mut result = Vec::new();
let mut i = 0;
while i < self.bytes.len() {
let string = get_str(i, self.bytes, self.delim)?;
result.push(string);
i += string.len() + 1;
}
return Ok(result);
}
Ok(strings)
Ok(self.strings.iter().map(|&(_key, value)| value).collect())
}
/// Safely parses and gets a str reference from the backing bytes starting at byte `offset`.
#[cfg(feature = "alloc")]
/// Safely gets a str reference from the parsed table starting at byte `offset`.
///
/// If the index is out of bounds, `None` is returned.
/// Requires `feature = "alloc"`
pub fn get_at(&self, offset: usize) -> Option<&'a str> {
match self
.strings
.binary_search_by_key(&offset, |&(key, _value)| key)
{
Ok(index) => Some(self.strings[index].1),
Err(index) => {
if index == 0 {
return None;
}
let (string_begin_offset, entire_string) = self.strings[index - 1];
entire_string.get(offset - string_begin_offset..)
}
}
}
#[deprecated(since = "0.4.2", note = "Use from_slice_unparsed() instead")]
/// Construct a strtab from a `ptr`, and a `size`, using `delim` as the delimiter
///
/// # Safety
/// This function creates a `Strtab` directly from a raw pointer and size
pub unsafe fn from_raw(ptr: *const u8, len: usize, delim: u8) -> Strtab<'a> {
Self::from_slice_unparsed(core::slice::from_raw_parts(ptr, len), 0, len, delim)
}
#[deprecated(since = "0.4.2", note = "Bad performance, use get_at() instead")]
#[cfg(feature = "alloc")]
/// Parses a str reference from the parsed table starting at byte `offset`.
///
/// If the index is out of bounds, `None` is returned.
/// Requires `feature = "alloc"`
pub fn get(&self, offset: usize) -> Option<error::Result<&'a str>> {
if offset >= self.bytes.len() {
None
} else {
Some(get_str(offset, self.bytes, self.delim).map_err(core::convert::Into::into))
}
}
/// Gets a str reference from the backing bytes starting at byte `offset`.
/// If the index is out of bounds, `None` is returned. Panics if bytes are invalid UTF-8.
pub fn get_unsafe(&self, offset: usize) -> Option<&'a str> {
if offset >= self.bytes.len() {
None
} else {
Some(get_str(offset, self.bytes, self.delim).unwrap())
}
}
}

impl<'a> fmt::Debug for Strtab<'a> {
Expand All @@ -110,10 +151,12 @@ impl<'a> fmt::Debug for Strtab<'a> {
}

impl<'a> Default for Strtab<'a> {
fn default() -> Strtab<'a> {
Strtab {
bytes: &[],
fn default() -> Self {
Self {
delim: ctx::StrCtx::default(),
bytes: &[],
#[cfg(feature = "alloc")]
strings: Vec::new(),
}
}
}
Expand All @@ -133,36 +176,61 @@ impl<'a> Index<usize> for Strtab<'a> {

#[test]
fn as_vec_no_final_null() {
let bytes = b"\0printf\0memmove\0busta";
let strtab = unsafe { Strtab::from_raw(bytes.as_ptr(), bytes.len(), 0x0) };
let strtab = Strtab::new(b"\0printf\0memmove\0busta", 0x0).unwrap();
let vec = strtab.to_vec().unwrap();
assert_eq!(vec.len(), 4);
assert_eq!(vec, vec!["", "printf", "memmove", "busta"]);
}

#[test]
fn as_vec_no_first_null_no_final_null() {
let bytes = b"printf\0memmove\0busta";
let strtab = unsafe { Strtab::from_raw(bytes.as_ptr(), bytes.len(), 0x0) };
let strtab = Strtab::new(b"printf\0memmove\0busta", 0x0).unwrap();
let vec = strtab.to_vec().unwrap();
assert_eq!(vec.len(), 3);
assert_eq!(vec, vec!["printf", "memmove", "busta"]);
}

#[test]
fn to_vec_final_null() {
let bytes = b"\0printf\0memmove\0busta\0";
let strtab = unsafe { Strtab::from_raw(bytes.as_ptr(), bytes.len(), 0x0) };
let strtab = Strtab::new(b"\0printf\0memmove\0busta\0", 0x0).unwrap();
let vec = strtab.to_vec().unwrap();
assert_eq!(vec.len(), 4);
assert_eq!(vec, vec!["", "printf", "memmove", "busta"]);
}

#[test]
fn to_vec_newline_delim() {
let bytes = b"\nprintf\nmemmove\nbusta\n";
let strtab = unsafe { Strtab::from_raw(bytes.as_ptr(), bytes.len(), b'\n') };
let strtab = Strtab::new(b"\nprintf\nmemmove\nbusta\n", b'\n').unwrap();
let vec = strtab.to_vec().unwrap();
assert_eq!(vec.len(), 4);
assert_eq!(vec, vec!["", "printf", "memmove", "busta"]);
}

#[test]
fn parse_utf8() {
assert!(match Strtab::new(&[0x80, 0x80], b'\n') {
Err(error::Error::Scroll(scroll::Error::BadInput {
size: 2,
msg: "invalid utf8",
})) => true,
_ => false,
});
assert!(match Strtab::new(&[0xC6, 0x92, 0x6F, 0x6F], b'\n') {
Ok(_) => true,
_ => false,
});
}

#[test]
fn get_at_utf8() {
let strtab = Strtab::new("\nƒoo\nmemmove\n🅱️usta\n".as_bytes(), b'\n').unwrap();
assert_eq!(strtab.get_at(0), Some(""));
assert_eq!(strtab.get_at(5), Some(""));
assert_eq!(strtab.get_at(6), Some("memmove"));
assert_eq!(strtab.get_at(14), Some("\u{1f171}\u{fe0f}usta"));
assert_eq!(strtab.get_at(16), None);
assert_eq!(strtab.get_at(18), Some("\u{fe0f}usta"));
assert_eq!(strtab.get_at(21), Some("usta"));
assert_eq!(strtab.get_at(25), Some(""));
assert_eq!(strtab.get_at(26), None);
}
2 changes: 1 addition & 1 deletion tests/elf.rs
Expand Up @@ -60,7 +60,7 @@ fn parse_text_section_size_lazy(base: &[u8]) -> Result<u64, &'static str> {
)
.map_err(|_| "parse string table error")?;
for (_, section) in obj.section_headers.iter().enumerate() {
let section_name = strtab.get(section.sh_name).unwrap().unwrap();
let section_name = strtab.get_at(section.sh_name).unwrap();
if section_name == ".text" {
return Ok(section.sh_size);
}
Expand Down

0 comments on commit 8febb72

Please sign in to comment.