diff --git a/src/_unsafe.rs b/src/_unsafe.rs index cd47a8c..ae65f80 100644 --- a/src/_unsafe.rs +++ b/src/_unsafe.rs @@ -3,6 +3,7 @@ use std::{ alloc::Layout, collections::hash_map::DefaultHasher, + ffi::OsStr, hash::{Hash, Hasher}, }; @@ -235,6 +236,82 @@ impl Ord for StaticStr { impl std::fmt::Debug for StaticStr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("StaticStr") + .field("str", &self.as_str()) + .field("hash", &self.hash) + .finish() + } +} + +/// An internal struct used to represent a type-erased, heap-allocated `&'static OsStr` (i.e. +/// not a reference, slice, or value). +/// +/// [`StaticOsStr`] is the only variant of [`Static`] where all methods are inherently safe, +/// because no type erasure occurs. +#[derive(Copy, Clone)] +pub struct StaticOsStr { + ptr: *const OsStr, + hash: u64, +} + +impl StaticOsStr { + /// Allows direct access to the [`OsStr`] stored in this [`StaticOsStr`]. + pub const fn as_os_str<'a>(&self) -> &'a OsStr { + unsafe { &*(self.ptr as *const OsStr) } + } + + /// Creates a new [`StaticOsStr`] from the specified `&OsStr`. Since [`StaticOsStr`] does + /// not de-allocate its associated heap string when it is dropped (in fact, it can't be + /// dropped because it is [`Copy`]), this amounts to a memory leak. + pub fn from(value: &OsStr) -> Self { + Self::with_hash(value, None) + } + + /// Creates a new [`StaticOsStr`] from the specified `&OsStr`, based on a + /// manually-specified hashcode. Since [`StaticOsStr`] does not de-allocate its associated + /// heap string when it is dropped (in fact, it can't be dropped because it is [`Copy`]), + /// this amounts to a memory leak. 
+ pub fn with_hash(value: &OsStr, hash: Option) -> Self { + let hash = hash.unwrap_or_else(|| { + let mut hasher = DefaultHasher::default(); + value.hash(&mut hasher); + hasher.finish() + }); + let ptr = Box::leak(Box::from(value)) as *const OsStr; + let written_value = unsafe { (ptr as *const OsStr).as_ref().unwrap() }; + assert_eq!(written_value, value); + StaticOsStr { ptr, hash } + } +} + +impl Hash for StaticOsStr { + fn hash(&self, state: &mut H) { + self.hash.hash(state); + } +} + +impl PartialEq for StaticOsStr { + fn eq(&self, other: &Self) -> bool { + self.hash == other.hash + } +} + +impl Eq for StaticOsStr {} + +impl PartialOrd for StaticOsStr { + fn partial_cmp(&self, other: &Self) -> Option { + self.hash.partial_cmp(&other.hash) + } +} + +impl Ord for StaticOsStr { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.hash.cmp(&other.hash) + } +} + +impl std::fmt::Debug for StaticOsStr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("StaticOsStr") .field("hash", &self.hash) .finish() } @@ -250,6 +327,7 @@ pub enum Static { Value(StaticValue), Slice(StaticSlice), Str(StaticStr), + OsStr(StaticOsStr), } impl Static { @@ -260,6 +338,7 @@ impl Static { Static::Value(value) => value.ptr, Static::Slice(slice) => slice.ptr as *const (), Static::Str(string) => string.ptr as *const (), + Static::OsStr(os_str) => os_str.ptr as *const (), } } @@ -270,6 +349,7 @@ impl Static { Static::Value(value) => value.hash, Static::Slice(slice) => slice.hash, Static::Str(string) => string.hash, + Static::OsStr(os_str) => os_str.hash, } } @@ -288,6 +368,11 @@ impl Static { Static::Str(StaticStr::with_hash(value, hash)) } + /// Creates a [`Static`] from an `&OsStr`. + pub fn from_os_str(value: &OsStr, hash: Option) -> Static { + Static::OsStr(StaticOsStr::with_hash(value, hash)) + } + /// Unsafely accesses the slice pointed to by the underlying [`StaticSlice`]. 
If the /// underlying variant of the [`Static`] is not a [`StaticSlice`], this method will panic. /// Specifying the wrong `T` is UB. @@ -317,6 +402,15 @@ impl Static { } } + /// Unsafely accesses the `&OsStr` pointed to by the underlying [`StaticOsStr`]. If the + /// underlying variant of the [`Static`] is not a [`StaticOsStr`], this method will panic. + pub fn as_os_str<'a>(&self) -> &'a OsStr { + match self { + Static::OsStr(static_os_str) => static_os_str.as_os_str(), + _ => panic!("not an &OsStr!"), + } + } + /// This is UB if the underlying types differ and a hash collision occurs. pub unsafe fn _partial_eq(&self, other: &Static) -> bool where @@ -338,6 +432,7 @@ impl Static { a.as_slice::().partial_cmp(b.as_slice::()) } (Static::Str(a), Static::Str(b)) => a.as_str().partial_cmp(b.as_str()), + (Static::OsStr(a), Static::OsStr(b)) => a.as_os_str().partial_cmp(b.as_os_str()), _ => (T::static_type_id(), self.hash_code()) .partial_cmp(&(T::static_type_id(), other.hash_code())), } @@ -349,6 +444,7 @@ impl Static { (Static::Value(a), Static::Value(b)) => a.as_value::().cmp(b.as_value::()), (Static::Slice(a), Static::Slice(b)) => a.as_slice::().cmp(b.as_slice::()), (Static::Str(a), Static::Str(b)) => a.as_str().cmp(b.as_str()), + (Static::OsStr(a), Static::OsStr(b)) => a.as_os_str().cmp(b.as_os_str()), _ => (T::static_type_id(), self.hash_code()) .cmp(&(T::static_type_id(), other.hash_code())), } @@ -361,6 +457,7 @@ impl Static { Static::Value(value) => (type_id, value).hash(state), Static::Slice(slice) => (type_id, slice).hash(state), Static::Str(string) => (type_id, string).hash(state), + Static::OsStr(os_str) => (type_id, os_str).hash(state), } } } diff --git a/src/datatype.rs b/src/datatype.rs index a9a8123..f787da7 100644 --- a/src/datatype.rs +++ b/src/datatype.rs @@ -5,6 +5,7 @@ use crate::_unsafe::*; use crate::*; +use std::ffi::OsStr; /// Variant of [`DataTypeTypeMarker`] representing a slice type. 
pub enum Slice {} @@ -148,6 +149,27 @@ unsafe impl<'a> DataType for &'a str { } } +unsafe impl<'a> DataType for &'a OsStr { + type Type = Reference; + type SliceType = &'a OsStr; + type ValueType = &'a OsStr; + type SliceValueType = (); + type InnerType = OsStr; + type DerefTargetType = OsStr; + + fn as_slice(&self) -> &'static [()] { + panic!("not supported"); + } + + fn as_value(&self) -> &'a OsStr { + *self + } + + fn to_static_with_hash(&self, hash: Option) -> Static { + Static::from_os_str(*self, hash) + } +} + unsafe_impl_data_type!((), Value); unsafe_impl_data_type!(char, Value); unsafe_impl_data_type!(bool, Value); diff --git a/src/interned_str.rs b/src/interned_str.rs index cda3a3b..4dcbdf5 100644 --- a/src/interned_str.rs +++ b/src/interned_str.rs @@ -1,6 +1,7 @@ use crate::*; use core::fmt::Display; use core::ops::Deref; +use std::ffi::OsString; /// A convenience abstraction around [`Interned<&'static str>`] with some extra [`From`] impls /// and other convenience functions. This should be your go-to type if you want to work with @@ -31,6 +32,11 @@ impl InStr { pub fn as_str(&self) -> &'static str { self.0.interned_str() } + + /// Returns the underlying heap pointer where this [`str`] is stored. + pub fn as_ptr(&self) -> *const () { + self.0.as_ptr() + } } impl Display for InStr { @@ -100,3 +106,125 @@ impl Deref for InStr { self.0.interned_str() } } + +/// A convenience abstraction around [`Interned<&'static OsStr>`] with some extra [`From`] impls +/// and other convenience functions. This should be your go-to type if you want to work with +/// interned [`OsStr`]s and/or [`OsString`]s. 
+/// +/// ``` +/// use std::ffi::{OsStr, OsString}; +/// use interned::InOsStr; +/// +/// let a = InOsStr::from(OsStr::new("this is a triumph")); +/// let b: InOsStr = OsString::from("this is a triumph").into(); +/// let c: InOsStr = OsStr::new("I'm making a note here, huge success").into(); +/// assert_eq!(a, b); +/// assert_eq!(a, OsStr::new("this is a triumph")); +/// assert_ne!(a, c); +/// assert_ne!(b, c); +/// assert_eq!(a.as_ptr(), b.as_ptr()); +/// ``` +/// +/// Note that as shown above, convenient impls are provided for [`From`]/[`Into`] conversions +/// and [`PartialEq`]/[`Eq`]/[`PartialOrd`]/[`Ord`] with the [`OsStr`] and [`OsString`] types, +/// meaning that for the most part you can use an [`InOsStr`] seamlessly in most places where +/// some sort of string type is expected. +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] +pub struct InOsStr(Interned<&'static OsStr>); + +impl InOsStr { + /// Returns a reference to the underlying interned string for this [`InOsStr`]. + pub fn as_os_str(&self) -> &'static OsStr { + self.0.interned_os_str() + } + + /// Returns the underlying heap pointer where this [`OsStr`] is stored. 
+ pub fn as_ptr(&self) -> *const () { + self.0.as_ptr() + } +} + +impl Display for InOsStr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.0.interned_os_str().to_string_lossy()) + } +} + +impl AsRef for InOsStr { + fn as_ref(&self) -> &OsStr { + self.0.interned_os_str() + } +} + +impl<'a> From<&'a OsStr> for InOsStr { + fn from(value: &'a OsStr) -> Self { + InOsStr(Interned::<&'static OsStr>::from(value)) + } +} + +impl From for InOsStr { + fn from(value: OsString) -> Self { + InOsStr(Interned::<&'static OsStr>::from(value.as_os_str())) + } +} + +impl From> for InOsStr { + fn from(value: Interned<&'static OsStr>) -> Self { + InOsStr(value) + } +} + +impl<'a> From for &'a OsStr { + fn from(value: InOsStr) -> Self { + value.0.interned_os_str() + } +} + +impl From for OsString { + fn from(value: InOsStr) -> Self { + value.0.interned_os_str().to_os_string() + } +} + +impl PartialEq<&OsStr> for InOsStr { + fn eq(&self, other: &&OsStr) -> bool { + self.0.interned_os_str().eq(*other) + } +} + +impl PartialEq for InOsStr { + fn eq(&self, other: &OsString) -> bool { + self.0.interned_os_str().eq(other.as_os_str()) + } +} + +impl PartialOrd<&OsStr> for InOsStr { + fn partial_cmp(&self, other: &&OsStr) -> Option { + self.0.interned_os_str().partial_cmp(*other) + } +} + +impl Deref for InOsStr { + type Target = OsStr; + + fn deref(&self) -> &Self::Target { + self.0.interned_os_str() + } +} + +#[test] +fn test_interned_os_str() { + let a: Interned<&'static OsStr> = OsStr::new("hey").into(); + let b: &OsStr = OsStr::new("hey"); + assert_eq!(a.interned_os_str(), b); +} + +#[test] +fn test_in_os_str() { + let a: InOsStr = InOsStr::from(OsStr::new("hello world")); + let b: InOsStr = OsString::from("hey").into(); + assert_ne!(a, b); + let c: InOsStr = OsStr::new("hello world").into(); + assert_eq!(a, c); + assert_ne!(b, c); +} diff --git a/src/lib.rs b/src/lib.rs index 16553f8..c99a908 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -65,6 
+65,7 @@ use std::{ hash_map::{DefaultHasher, Entry}, HashMap, }, + ffi::OsStr, fmt::Display, hash::{BuildHasher, Hash, Hasher}, marker::PhantomData, @@ -186,6 +187,14 @@ impl Interned<&str> { } } +impl Interned<&OsStr> { + /// Returns a reference to the underlying `&OsStr` interned in this [`Interned`]. Calling + /// this method on a non-OsStr will panic. + pub fn interned_os_str<'a>(&self) -> &'a OsStr { + self.value.as_os_str() + } +} + impl> Interned { /// Returns a reference to the underlying `T` interned in this [`Interned`]. Calling this /// method on a non-value will panic. @@ -209,6 +218,9 @@ impl Deref for Interned { std::mem::transmute_copy(&static_value.as_value::()) }, Static::Str(static_str) => unsafe { std::mem::transmute_copy(&static_str.as_str()) }, + Static::OsStr(static_os_str) => unsafe { + std::mem::transmute_copy(&static_os_str.as_os_str()) + }, } } } @@ -265,6 +277,7 @@ where f.field("slice", unsafe { &slice.as_slice::() }) } Static::Str(string) => f.field("str", &string.as_str()), + Static::OsStr(os_str) => f.field("OsStr", &os_str.as_os_str()), } .finish(); ret @@ -278,6 +291,7 @@ impl Display for Interned { Static::Value(value) => unsafe { value.as_value::().fmt(f) }, Static::Slice(slice) => unsafe { slice.as_slice::().fmt(f) }, Static::Str(string) => string.as_str().fmt(f), + Static::OsStr(os_str) => os_str.as_os_str().fmt(f), } } } @@ -334,6 +348,12 @@ impl From> for &str { } } +impl From> for &OsStr { + fn from(value: Interned<&OsStr>) -> Self { + value.interned_os_str() + } +} + derive_from_interned_impl_value!(char); derive_from_interned_impl_value!(bool); derive_from_interned_impl_value!(usize); diff --git a/src/memoized.rs b/src/memoized.rs index 84fdefc..53ea54f 100644 --- a/src/memoized.rs +++ b/src/memoized.rs @@ -117,6 +117,9 @@ impl Deref for Memoized { std::mem::transmute_copy(&static_value.as_value::()) }, Static::Str(static_str) => unsafe { std::mem::transmute_copy(&static_str.as_str()) }, + Static::OsStr(static_os_str) => 
unsafe { + std::mem::transmute_copy(&static_os_str.as_os_str()) + }, } } }