Skip to content

Commit

Permalink
add InOsStr and support for interned OsStrs
Browse files Browse the repository at this point in the history
  • Loading branch information
sam0x17 committed Aug 25, 2023
1 parent de5f307 commit 79af0ec
Show file tree
Hide file tree
Showing 5 changed files with 270 additions and 0 deletions.
97 changes: 97 additions & 0 deletions src/_unsafe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
use std::{
alloc::Layout,
collections::hash_map::DefaultHasher,
ffi::OsStr,
hash::{Hash, Hasher},
};

Expand Down Expand Up @@ -235,6 +236,82 @@ impl Ord for StaticStr {
impl std::fmt::Debug for StaticStr {
    /// Renders this [`StaticStr`] as a debug struct with two fields: the string contents
    /// (`str`) and the stored hash code (`hash`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("StaticStr");
        builder.field("str", &self.as_str());
        builder.field("hash", &self.hash);
        builder.finish()
    }
}

/// An internal struct used to represent a heap-allocated, intentionally leaked [`OsStr`]
/// (i.e. the string-like counterpart of the reference, slice, and value variants of
/// [`Static`]).
///
/// The pointee type is always [`OsStr`], so no type erasure occurs here: reading the string
/// back through [`StaticOsStr::as_os_str`] needs no caller-supplied type parameter and is
/// exposed as a safe method.
#[derive(Copy, Clone)]
pub struct StaticOsStr {
// Raw pointer to the `OsStr` that `with_hash` copied onto the heap and leaked.
ptr: *const OsStr,
// Hash code for the string: either supplied by the caller of `with_hash` or computed there
// with `DefaultHasher`.
hash: u64,
}

impl StaticOsStr {
    /// Allows direct access to the [`OsStr`] stored in this [`StaticOsStr`].
    ///
    /// The lifetime `'a` of the returned reference is chosen by the caller and is not tied to
    /// `&self`.
    pub const fn as_os_str<'a>(&self) -> &'a OsStr {
        // SAFETY: `with_hash` (the constructor) obtains `ptr` from `Box::leak`, so it is
        // non-null, properly aligned, and points to an allocation that is never freed.
        unsafe { &*self.ptr }
    }

    /// Creates a new [`StaticOsStr`] from the specified `&OsStr`, computing its hash code with
    /// [`DefaultHasher`]. Since [`StaticOsStr`] does not de-allocate its associated heap string
    /// when it is dropped (in fact, it can't be dropped because it is [`Copy`]), this amounts
    /// to a memory leak.
    ///
    /// NOTE(review): the type parameter `T` is not used by the body and cannot be inferred, so
    /// callers have to name it explicitly (e.g. `StaticOsStr::from::<()>(value)`). It is kept
    /// only so existing call sites keep compiling; consider removing it in a breaking release.
    pub fn from<T: Hash + Copy>(value: &OsStr) -> Self {
        Self::with_hash(value, None)
    }

    /// Creates a new [`StaticOsStr`] from the specified `&OsStr`, based on a
    /// manually-specified hashcode. When `hash` is `None`, the hash code is computed from
    /// `value` with [`DefaultHasher`]. Since [`StaticOsStr`] does not de-allocate its
    /// associated heap string when it is dropped (in fact, it can't be dropped because it is
    /// [`Copy`]), this amounts to a memory leak.
    ///
    /// # Panics
    ///
    /// Panics if the leaked copy does not compare equal to `value` (a sanity check that is
    /// not expected to fail).
    pub fn with_hash(value: &OsStr, hash: Option<u64>) -> Self {
        let hash = hash.unwrap_or_else(|| {
            let mut hasher = DefaultHasher::default();
            value.hash(&mut hasher);
            hasher.finish()
        });
        // Copy the string onto the heap and leak it. `Box::leak` already hands back a valid
        // reference, so no `unsafe` round-trip through a raw pointer is needed to inspect it.
        let leaked: &'static OsStr = Box::leak(Box::<OsStr>::from(value));
        assert_eq!(leaked, value);
        let ptr: *const OsStr = leaked;
        StaticOsStr { ptr, hash }
    }
}

impl Hash for StaticOsStr {
    /// Feeds only the stored `u64` hash code into `state`; the string contents are not
    /// re-hashed here.
    fn hash<H: Hasher>(&self, state: &mut H) {
        state.write_u64(self.hash);
    }
}

impl PartialEq for StaticOsStr {
    /// Two [`StaticOsStr`]s are considered equal exactly when their stored hash codes match;
    /// neither the pointers nor the string contents are compared.
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.hash, other.hash);
        lhs == rhs
    }
}

// Marker impl: equality on `StaticOsStr` compares the stored `u64` hash codes, which is a
// total equivalence relation.
impl Eq for StaticOsStr {}

impl PartialOrd for StaticOsStr {
    /// Orders two [`StaticOsStr`]s by their stored hash codes. Because the codes are `u64`s
    /// the comparison always yields `Some(_)`.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.hash.cmp(&other.hash))
    }
}

impl Ord for StaticOsStr {
    /// Total order over [`StaticOsStr`]s, defined by comparing their stored hash codes.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        u64::cmp(&self.hash, &other.hash)
    }
}

impl std::fmt::Debug for StaticOsStr {
/// Formats this [`StaticOsStr`] for debugging as a struct with the stored string (`os_str`)
/// followed by its hash code (`hash`), in the same shape as the `Debug` output of
/// [`StaticStr`] (which prints its `str` and `hash`).
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    f.debug_struct("StaticOsStr")
        .field("os_str", &self.as_os_str())
        .field("hash", &self.hash)
        .finish()
}
Expand All @@ -250,6 +327,7 @@ pub enum Static {
Value(StaticValue),
Slice(StaticSlice),
Str(StaticStr),
OsStr(StaticOsStr),
}

impl Static {
Expand All @@ -260,6 +338,7 @@ impl Static {
Static::Value(value) => value.ptr,
Static::Slice(slice) => slice.ptr as *const (),
Static::Str(string) => string.ptr as *const (),
Static::OsStr(os_str) => os_str.ptr as *const (),
}
}

Expand All @@ -270,6 +349,7 @@ impl Static {
Static::Value(value) => value.hash,
Static::Slice(slice) => slice.hash,
Static::Str(string) => string.hash,
Static::OsStr(os_str) => os_str.hash,
}
}

Expand All @@ -288,6 +368,11 @@ impl Static {
Static::Str(StaticStr::with_hash(value, hash))
}

/// Builds the [`Static::OsStr`] variant from `value`, forwarding `value` and the optional
/// `hash` to [`StaticOsStr::with_hash`].
pub fn from_os_str(value: &OsStr, hash: Option<u64>) -> Static {
    let inner = StaticOsStr::with_hash(value, hash);
    Static::OsStr(inner)
}

/// Unsafely accesses the slice pointed to by the underlying [`StaticSlice`]. If the
/// underlying variant of the [`Static`] is not a [`StaticSlice`], this method will panic.
/// Specifying the wrong `T` is UB.
Expand Down Expand Up @@ -317,6 +402,15 @@ impl Static {
}
}

/// Accesses the `&OsStr` pointed to by the underlying [`StaticOsStr`].
///
/// # Panics
///
/// Panics if the underlying variant of this [`Static`] is not [`Static::OsStr`].
pub fn as_os_str<'a>(&self) -> &'a OsStr {
    if let Static::OsStr(inner) = self {
        inner.as_os_str()
    } else {
        panic!("not an &OsStr!")
    }
}

/// This is UB if the underlying types differ and a hash collision occurs.
pub unsafe fn _partial_eq<T: PartialEq + DataType + Staticize>(&self, other: &Static) -> bool
where
Expand All @@ -338,6 +432,7 @@ impl Static {
a.as_slice::<T>().partial_cmp(b.as_slice::<T>())
}
(Static::Str(a), Static::Str(b)) => a.as_str().partial_cmp(b.as_str()),
(Static::OsStr(a), Static::OsStr(b)) => a.as_os_str().partial_cmp(b.as_os_str()),
_ => (T::static_type_id(), self.hash_code())
.partial_cmp(&(T::static_type_id(), other.hash_code())),
}
Expand All @@ -349,6 +444,7 @@ impl Static {
(Static::Value(a), Static::Value(b)) => a.as_value::<T>().cmp(b.as_value::<T>()),
(Static::Slice(a), Static::Slice(b)) => a.as_slice::<T>().cmp(b.as_slice::<T>()),
(Static::Str(a), Static::Str(b)) => a.as_str().cmp(b.as_str()),
(Static::OsStr(a), Static::OsStr(b)) => a.as_os_str().cmp(b.as_os_str()),
_ => (T::static_type_id(), self.hash_code())
.cmp(&(T::static_type_id(), other.hash_code())),
}
Expand All @@ -361,6 +457,7 @@ impl Static {
Static::Value(value) => (type_id, value).hash(state),
Static::Slice(slice) => (type_id, slice).hash(state),
Static::Str(string) => (type_id, string).hash(state),
Static::OsStr(os_str) => (type_id, os_str).hash(state),
}
}
}
22 changes: 22 additions & 0 deletions src/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

use crate::_unsafe::*;
use crate::*;
use std::ffi::OsStr;

/// Variant of [`DataTypeTypeMarker`] representing a slice type.
pub enum Slice {}
Expand Down Expand Up @@ -148,6 +149,27 @@ unsafe impl<'a> DataType for &'a str {
}
}

// `DataType` implementation for borrowed `OsStr`s: the value view is the `&OsStr` itself and
// there is no slice view.
unsafe impl<'a> DataType for &'a OsStr {
    type Type = Reference;
    type SliceType = &'a OsStr;
    type ValueType = &'a OsStr;
    type SliceValueType = ();
    type InnerType = OsStr;
    type DerefTargetType = OsStr;

    /// Always panics: no slice view is provided for `&OsStr`.
    fn as_slice(&self) -> &'static [()] {
        panic!("not supported");
    }

    /// Returns a copy of the `&'a OsStr` itself.
    fn as_value(&self) -> &'a OsStr {
        let inner: &'a OsStr = *self;
        inner
    }

    /// Converts this `&OsStr` into a [`Static`] via [`Static::from_os_str`], passing along the
    /// optional pre-computed `hash`.
    fn to_static_with_hash(&self, hash: Option<u64>) -> Static {
        let os_str: &OsStr = self;
        Static::from_os_str(os_str, hash)
    }
}

unsafe_impl_data_type!((), Value);
unsafe_impl_data_type!(char, Value);
unsafe_impl_data_type!(bool, Value);
Expand Down
128 changes: 128 additions & 0 deletions src/interned_str.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::*;
use core::fmt::Display;
use core::ops::Deref;
use std::ffi::OsString;

/// A convenience abstraction around [`Interned<&'static str>`] with some extra [`From`] impls
/// and other convenience functions. This should be your go-to type if you want to work with
Expand Down Expand Up @@ -31,6 +32,11 @@ impl InStr {
pub fn as_str(&self) -> &'static str {
    // Delegate to the wrapped `Interned` value, which hands out the `'static` string.
    let interned = &self.0;
    interned.interned_str()
}

/// Returns the type-erased pointer reported by the wrapped `Interned` value, i.e. the
/// underlying heap location where this [`str`] is stored.
pub fn as_ptr(&self) -> *const () {
    let interned = &self.0;
    interned.as_ptr()
}
}

impl Display for InStr {
Expand Down Expand Up @@ -100,3 +106,125 @@ impl Deref for InStr {
self.0.interned_str()
}
}

/// A convenience abstraction around [`Interned<&'static OsStr>`] with some extra [`From`] impls
/// and other convenience functions. This should be your go-to type if you want to work with
/// interned [`OsStr`]s and/or [`OsString`]s.
///
/// ```
/// use std::ffi::{OsStr, OsString};
/// use interned::InOsStr;
///
/// let a = InOsStr::from(OsStr::new("this is a triumph"));
/// let b: InOsStr = OsString::from("this is a triumph").into();
/// let c: InOsStr = OsStr::new("I'm making a note here, huge success").into();
/// assert_eq!(a, b);
/// assert_eq!(a, OsStr::new("this is a triumph"));
/// assert_ne!(a, c);
/// assert_ne!(b, c);
/// assert_eq!(a.as_ptr(), b.as_ptr());
/// ```
///
/// Note that as shown above, convenient impls are provided for [`From`]/[`Into`] conversions
/// to and from `&OsStr` and [`OsString`], for [`PartialEq`] against `&OsStr` and [`OsString`],
/// and for [`PartialOrd`] against `&OsStr`, meaning that for the most part you can use an
/// [`InOsStr`] seamlessly in most places where some sort of OS string type is expected.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
pub struct InOsStr(Interned<&'static OsStr>);

impl InOsStr {
/// Returns a reference to the underlying interned string for this [`InOsStr`].
pub fn as_os_str(&self) -> &'static OsStr {
self.0.interned_os_str()
}

/// Returns the underlying heap pointer where this [`OsStr`] is stored.
pub fn as_ptr(&self) -> *const () {
self.0.as_ptr()
}
}

impl Display for InOsStr {
    /// Writes the interned string using its lossy UTF-8 conversion
    /// ([`OsStr::to_string_lossy`]).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let lossy = self.0.interned_os_str().to_string_lossy();
        f.write_str(&lossy)
    }
}

impl AsRef<OsStr> for InOsStr {
fn as_ref(&self) -> &OsStr {
self.0.interned_os_str()
}
}

impl<'a> From<&'a OsStr> for InOsStr {
    /// Converts the borrowed `OsStr` into an `Interned<&'static OsStr>` and wraps it.
    fn from(value: &'a OsStr) -> Self {
        let interned = Interned::<&'static OsStr>::from(value);
        InOsStr(interned)
    }
}

impl From<OsString> for InOsStr {
    /// Converts the contents of an owned [`OsString`] into an `Interned<&'static OsStr>` and
    /// wraps it; the `OsString` itself is dropped when this function returns.
    fn from(value: OsString) -> Self {
        let borrowed = value.as_os_str();
        let interned = Interned::<&'static OsStr>::from(borrowed);
        InOsStr(interned)
    }
}

impl From<Interned<&'static OsStr>> for InOsStr {
fn from(value: Interned<&'static OsStr>) -> Self {
InOsStr(value)
}
}

impl<'a> From<InOsStr> for &'a OsStr {
fn from(value: InOsStr) -> Self {
value.0.interned_os_str()
}
}

impl From<InOsStr> for OsString {
    /// Copies the interned string into a freshly allocated, owned [`OsString`].
    fn from(value: InOsStr) -> Self {
        let borrowed = value.0.interned_os_str();
        borrowed.to_os_string()
    }
}

impl PartialEq<&OsStr> for InOsStr {
    /// Compares the interned string's contents against a borrowed `OsStr`.
    fn eq(&self, other: &&OsStr) -> bool {
        let lhs: &OsStr = self.0.interned_os_str();
        let rhs: &OsStr = other;
        lhs == rhs
    }
}

impl PartialEq<OsString> for InOsStr {
fn eq(&self, other: &OsString) -> bool {
self.0.interned_os_str().eq(other.as_os_str())
}
}

impl PartialOrd<&OsStr> for InOsStr {
    /// Orders the interned string's contents relative to a borrowed `OsStr`, using `OsStr`'s
    /// own ordering.
    fn partial_cmp(&self, other: &&OsStr) -> Option<std::cmp::Ordering> {
        let lhs: &OsStr = self.0.interned_os_str();
        let rhs: &OsStr = other;
        lhs.partial_cmp(rhs)
    }
}

impl Deref for InOsStr {
type Target = OsStr;

fn deref(&self) -> &Self::Target {
self.0.interned_os_str()
}
}

// Interning an `OsStr` and reading it back yields the same contents.
#[test]
fn test_interned_os_str() {
    let expected: &OsStr = OsStr::new("hey");
    let interned: Interned<&'static OsStr> = OsStr::new("hey").into();
    assert_eq!(interned.interned_os_str(), expected);
}

// `InOsStr` values built from equal contents compare equal; different contents compare
// unequal, regardless of whether they came from an `&OsStr` or an `OsString`.
#[test]
fn test_in_os_str() {
    let hello: InOsStr = InOsStr::from(OsStr::new("hello world"));
    let hey: InOsStr = OsString::from("hey").into();
    assert_ne!(hello, hey);
    let hello_again: InOsStr = OsStr::new("hello world").into();
    assert_eq!(hello, hello_again);
    assert_ne!(hey, hello_again);
}

0 comments on commit 79af0ec

Please sign in to comment.