diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..d0752af --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "rustc-stable-hash" +version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..0c63d64 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "rustc-stable-hash" +version = "0.1.0" +authors = ["The Rust Project Developers"] +description = "A stable hashing algorithm used by rustc" +license = "Apache-2.0 OR MIT" +readme = "README.md" +repository = "https://github.com/rust-lang/rustc-stable-hash" +edition = "2021" + +[features] +nightly = [] # for feature(hasher_prefixfree_extras) diff --git a/src/int_overflow.rs b/src/int_overflow.rs new file mode 100644 index 0000000..65a81f0 --- /dev/null +++ b/src/int_overflow.rs @@ -0,0 +1,77 @@ +// Weaker version of `-Coverflow-checks`. + +/// Addition, but only overflow checked when `cfg(debug_assertions)` is set +/// instead of respecting `-Coverflow-checks`. +/// +/// This exists for performance reasons, as we ship rustc with overflow checks. +/// While overflow checks are perf neutral in almost all of the compiler, there +/// are a few particularly hot areas where we don't want overflow checks in our +/// dist builds. Overflow is still a bug there, so we want overflow check for +/// builds with debug assertions. +/// +/// That's a long way to say that this should be used in areas where overflow +/// is a bug but overflow checking is too slow. +pub trait DebugStrictAdd { + /// See [`DebugStrictAdd`]. + fn debug_strict_add(self, other: Self) -> Self; +} + +macro_rules! impl_debug_strict_add { + ($( $ty:ty )*) => { + $( + impl DebugStrictAdd for $ty { + fn debug_strict_add(self, other: Self) -> Self { + if cfg!(debug_assertions) { + self + other + } else { + self.wrapping_add(other) + } + } + } + )* + }; +} + +/// See [`DebugStrictAdd`]. +pub trait DebugStrictSub { + /// See [`DebugStrictAdd`]. + fn debug_strict_sub(self, other: Self) -> Self; +} + +macro_rules! impl_debug_strict_sub { + ($( $ty:ty )*) => { + $( + impl DebugStrictSub for $ty { + fn debug_strict_sub(self, other: Self) -> Self { + if cfg!(debug_assertions) { + self - other + } else { + self.wrapping_sub(other) + } + } + } + )* + }; +} + +impl_debug_strict_add! { + usize +} + +/* +impl_debug_strict_add! { + u8 u16 u32 u64 u128 usize + i8 i16 i32 i64 i128 isize +} +*/ + +impl_debug_strict_sub! { + usize +} + +/* +impl_debug_strict_sub! { + u8 u16 u32 u64 u128 usize + i8 i16 i32 i64 i128 isize +} +*/ diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..88b5bbe --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,13 @@ +//! A stable hashing algorithm used by rustc + +#![cfg_attr(feature = "nightly", feature(hasher_prefixfree_extras))] + +mod int_overflow; +mod sip128; +mod stable_hasher; + +#[doc(inline)] +pub use stable_hasher::StableHasher; + +#[doc(inline)] +pub use stable_hasher::StableHasherResult; diff --git a/src/sip128.rs b/src/sip128.rs new file mode 100644 index 0000000..564f175 --- /dev/null +++ b/src/sip128.rs @@ -0,0 +1,518 @@ +//! This is a copy of `core::hash::sip` adapted to providing 128 bit hashes. + +// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance. 
+// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727
+use crate::int_overflow::{DebugStrictAdd, DebugStrictSub};
+
+use std::hash::Hasher;
+use std::mem::{self, MaybeUninit};
+use std::ptr;
+
+#[cfg(test)]
+mod tests;
+
+// The SipHash algorithm operates on 8-byte chunks.
+const ELEM_SIZE: usize = mem::size_of::<u64>();
+
+// Size of the buffer in number of elements, not including the spill.
+//
+// The selection of this size was guided by rustc-perf benchmark comparisons of
+// different buffer sizes. It should be periodically reevaluated as the compiler
+// implementation and input characteristics change.
+//
+// Using the same-sized buffer for everything we hash is a performance versus
+// complexity tradeoff. The ideal buffer size, and whether buffering should even
+// be used, depends on what is being hashed. It may be worth it to size the
+// buffer appropriately (perhaps by making SipHasher128 generic over the buffer
+// size) or disable buffering depending on what is being hashed. But at this
+// time, we use the same buffer size for everything.
+const BUFFER_CAPACITY: usize = 8;
+
+// Size of the buffer in bytes, not including the spill.
+const BUFFER_SIZE: usize = BUFFER_CAPACITY * ELEM_SIZE;
+
+// Size of the buffer in number of elements, including the spill.
+const BUFFER_WITH_SPILL_CAPACITY: usize = BUFFER_CAPACITY + 1;
+
+// Size of the buffer in bytes, including the spill.
+const BUFFER_WITH_SPILL_SIZE: usize = BUFFER_WITH_SPILL_CAPACITY * ELEM_SIZE;
+
+// Index of the spill element in the buffer.
+const BUFFER_SPILL_INDEX: usize = BUFFER_WITH_SPILL_CAPACITY - 1;
+
+#[derive(Debug, Clone)]
+#[repr(C)]
+pub struct SipHasher128 {
+    // The access pattern during hashing consists of accesses to `nbuf` and
+    // `buf` until the buffer is full, followed by accesses to `state` and
+    // `processed`, and then repetition of that pattern until hashing is done.
+    // This is the basis for the ordering of fields below. However, in practice
+    // the cache miss-rate for data access is extremely low regardless of order.
+    nbuf: usize,                                         // how many bytes in buf are valid
+    buf: [MaybeUninit<u64>; BUFFER_WITH_SPILL_CAPACITY], // unprocessed bytes le
+    state: State,                                        // hash State
+    processed: usize,                                    // how many bytes we've processed
+}
+
+#[derive(Debug, Clone, Copy)]
+#[repr(C)]
+struct State {
+    // v0, v2 and v1, v3 show up in pairs in the algorithm,
+    // and simd implementations of SipHash will use vectors
+    // of v02 and v13. By placing them in this order in the struct,
+    // the compiler can pick up on just a few simd optimizations by itself.
+    v0: u64,
+    v2: u64,
+    v1: u64,
+    v3: u64,
+}
+
+macro_rules! compress {
+    ($state:expr) => {{
+        compress!($state.v0, $state.v1, $state.v2, $state.v3)
+    }};
+    ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
+        $v0 = $v0.wrapping_add($v1);
+        $v1 = $v1.rotate_left(13);
+        $v1 ^= $v0;
+        $v0 = $v0.rotate_left(32);
+        $v2 = $v2.wrapping_add($v3);
+        $v3 = $v3.rotate_left(16);
+        $v3 ^= $v2;
+        $v0 = $v0.wrapping_add($v3);
+        $v3 = $v3.rotate_left(21);
+        $v3 ^= $v0;
+        $v2 = $v2.wrapping_add($v1);
+        $v1 = $v1.rotate_left(17);
+        $v1 ^= $v2;
+        $v2 = $v2.rotate_left(32);
+    }};
+}
+
+// Copies up to 8 bytes from source to destination. This performs better than
+// `ptr::copy_nonoverlapping` on microbenchmarks and may perform better on real
+// workloads since all of the copies have fixed sizes and avoid calling memcpy.
+// +// This is specifically designed for copies of up to 8 bytes, because that's the +// maximum of number bytes needed to fill an 8-byte-sized element on which +// SipHash operates. Note that for variable-sized copies which are known to be +// less than 8 bytes, this function will perform more work than necessary unless +// the compiler is able to optimize the extra work away. +#[inline] +unsafe fn copy_nonoverlapping_small(src: *const u8, dst: *mut u8, count: usize) { + debug_assert!(count <= 8); + + unsafe { + if count == 8 { + ptr::copy_nonoverlapping(src, dst, 8); + return; + } + + let mut i = 0; + if i.debug_strict_add(3) < count { + ptr::copy_nonoverlapping(src.add(i), dst.add(i), 4); + i = i.debug_strict_add(4); + } + + if i.debug_strict_add(1) < count { + ptr::copy_nonoverlapping(src.add(i), dst.add(i), 2); + i = i.debug_strict_add(2) + } + + if i < count { + *dst.add(i) = *src.add(i); + i = i.debug_strict_add(1); + } + + debug_assert_eq!(i, count); + } +} + +// # Implementation +// +// This implementation uses buffering to reduce the hashing cost for inputs +// consisting of many small integers. Buffering simplifies the integration of +// integer input--the integer write function typically just appends to the +// buffer with a statically sized write, updates metadata, and returns. +// +// Buffering also prevents alternating between writes that do and do not trigger +// the hashing process. Only when the entire buffer is full do we transition +// into hashing. This allows us to keep the hash state in registers for longer, +// instead of loading and storing it before and after processing each element. +// +// When a write fills the buffer, a buffer processing function is invoked to +// hash all of the buffered input. The buffer processing functions are marked +// `#[inline(never)]` so that they aren't inlined into the append functions, +// which ensures the more frequently called append functions remain inlineable +// and don't include register pushing/popping that would only be made necessary +// by inclusion of the complex buffer processing path which uses those +// registers. +// +// The buffer includes a "spill"--an extra element at the end--which simplifies +// the integer write buffer processing path. The value that fills the buffer can +// be written with a statically sized write that may spill over into the spill. +// After the buffer is processed, the part of the value that spilled over can be +// written from the spill to the beginning of the buffer with another statically +// sized write. This write may copy more bytes than actually spilled over, but +// we maintain the metadata such that any extra copied bytes will be ignored by +// subsequent processing. Due to the static sizes, this scheme performs better +// than copying the exact number of bytes needed into the end and beginning of +// the buffer. +// +// The buffer is uninitialized, which improves performance, but may preclude +// efficient implementation of alternative approaches. The improvement is not so +// large that an alternative approach should be disregarded because it cannot be +// efficiently implemented with an uninitialized buffer. On the other hand, an +// uninitialized buffer may become more important should a larger one be used. +// +// # Platform Dependence +// +// The SipHash algorithm operates on byte sequences. It parses the input stream +// as 8-byte little-endian integers. Therefore, given the same byte sequence, it +// produces the same result on big- and little-endian hardware. 
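+// For example, the byte sequence [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08]
+// is always parsed as the 64-bit value 0x0807060504030201, regardless of the
+// endianness of the machine doing the hashing.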
+//
+// However, the Hasher trait has methods which operate on multi-byte integers.
+// How they are converted into byte sequences can be endian-dependent (by using
+// native byte order) or independent (by consistently using either LE or BE byte
+// order). It can also be `isize` and `usize` size dependent (by using the
+// native size), or independent (by converting to a common size), supposing the
+// values can be represented in 32 bits.
+//
+// In order to make `SipHasher128` consistent with `SipHasher` in libstd, we
+// choose to do the integer to byte sequence conversion in the platform-
+// dependent way. Clients can achieve platform-independent hashing by widening
+// `isize` and `usize` integers to 64 bits on 32-bit systems and byte-swapping
+// integers on big-endian systems before passing them to the writing functions.
+// This causes the input byte sequence to look identical on big- and little-
+// endian systems (supposing `isize` and `usize` values can be represented in 32
+// bits), which ensures platform-independent results.
+impl SipHasher128 {
+    #[inline]
+    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher128 {
+        let mut hasher = SipHasher128 {
+            nbuf: 0,
+            // HACK: Manual MaybeUninit::uninit_array, use inline const with Rust 1.79
+            buf: unsafe {
+                MaybeUninit::<[MaybeUninit<_>; BUFFER_WITH_SPILL_CAPACITY]>::uninit().assume_init()
+            },
+            state: State {
+                v0: key0 ^ 0x736f6d6570736575,
+                // The XOR with 0xee is only done on 128-bit algorithm version.
+                v1: key1 ^ (0x646f72616e646f6d ^ 0xee),
+                v2: key0 ^ 0x6c7967656e657261,
+                v3: key1 ^ 0x7465646279746573,
+            },
+            processed: 0,
+        };
+
+        unsafe {
+            // Initialize spill because we read from it in `short_write_process_buffer`.
+            *hasher.buf.get_unchecked_mut(BUFFER_SPILL_INDEX) = MaybeUninit::zeroed();
+        }
+
+        hasher
+    }
+
+    #[inline]
+    pub fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
+        let nbuf = self.nbuf;
+        debug_assert!(LEN <= 8);
+        debug_assert!(nbuf < BUFFER_SIZE);
+        debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE);
+
+        if nbuf.debug_strict_add(LEN) < BUFFER_SIZE {
+            unsafe {
+                // The memcpy call is optimized away because the size is known.
+                let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
+                ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN);
+            }
+
+            self.nbuf = nbuf.debug_strict_add(LEN);
+
+            return;
+        }
+
+        unsafe { self.short_write_process_buffer(bytes) }
+    }
+
+    // A specialized write function for values with size <= 8 that should only
+    // be called when the write would cause the buffer to fill.
+    //
+    // SAFETY: the write of `bytes` into `self.buf` starting at byte offset
+    // `self.nbuf` must cause `self.buf` to become fully initialized (and not
+    // overflow) if it wasn't already.
+    #[inline(never)]
+    unsafe fn short_write_process_buffer<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
+        unsafe {
+            let nbuf = self.nbuf;
+            debug_assert!(LEN <= 8);
+            debug_assert!(nbuf < BUFFER_SIZE);
+            debug_assert!(nbuf + LEN >= BUFFER_SIZE);
+            debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE);
+
+            // Copy first part of input into end of buffer, possibly into spill
+            // element. The memcpy call is optimized away because the size is known.
+            let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
+            ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN);
+
+            // Process buffer.
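+            // Each full 8-byte element is folded into the state the same way the
+            // SipHash reference does it: XOR the element into v3, run the
+            // compression rounds, then XOR it into v0.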
+ for i in 0..BUFFER_CAPACITY { + let elem = self.buf.get_unchecked(i).assume_init().to_le(); + self.state.v3 ^= elem; + Sip13Rounds::c_rounds(&mut self.state); + self.state.v0 ^= elem; + } + + // Copy remaining input into start of buffer by copying LEN - 1 + // elements from spill (at most LEN - 1 bytes could have overflowed + // into the spill). The memcpy call is optimized away because the size + // is known. And the whole copy is optimized away for LEN == 1. + let dst = self.buf.as_mut_ptr() as *mut u8; + let src = self.buf.get_unchecked(BUFFER_SPILL_INDEX) as *const _ as *const u8; + ptr::copy_nonoverlapping(src, dst, LEN - 1); + + // This function should only be called when the write fills the buffer. + // Therefore, when LEN == 1, the new `self.nbuf` must be zero. + // LEN is statically known, so the branch is optimized away. + self.nbuf = if LEN == 1 { + 0 + } else { + nbuf.debug_strict_add(LEN).debug_strict_sub(BUFFER_SIZE) + }; + self.processed = self.processed.debug_strict_add(BUFFER_SIZE); + } + } + + // A write function for byte slices. + #[inline] + fn slice_write(&mut self, msg: &[u8]) { + let length = msg.len(); + let nbuf = self.nbuf; + debug_assert!(nbuf < BUFFER_SIZE); + + if nbuf.debug_strict_add(length) < BUFFER_SIZE { + unsafe { + let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf); + + if length <= 8 { + copy_nonoverlapping_small(msg.as_ptr(), dst, length); + } else { + // This memcpy is *not* optimized away. + ptr::copy_nonoverlapping(msg.as_ptr(), dst, length); + } + } + + self.nbuf = nbuf.debug_strict_add(length); + + return; + } + + unsafe { self.slice_write_process_buffer(msg) } + } + + // A write function for byte slices that should only be called when the + // write would cause the buffer to fill. + // + // SAFETY: `self.buf` must be initialized up to the byte offset `self.nbuf`, + // and `msg` must contain enough bytes to initialize the rest of the element + // containing the byte offset `self.nbuf`. + #[inline(never)] + unsafe fn slice_write_process_buffer(&mut self, msg: &[u8]) { + unsafe { + let length = msg.len(); + let nbuf = self.nbuf; + debug_assert!(nbuf < BUFFER_SIZE); + debug_assert!(nbuf + length >= BUFFER_SIZE); + + // Always copy first part of input into current element of buffer. + // This function should only be called when the write fills the buffer, + // so we know that there is enough input to fill the current element. + let valid_in_elem = nbuf % ELEM_SIZE; + let needed_in_elem = ELEM_SIZE.debug_strict_sub(valid_in_elem); + + let src = msg.as_ptr(); + let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf); + copy_nonoverlapping_small(src, dst, needed_in_elem); + + // Process buffer. + + // Using `nbuf / ELEM_SIZE + 1` rather than `(nbuf + needed_in_elem) / + // ELEM_SIZE` to show the compiler that this loop's upper bound is > 0. + // We know that is true, because last step ensured we have a full + // element in the buffer. + let last = (nbuf / ELEM_SIZE).debug_strict_add(1); + + for i in 0..last { + let elem = self.buf.get_unchecked(i).assume_init().to_le(); + self.state.v3 ^= elem; + Sip13Rounds::c_rounds(&mut self.state); + self.state.v0 ^= elem; + } + + // Process the remaining element-sized chunks of input. 
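+            // These chunks are read directly from `msg` with unaligned loads and
+            // folded into the state without going through the buffer; the final
+            // partial chunk (if any) is copied to the start of the buffer below.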
+ let mut processed = needed_in_elem; + let input_left = length.debug_strict_sub(processed); + let elems_left = input_left / ELEM_SIZE; + let extra_bytes_left = input_left % ELEM_SIZE; + + for _ in 0..elems_left { + let elem = (msg.as_ptr().add(processed) as *const u64) + .read_unaligned() + .to_le(); + self.state.v3 ^= elem; + Sip13Rounds::c_rounds(&mut self.state); + self.state.v0 ^= elem; + processed = processed.debug_strict_add(ELEM_SIZE); + } + + // Copy remaining input into start of buffer. + let src = msg.as_ptr().add(processed); + let dst = self.buf.as_mut_ptr() as *mut u8; + copy_nonoverlapping_small(src, dst, extra_bytes_left); + + self.nbuf = extra_bytes_left; + self.processed = self + .processed + .debug_strict_add(nbuf.debug_strict_add(processed)); + } + } + + #[inline] + pub fn finish128(mut self) -> (u64, u64) { + debug_assert!(self.nbuf < BUFFER_SIZE); + + // Process full elements in buffer. + let last = self.nbuf / ELEM_SIZE; + + // Since we're consuming self, avoid updating members for a potential + // performance gain. + let mut state = self.state; + + for i in 0..last { + let elem = unsafe { self.buf.get_unchecked(i).assume_init().to_le() }; + state.v3 ^= elem; + Sip13Rounds::c_rounds(&mut state); + state.v0 ^= elem; + } + + // Get remaining partial element. + let elem = if self.nbuf % ELEM_SIZE != 0 { + unsafe { + // Ensure element is initialized by writing zero bytes. At most + // `ELEM_SIZE - 1` are required given the above check. It's safe + // to write this many because we have the spill and we maintain + // `self.nbuf` such that this write will start before the spill. + let dst = (self.buf.as_mut_ptr() as *mut u8).add(self.nbuf); + ptr::write_bytes(dst, 0, ELEM_SIZE - 1); + self.buf.get_unchecked(last).assume_init().to_le() + } + } else { + 0 + }; + + // Finalize the hash. + let length = self.processed.debug_strict_add(self.nbuf); + let b: u64 = ((length as u64 & 0xff) << 56) | elem; + + state.v3 ^= b; + Sip13Rounds::c_rounds(&mut state); + state.v0 ^= b; + + state.v2 ^= 0xee; + Sip13Rounds::d_rounds(&mut state); + let _0 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3; + + state.v1 ^= 0xdd; + Sip13Rounds::d_rounds(&mut state); + let _1 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3; + + (_0, _1) + } +} + +impl Hasher for SipHasher128 { + #[inline] + fn write_u8(&mut self, i: u8) { + self.short_write(i.to_ne_bytes()); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.short_write(i.to_ne_bytes()); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.short_write(i.to_ne_bytes()); + } + + #[inline] + fn write_u64(&mut self, i: u64) { + self.short_write(i.to_ne_bytes()); + } + + #[inline] + fn write_usize(&mut self, i: usize) { + self.short_write(i.to_ne_bytes()); + } + + #[inline] + fn write_i8(&mut self, i: i8) { + self.short_write((i as u8).to_ne_bytes()); + } + + #[inline] + fn write_i16(&mut self, i: i16) { + self.short_write((i as u16).to_ne_bytes()); + } + + #[inline] + fn write_i32(&mut self, i: i32) { + self.short_write((i as u32).to_ne_bytes()); + } + + #[inline] + fn write_i64(&mut self, i: i64) { + self.short_write((i as u64).to_ne_bytes()); + } + + #[inline] + fn write_isize(&mut self, i: isize) { + self.short_write((i as usize).to_ne_bytes()); + } + + #[inline] + fn write(&mut self, msg: &[u8]) { + self.slice_write(msg); + } + + #[cfg(feature = "nightly")] + #[inline] + fn write_str(&mut self, s: &str) { + // This hasher works byte-wise, and `0xFF` cannot show up in a `str`, + // so just hashing the one extra byte is enough to be prefix-free. 
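+        // (0xFF is never a valid byte in UTF-8, so e.g. hashing "ab" then "c"
+        // cannot produce the same byte stream as hashing "abc" then "".)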
+ self.write(s.as_bytes()); + self.write_u8(0xFF); + } + + fn finish(&self) -> u64 { + panic!("SipHasher128 cannot provide valid 64 bit hashes") + } +} + +#[derive(Debug, Clone, Default)] +struct Sip13Rounds; + +impl Sip13Rounds { + #[inline] + fn c_rounds(state: &mut State) { + compress!(state); + } + + #[inline] + fn d_rounds(state: &mut State) { + compress!(state); + compress!(state); + compress!(state); + } +} diff --git a/src/sip128/tests.rs b/src/sip128/tests.rs new file mode 100644 index 0000000..e9dd0f1 --- /dev/null +++ b/src/sip128/tests.rs @@ -0,0 +1,304 @@ +use super::*; + +use std::hash::Hash; + +// Hash just the bytes of the slice, without length prefix +struct Bytes<'a>(&'a [u8]); + +impl<'a> Hash for Bytes<'a> { + #[allow(unused_must_use)] + fn hash(&self, state: &mut H) { + for byte in self.0 { + state.write_u8(*byte); + } + } +} + +fn hash_with(mut st: SipHasher128, x: &T) -> (u64, u64) { + x.hash(&mut st); + st.finish128() +} + +fn hash(x: &T) -> (u64, u64) { + hash_with(SipHasher128::new_with_keys(0, 0), x) +} +#[rustfmt::skip] +const TEST_VECTOR: [[u8; 16]; 64] = [ + [0xe7, 0x7e, 0xbc, 0xb2, 0x27, 0x88, 0xa5, 0xbe, 0xfd, 0x62, 0xdb, 0x6a, 0xdd, 0x30, 0x30, 0x01], + [0xfc, 0x6f, 0x37, 0x04, 0x60, 0xd3, 0xed, 0xa8, 0x5e, 0x05, 0x73, 0xcc, 0x2b, 0x2f, 0xf0, 0x63], + [0x75, 0x78, 0x7f, 0x09, 0x05, 0x69, 0x83, 0x9b, 0x85, 0x5b, 0xc9, 0x54, 0x8c, 0x6a, 0xea, 0x95], + [0x6b, 0xc5, 0xcc, 0xfa, 0x1e, 0xdc, 0xf7, 0x9f, 0x48, 0x23, 0x18, 0x77, 0x12, 0xeb, 0xd7, 0x43], + [0x0c, 0x78, 0x4e, 0x71, 0xac, 0x2b, 0x28, 0x5a, 0x9f, 0x8e, 0x92, 0xe7, 0x8f, 0xbf, 0x2c, 0x25], + [0xf3, 0x28, 0xdb, 0x89, 0x34, 0x5b, 0x62, 0x0c, 0x79, 0x52, 0x29, 0xa4, 0x26, 0x95, 0x84, 0x3e], + [0xdc, 0xd0, 0x3d, 0x29, 0xf7, 0x43, 0xe7, 0x10, 0x09, 0x51, 0xb0, 0xe8, 0x39, 0x85, 0xa6, 0xf8], + [0x10, 0x84, 0xb9, 0x23, 0xf2, 0xaa, 0xe0, 0xc3, 0xa6, 0x2f, 0x2e, 0xc8, 0x08, 0x48, 0xab, 0x77], + [0xaa, 0x12, 0xfe, 0xe1, 0xd5, 0xe3, 0xda, 0xb4, 0x72, 0x4f, 0x16, 0xab, 0x35, 0xf9, 0xc7, 0x99], + [0x81, 0xdd, 0xb8, 0x04, 0x2c, 0xf3, 0x39, 0x94, 0xf4, 0x72, 0x0e, 0x00, 0x94, 0x13, 0x7c, 0x42], + [0x4f, 0xaa, 0x54, 0x1d, 0x5d, 0x49, 0x8e, 0x89, 0xba, 0x0e, 0xa4, 0xc3, 0x87, 0xb2, 0x2f, 0xb4], + [0x72, 0x3b, 0x9a, 0xf3, 0x55, 0x44, 0x91, 0xdb, 0xb1, 0xd6, 0x63, 0x3d, 0xfc, 0x6e, 0x0c, 0x4e], + [0xe5, 0x3f, 0x92, 0x85, 0x9e, 0x48, 0x19, 0xa8, 0xdc, 0x06, 0x95, 0x73, 0x9f, 0xea, 0x8c, 0x65], + [0xb2, 0xf8, 0x58, 0xc7, 0xc9, 0xea, 0x80, 0x1d, 0x53, 0xd6, 0x03, 0x59, 0x6d, 0x65, 0x78, 0x44], + [0x87, 0xe7, 0x62, 0x68, 0xdb, 0xc9, 0x22, 0x72, 0x26, 0xb0, 0xca, 0x66, 0x5f, 0x64, 0xe3, 0x78], + [0xc1, 0x7e, 0x55, 0x05, 0xb2, 0xbd, 0x52, 0x6c, 0x29, 0x21, 0xcd, 0xec, 0x1e, 0x7e, 0x01, 0x09], + [0xd0, 0xa8, 0xd9, 0x57, 0x15, 0x51, 0x8e, 0xeb, 0xb5, 0x13, 0xb0, 0xf8, 0x3d, 0x9e, 0x17, 0x93], + [0x23, 0x41, 0x26, 0xf9, 0x3f, 0xbb, 0x66, 0x8d, 0x97, 0x51, 0x12, 0xe8, 0xfe, 0xbd, 0xf7, 0xec], + [0xef, 0x42, 0xf0, 0x3d, 0xb7, 0x8f, 0x70, 0x4d, 0x02, 0x3c, 0x44, 0x9f, 0x16, 0xb7, 0x09, 0x2b], + [0xab, 0xf7, 0x62, 0x38, 0xc2, 0x0a, 0xf1, 0x61, 0xb2, 0x31, 0x4b, 0x4d, 0x55, 0x26, 0xbc, 0xe9], + [0x3c, 0x2c, 0x2f, 0x11, 0xbb, 0x90, 0xcf, 0x0b, 0xe3, 0x35, 0xca, 0x9b, 0x2e, 0x91, 0xe9, 0xb7], + [0x2a, 0x7a, 0x68, 0x0f, 0x22, 0xa0, 0x2a, 0x92, 0xf4, 0x51, 0x49, 0xd2, 0x0f, 0xec, 0xe0, 0xef], + [0xc9, 0xa8, 0xd1, 0x30, 0x23, 0x1d, 0xd4, 0x3e, 0x42, 0xe6, 0x45, 0x69, 0x57, 0xf8, 0x37, 0x79], + [0x1d, 0x12, 0x7b, 0x84, 0x40, 0x5c, 0xea, 0xb9, 0x9f, 0xd8, 0x77, 0x5a, 0x9b, 0xe6, 0xc5, 0x59], + [0x9e, 0x4b, 0xf8, 0x37, 0xbc, 0xfd, 0x92, 0xca, 
0xce, 0x09, 0xd2, 0x06, 0x1a, 0x84, 0xd0, 0x4a], + [0x39, 0x03, 0x1a, 0x96, 0x5d, 0x73, 0xb4, 0xaf, 0x5a, 0x27, 0x4d, 0x18, 0xf9, 0x73, 0xb1, 0xd2], + [0x7f, 0x4d, 0x0a, 0x12, 0x09, 0xd6, 0x7e, 0x4e, 0xd0, 0x6f, 0x75, 0x38, 0xe1, 0xcf, 0xad, 0x64], + [0xe6, 0x1e, 0xe2, 0x40, 0xfb, 0xdc, 0xce, 0x38, 0x96, 0x9f, 0x4c, 0xd2, 0x49, 0x27, 0xdd, 0x93], + [0x4c, 0x3b, 0xa2, 0xb3, 0x7b, 0x0f, 0xdd, 0x8c, 0xfa, 0x5e, 0x95, 0xc1, 0x89, 0xb2, 0x94, 0x14], + [0xe0, 0x6f, 0xd4, 0xca, 0x06, 0x6f, 0xec, 0xdd, 0x54, 0x06, 0x8a, 0x5a, 0xd8, 0x89, 0x6f, 0x86], + [0x5c, 0xa8, 0x4c, 0x34, 0x13, 0x9c, 0x65, 0x80, 0xa8, 0x8a, 0xf2, 0x49, 0x90, 0x72, 0x07, 0x06], + [0x42, 0xea, 0x96, 0x1c, 0x5b, 0x3c, 0x85, 0x8b, 0x17, 0xc3, 0xe5, 0x50, 0xdf, 0xa7, 0x90, 0x10], + [0x40, 0x6c, 0x44, 0xde, 0xe6, 0x78, 0x57, 0xb2, 0x94, 0x31, 0x60, 0xf3, 0x0c, 0x74, 0x17, 0xd3], + [0xc5, 0xf5, 0x7b, 0xae, 0x13, 0x20, 0xfc, 0xf4, 0xb4, 0xe8, 0x68, 0xe7, 0x1d, 0x56, 0xc6, 0x6b], + [0x04, 0xbf, 0x73, 0x7a, 0x5b, 0x67, 0x6b, 0xe7, 0xc3, 0xde, 0x05, 0x01, 0x7d, 0xf4, 0xbf, 0xf9], + [0x51, 0x63, 0xc9, 0xc0, 0x3f, 0x19, 0x07, 0xea, 0x10, 0x44, 0xed, 0x5c, 0x30, 0x72, 0x7b, 0x4f], + [0x37, 0xa1, 0x10, 0xf0, 0x02, 0x71, 0x8e, 0xda, 0xd2, 0x4b, 0x3f, 0x9e, 0xe4, 0x53, 0xf1, 0x40], + [0xb9, 0x87, 0x7e, 0x38, 0x1a, 0xed, 0xd3, 0xda, 0x08, 0xc3, 0x3e, 0x75, 0xff, 0x23, 0xac, 0x10], + [0x7c, 0x50, 0x04, 0x00, 0x5e, 0xc5, 0xda, 0x4c, 0x5a, 0xc9, 0x44, 0x0e, 0x5c, 0x72, 0x31, 0x93], + [0x81, 0xb8, 0x24, 0x37, 0x83, 0xdb, 0xc6, 0x46, 0xca, 0x9d, 0x0c, 0xd8, 0x2a, 0xbd, 0xb4, 0x6c], + [0x50, 0x57, 0x20, 0x54, 0x3e, 0xb9, 0xb4, 0x13, 0xd5, 0x0b, 0x3c, 0xfa, 0xd9, 0xee, 0xf9, 0x38], + [0x94, 0x5f, 0x59, 0x4d, 0xe7, 0x24, 0x11, 0xe4, 0xd3, 0x35, 0xbe, 0x87, 0x44, 0x56, 0xd8, 0xf3], + [0x37, 0x92, 0x3b, 0x3e, 0x37, 0x17, 0x77, 0xb2, 0x11, 0x70, 0xbf, 0x9d, 0x7e, 0x62, 0xf6, 0x02], + [0x3a, 0xd4, 0xe7, 0xc8, 0x57, 0x64, 0x96, 0x46, 0x11, 0xeb, 0x0a, 0x6c, 0x4d, 0x62, 0xde, 0x56], + [0xcd, 0x91, 0x39, 0x6c, 0x44, 0xaf, 0x4f, 0x51, 0x85, 0x57, 0x8d, 0x9d, 0xd9, 0x80, 0x3f, 0x0a], + [0xfe, 0x28, 0x15, 0x8e, 0x72, 0x7b, 0x86, 0x8f, 0x39, 0x03, 0xc9, 0xac, 0xda, 0x64, 0xa2, 0x58], + [0x40, 0xcc, 0x10, 0xb8, 0x28, 0x8c, 0xe5, 0xf0, 0xbc, 0x3a, 0xc0, 0xb6, 0x8a, 0x0e, 0xeb, 0xc8], + [0x6f, 0x14, 0x90, 0xf5, 0x40, 0x69, 0x9a, 0x3c, 0xd4, 0x97, 0x44, 0x20, 0xec, 0xc9, 0x27, 0x37], + [0xd5, 0x05, 0xf1, 0xb7, 0x5e, 0x1a, 0x84, 0xa6, 0x03, 0xc4, 0x35, 0x83, 0xb2, 0xed, 0x03, 0x08], + [0x49, 0x15, 0x73, 0xcf, 0xd7, 0x2b, 0xb4, 0x68, 0x2b, 0x7c, 0xa5, 0x88, 0x0e, 0x1c, 0x8d, 0x6f], + [0x3e, 0xd6, 0x9c, 0xfe, 0x45, 0xab, 0x40, 0x3f, 0x2f, 0xd2, 0xad, 0x95, 0x9b, 0xa2, 0x76, 0x66], + [0x8b, 0xe8, 0x39, 0xef, 0x1b, 0x20, 0xb5, 0x7c, 0x83, 0xba, 0x7e, 0xb6, 0xa8, 0xc2, 0x2b, 0x6a], + [0x14, 0x09, 0x18, 0x6a, 0xb4, 0x22, 0x31, 0xfe, 0xde, 0xe1, 0x81, 0x62, 0xcf, 0x1c, 0xb4, 0xca], + [0x2b, 0xf3, 0xcc, 0xc2, 0x4a, 0xb6, 0x72, 0xcf, 0x15, 0x1f, 0xb8, 0xd2, 0xf3, 0xf3, 0x06, 0x9b], + [0xb9, 0xb9, 0x3a, 0x28, 0x82, 0xd6, 0x02, 0x5c, 0xdb, 0x8c, 0x56, 0xfa, 0x13, 0xf7, 0x53, 0x7b], + [0xd9, 0x7c, 0xca, 0x36, 0x94, 0xfb, 0x20, 0x6d, 0xb8, 0xbd, 0x1f, 0x36, 0x50, 0xc3, 0x33, 0x22], + [0x94, 0xec, 0x2e, 0x19, 0xa4, 0x0b, 0xe4, 0x1a, 0xf3, 0x94, 0x0d, 0x6b, 0x30, 0xc4, 0x93, 0x84], + [0x4b, 0x41, 0x60, 0x3f, 0x20, 0x9a, 0x04, 0x5b, 0xe1, 0x40, 0xa3, 0x41, 0xa3, 0xdf, 0xfe, 0x10], + [0x23, 0xfb, 0xcb, 0x30, 0x9f, 0x1c, 0xf0, 0x94, 0x89, 0x07, 0x55, 0xab, 0x1b, 0x42, 0x65, 0x69], + [0xe7, 0xd9, 0xb6, 0x56, 0x90, 0x91, 0x8a, 0x2b, 0x23, 0x2f, 0x2f, 0x5c, 0x12, 0xc8, 0x30, 0x0e], + 
[0xad, 0xe8, 0x3c, 0xf7, 0xe7, 0xf3, 0x84, 0x7b, 0x36, 0xfa, 0x4b, 0x54, 0xb0, 0x0d, 0xce, 0x61], + [0x06, 0x10, 0xc5, 0xf2, 0xee, 0x57, 0x1c, 0x8a, 0xc8, 0x0c, 0xbf, 0xe5, 0x38, 0xbd, 0xf1, 0xc7], + [0x27, 0x1d, 0x5d, 0x00, 0xfb, 0xdb, 0x5d, 0x15, 0x5d, 0x9d, 0xce, 0xa9, 0x7c, 0xb4, 0x02, 0x18], + [0x4c, 0x58, 0x00, 0xe3, 0x4e, 0xfe, 0x42, 0x6f, 0x07, 0x9f, 0x6b, 0x0a, 0xa7, 0x52, 0x60, 0xad], +]; + +#[test] +fn test_siphash_1_3_test_vector() { + let k0 = 0x_07_06_05_04_03_02_01_00; + let k1 = 0x_0f_0e_0d_0c_0b_0a_09_08; + + let mut input: Vec = Vec::new(); + + for i in 0..64 { + let out = hash_with(SipHasher128::new_with_keys(k0, k1), &Bytes(&input[..])); + let expected = ( + ((TEST_VECTOR[i][0] as u64) << 0) + | ((TEST_VECTOR[i][1] as u64) << 8) + | ((TEST_VECTOR[i][2] as u64) << 16) + | ((TEST_VECTOR[i][3] as u64) << 24) + | ((TEST_VECTOR[i][4] as u64) << 32) + | ((TEST_VECTOR[i][5] as u64) << 40) + | ((TEST_VECTOR[i][6] as u64) << 48) + | ((TEST_VECTOR[i][7] as u64) << 56), + ((TEST_VECTOR[i][8] as u64) << 0) + | ((TEST_VECTOR[i][9] as u64) << 8) + | ((TEST_VECTOR[i][10] as u64) << 16) + | ((TEST_VECTOR[i][11] as u64) << 24) + | ((TEST_VECTOR[i][12] as u64) << 32) + | ((TEST_VECTOR[i][13] as u64) << 40) + | ((TEST_VECTOR[i][14] as u64) << 48) + | ((TEST_VECTOR[i][15] as u64) << 56), + ); + + assert_eq!(out, expected); + input.push(i as u8); + } +} + +#[test] +#[cfg(target_arch = "arm")] +fn test_hash_usize() { + let val = 0xdeadbeef_deadbeef_u64; + assert!(hash(&(val as u64)) != hash(&(val as usize))); + assert_eq!(hash(&(val as u32)), hash(&(val as usize))); +} +#[test] +#[cfg(target_arch = "x86_64")] +fn test_hash_usize() { + let val = 0xdeadbeef_deadbeef_u64; + assert_eq!(hash(&(val as u64)), hash(&(val as usize))); + assert!(hash(&(val as u32)) != hash(&(val as usize))); +} +#[test] +#[cfg(target_arch = "x86")] +fn test_hash_usize() { + let val = 0xdeadbeef_deadbeef_u64; + assert!(hash(&(val as u64)) != hash(&(val as usize))); + assert_eq!(hash(&(val as u32)), hash(&(val as usize))); +} + +#[test] +fn test_hash_idempotent() { + let val64 = 0xdeadbeef_deadbeef_u64; + assert_eq!(hash(&val64), hash(&val64)); + let val32 = 0xdeadbeef_u32; + assert_eq!(hash(&val32), hash(&val32)); +} + +#[test] +fn test_hash_no_bytes_dropped_64() { + let val = 0xdeadbeef_deadbeef_u64; + + assert!(hash(&val) != hash(&zero_byte(val, 0))); + assert!(hash(&val) != hash(&zero_byte(val, 1))); + assert!(hash(&val) != hash(&zero_byte(val, 2))); + assert!(hash(&val) != hash(&zero_byte(val, 3))); + assert!(hash(&val) != hash(&zero_byte(val, 4))); + assert!(hash(&val) != hash(&zero_byte(val, 5))); + assert!(hash(&val) != hash(&zero_byte(val, 6))); + assert!(hash(&val) != hash(&zero_byte(val, 7))); + + fn zero_byte(val: u64, byte: usize) -> u64 { + assert!(byte < 8); + val & !(0xff << (byte * 8)) + } +} + +#[test] +fn test_hash_no_bytes_dropped_32() { + let val = 0xdeadbeef_u32; + + assert!(hash(&val) != hash(&zero_byte(val, 0))); + assert!(hash(&val) != hash(&zero_byte(val, 1))); + assert!(hash(&val) != hash(&zero_byte(val, 2))); + assert!(hash(&val) != hash(&zero_byte(val, 3))); + + fn zero_byte(val: u32, byte: usize) -> u32 { + assert!(byte < 4); + val & !(0xff << (byte * 8)) + } +} + +#[test] +fn test_hash_no_concat_alias() { + let s = ("aa", "bb"); + let t = ("aabb", ""); + let u = ("a", "abb"); + + assert!(s != t && t != u); + assert!(hash(&s) != hash(&t) && hash(&s) != hash(&u)); + + let u = [1, 0, 0, 0]; + let v = (&u[..1], &u[1..3], &u[3..]); + let w = (&u[..], &u[4..4], &u[4..4]); + + assert!(v != w); + 
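+    // The component slices of `v` and `w` have different lengths, and hashing a
+    // slice includes its length, so the two tuples feed different byte streams
+    // to the hasher.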
assert!(hash(&v) != hash(&w)); +} + +#[test] +fn test_short_write_works() { + let test_u8 = 0xFF_u8; + let test_u16 = 0x1122_u16; + let test_u32 = 0x22334455_u32; + let test_u64 = 0x33445566_778899AA_u64; + let test_u128 = 0x11223344_55667788_99AABBCC_DDEEFF77_u128; + let test_usize = 0xD0C0B0A0_usize; + + let test_i8 = -1_i8; + let test_i16 = -2_i16; + let test_i32 = -3_i32; + let test_i64 = -4_i64; + let test_i128 = -5_i128; + let test_isize = -6_isize; + + let mut h1 = SipHasher128::new_with_keys(0, 0); + h1.write(b"bytes"); + h1.write(b"string"); + h1.write_u8(test_u8); + h1.write_u16(test_u16); + h1.write_u32(test_u32); + h1.write_u64(test_u64); + h1.write_u128(test_u128); + h1.write_usize(test_usize); + h1.write_i8(test_i8); + h1.write_i16(test_i16); + h1.write_i32(test_i32); + h1.write_i64(test_i64); + h1.write_i128(test_i128); + h1.write_isize(test_isize); + + let mut h2 = SipHasher128::new_with_keys(0, 0); + h2.write(b"bytes"); + h2.write(b"string"); + h2.write(&test_u8.to_ne_bytes()); + h2.write(&test_u16.to_ne_bytes()); + h2.write(&test_u32.to_ne_bytes()); + h2.write(&test_u64.to_ne_bytes()); + h2.write(&test_u128.to_ne_bytes()); + h2.write(&test_usize.to_ne_bytes()); + h2.write(&test_i8.to_ne_bytes()); + h2.write(&test_i16.to_ne_bytes()); + h2.write(&test_i32.to_ne_bytes()); + h2.write(&test_i64.to_ne_bytes()); + h2.write(&test_i128.to_ne_bytes()); + h2.write(&test_isize.to_ne_bytes()); + + let h1_hash = h1.finish128(); + let h2_hash = h2.finish128(); + + assert_eq!(h1_hash, h2_hash); +} + +macro_rules! test_fill_buffer { + ($type:ty, $write_method:ident) => {{ + // Test filling and overfilling the buffer from all possible offsets + // for a given integer type and its corresponding write method. + const SIZE: usize = std::mem::size_of::<$type>(); + let input = [42; BUFFER_SIZE]; + let x = 0x01234567_89ABCDEF_76543210_FEDCBA98_u128 as $type; + let x_bytes = &x.to_ne_bytes(); + + for i in 1..=SIZE { + let s = &input[..BUFFER_SIZE - i]; + + let mut h1 = SipHasher128::new_with_keys(7, 13); + h1.write(s); + h1.$write_method(x); + + let mut h2 = SipHasher128::new_with_keys(7, 13); + h2.write(s); + h2.write(x_bytes); + + let h1_hash = h1.finish128(); + let h2_hash = h2.finish128(); + + assert_eq!(h1_hash, h2_hash); + } + }}; +} + +#[test] +fn test_fill_buffer() { + test_fill_buffer!(u8, write_u8); + test_fill_buffer!(u16, write_u16); + test_fill_buffer!(u32, write_u32); + test_fill_buffer!(u64, write_u64); + test_fill_buffer!(u128, write_u128); + test_fill_buffer!(usize, write_usize); + + test_fill_buffer!(i8, write_i8); + test_fill_buffer!(i16, write_i16); + test_fill_buffer!(i32, write_i32); + test_fill_buffer!(i64, write_i64); + test_fill_buffer!(i128, write_i128); + test_fill_buffer!(isize, write_isize); +} diff --git a/src/stable_hasher.rs b/src/stable_hasher.rs new file mode 100644 index 0000000..200d313 --- /dev/null +++ b/src/stable_hasher.rs @@ -0,0 +1,170 @@ +//! Stable hasher adapted for cross-platform independent hash. + +use crate::sip128::SipHasher128; + +use std::fmt; +use std::hash::Hasher; + +#[cfg(test)] +mod tests; + +/// Trait for retrieving the result of the stable hashing operation. +pub trait StableHasherResult: Sized { + fn finish(hasher: StableHasher) -> Self; +} + +/// When hashing something that ends up affecting properties like symbol names, +/// we want these symbol names to be calculated independently of other factors +/// like what architecture you're compiling *from*. 
+///
+/// To that end we always convert integers to little-endian format before
+/// hashing and the architecture dependent `isize` and `usize` types are
+/// extended to 64 bits if needed.
+pub struct StableHasher {
+    state: SipHasher128,
+}
+
+impl fmt::Debug for StableHasher {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{:?}", self.state)
+    }
+}
+
+impl StableHasher {
+    #[inline]
+    pub fn new() -> Self {
+        StableHasher {
+            state: SipHasher128::new_with_keys(0, 0),
+        }
+    }
+
+    #[inline]
+    pub fn finish<W: StableHasherResult>(self) -> W {
+        W::finish(self)
+    }
+
+    #[inline]
+    pub fn finalize(self) -> (u64, u64) {
+        self.state.finish128()
+    }
+}
+
+impl Hasher for StableHasher {
+    fn finish(&self) -> u64 {
+        panic!("use StableHasher::finalize instead");
+    }
+
+    #[inline]
+    fn write(&mut self, bytes: &[u8]) {
+        self.state.write(bytes);
+    }
+
+    #[cfg(feature = "nightly")]
+    #[inline]
+    fn write_str(&mut self, s: &str) {
+        self.state.write_str(s);
+    }
+
+    #[cfg(feature = "nightly")]
+    #[inline]
+    fn write_length_prefix(&mut self, len: usize) {
+        // Our impl for `usize` will extend it if needed.
+        self.write_usize(len);
+    }
+
+    #[inline]
+    fn write_u8(&mut self, i: u8) {
+        self.state.write_u8(i);
+    }
+
+    #[inline]
+    fn write_u16(&mut self, i: u16) {
+        self.state.short_write(i.to_le_bytes());
+    }
+
+    #[inline]
+    fn write_u32(&mut self, i: u32) {
+        self.state.short_write(i.to_le_bytes());
+    }
+
+    #[inline]
+    fn write_u64(&mut self, i: u64) {
+        self.state.short_write(i.to_le_bytes());
+    }
+
+    #[inline]
+    fn write_u128(&mut self, i: u128) {
+        self.write_u64(i as u64);
+        self.write_u64((i >> 64) as u64);
+    }
+
+    #[inline]
+    fn write_usize(&mut self, i: usize) {
+        // Always treat usize as u64 so we get the same results on 32 and 64 bit
+        // platforms. This is important for symbol hashes when cross compiling,
+        // for example.
+        self.state.short_write((i as u64).to_le_bytes());
+    }
+
+    #[inline]
+    fn write_i8(&mut self, i: i8) {
+        self.state.write_i8(i);
+    }
+
+    #[inline]
+    fn write_i16(&mut self, i: i16) {
+        self.state.short_write((i as u16).to_le_bytes());
+    }
+
+    #[inline]
+    fn write_i32(&mut self, i: i32) {
+        self.state.short_write((i as u32).to_le_bytes());
+    }
+
+    #[inline]
+    fn write_i64(&mut self, i: i64) {
+        self.state.short_write((i as u64).to_le_bytes());
+    }
+
+    #[inline]
+    fn write_i128(&mut self, i: i128) {
+        self.state.write(&(i as u128).to_le_bytes());
+    }
+
+    #[inline]
+    fn write_isize(&mut self, i: isize) {
+        // Always treat isize as a 64-bit number so we get the same results on 32 and 64 bit
+        // platforms. This is important for symbol hashes when cross compiling,
+        // for example. Sign extending here is preferable as it means that the
+        // same negative number hashes the same on both 32 and 64 bit platforms.
+        let value = i as u64;
+
+        // Cold path
+        #[cold]
+        #[inline(never)]
+        fn hash_value(state: &mut SipHasher128, value: u64) {
+            state.write_u8(0xFF);
+            state.short_write(value.to_le_bytes());
+        }
+
+        // `isize` values often seem to have a small (positive) numeric value in practice.
+        // To exploit this, if the value is small, we will hash a smaller amount of bytes.
+        // However, we cannot just skip the leading zero bytes, as that would produce the same hash
+        // e.g. if you hash two values that have the same bit pattern when they are swapped.
+        // See https://github.com/rust-lang/rust/pull/93014 for context.
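+        // (For instance, the pairs `(0xAA, 0xAAAA)` and `(0xAAAA, 0xAA)` must not
+        // hash identically; `test_isize_compression` checks exactly this.)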
+ // + // Therefore, we employ the following strategy: + // 1) When we encounter a value that fits within a single byte (the most common case), we + // hash just that byte. This is the most common case that is being optimized. However, we do + // not do this for the value 0xFF, as that is a reserved prefix (a bit like in UTF-8). + // 2) When we encounter a larger value, we hash a "marker" 0xFF and then the corresponding + // 8 bytes. Since this prefix cannot occur when we hash a single byte, when we hash two + // `isize`s that fit within a different amount of bytes, they should always produce a different + // byte stream for the hasher. + if value < 0xFF { + self.state.write_u8(value as u8); + } else { + hash_value(&mut self.state, value); + } + } +} diff --git a/src/stable_hasher/tests.rs b/src/stable_hasher/tests.rs new file mode 100644 index 0000000..1aaec46 --- /dev/null +++ b/src/stable_hasher/tests.rs @@ -0,0 +1,102 @@ +use std::hash::Hash; + +use super::*; + +// The tests below compare the computed hashes to particular expected values +// in order to test that we produce the same results on different platforms, +// regardless of endianness and `usize` and `isize` size differences (this +// of course assumes we run these tests on platforms that differ in those +// ways). The expected values depend on the hashing algorithm used, so they +// need to be updated whenever StableHasher changes its hashing algorithm. + +#[test] +fn test_hash_integers() { + // Test that integers are handled consistently across platforms. + let test_u8 = 0xAB_u8; + let test_u16 = 0xFFEE_u16; + let test_u32 = 0x445577AA_u32; + let test_u64 = 0x01234567_13243546_u64; + let test_u128 = 0x22114433_66557788_99AACCBB_EEDDFF77_u128; + let test_usize = 0xD0C0B0A0_usize; + + let test_i8 = -100_i8; + let test_i16 = -200_i16; + let test_i32 = -300_i32; + let test_i64 = -400_i64; + let test_i128 = -500_i128; + let test_isize = -600_isize; + + let mut h = StableHasher::new(); + test_u8.hash(&mut h); + test_u16.hash(&mut h); + test_u32.hash(&mut h); + test_u64.hash(&mut h); + test_u128.hash(&mut h); + test_usize.hash(&mut h); + test_i8.hash(&mut h); + test_i16.hash(&mut h); + test_i32.hash(&mut h); + test_i64.hash(&mut h); + test_i128.hash(&mut h); + test_isize.hash(&mut h); + + // This depends on the hashing algorithm. See note at top of file. + let expected = (13997337031081104755, 6178945012502239489); + + assert_eq!(h.finalize(), expected); +} + +#[test] +fn test_hash_usize() { + // Test that usize specifically is handled consistently across platforms. + let test_usize = 0xABCDEF01_usize; + + let mut h = StableHasher::new(); + test_usize.hash(&mut h); + + // This depends on the hashing algorithm. See note at top of file. + let expected = (12037165114281468837, 3094087741167521712); + + assert_eq!(h.finalize(), expected); +} + +#[test] +fn test_hash_isize() { + // Test that isize specifically is handled consistently across platforms. + let test_isize = -7_isize; + + let mut h = StableHasher::new(); + test_isize.hash(&mut h); + + // This depends on the hashing algorithm. See note at top of file. + let expected = (3979067582695659080, 2322428596355037273); + + assert_eq!(h.finalize(), expected); +} + +fn hash(t: &T) -> (u64, u64) { + let mut h = StableHasher::new(); + t.hash(&mut h); + h.finalize() +} + +// Check that the `isize` hashing optimization does not produce the same hash when permuting two +// values. 
+#[test] +fn test_isize_compression() { + fn check_hash(a: u64, b: u64) { + let hash_a = hash(&(a as isize, b as isize)); + let hash_b = hash(&(b as isize, a as isize)); + assert_ne!( + hash_a, hash_b, + "The hash stayed the same when permuting values `{a}` and `{b}`!", + ); + } + + check_hash(0xAA, 0xAAAA); + check_hash(0xFF, 0xFFFF); + check_hash(0xAAAA, 0xAAAAAA); + check_hash(0xAAAAAA, 0xAAAAAAAA); + check_hash(0xFF, 0xFFFFFFFFFFFFFFFF); + check_hash(u64::MAX /* -1 */, 1); +}
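A minimal usage sketch of the API added by this PR, for reference. Only `StableHasher::new`, the `Hasher` impl, and `finalize` come from the diff above; the helper `stable_hash_of` and the `main` function are illustrative assumptions, not part of the crate.

use std::hash::Hash;

use rustc_stable_hash::StableHasher;

// Hypothetical helper: hash any `Hash` value and return the raw 128-bit result.
fn stable_hash_of<T: Hash>(value: &T) -> (u64, u64) {
    let mut hasher = StableHasher::new();
    value.hash(&mut hasher);
    // `finalize` consumes the hasher and yields the hash as two u64 halves;
    // `finish::<W>()` could instead convert it into any `StableHasherResult` type.
    hasher.finalize()
}

fn main() {
    let (h1, h2) = stable_hash_of(&("example", 42u64));
    println!("{h1:016x}{h2:016x}");
}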