Skip to content

Commit

Permalink
Merge pull request #128 from meilisearch/a-new-era
Browse files Browse the repository at this point in the history
Rework heed to be lightweight and simpler to maintain
  • Loading branch information
Kerollmops committed Jan 11, 2023
2 parents da4b8f4 + abdb334 commit 50f4b89
Show file tree
Hide file tree
Showing 56 changed files with 3,191 additions and 1,688 deletions.
28 changes: 27 additions & 1 deletion .github/workflows/test.yml → .github/workflows/rust.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
on: [pull_request]

name: Cargo test
name: Rust
jobs:
check:
name: Test the heed project
Expand All @@ -16,6 +16,8 @@ jobs:

steps:
- uses: actions/checkout@v2
with:
submodules: recursive
- uses: actions-rs/toolchain@v1
with:
profile: minimal
Expand All @@ -26,6 +28,30 @@ jobs:
cargo clean
cargo test
examples:
name: Run the heed examples
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
include:
- os: ubuntu-latest
- os: macos-latest

steps:
- uses: actions/checkout@v2
with:
submodules: recursive
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Run the examples
run: |
cargo clean
cargo run --example 2>&1 | grep -E '^ ' | xargs -n1 cargo run --example
fmt:
name: Ensure the heed project is formatted
runs-on: ubuntu-latest
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
heed/target
**/*.rs.bk
Cargo.lock
/*.mdb
*.mdb
4 changes: 4 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[submodule "lmdb-master-sys/lmdb"]
path = lmdb-master-sys/lmdb
url = https://github.com/LMDB/lmdb
branch = mdb.master
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[workspace]
members = ["heed", "heed-traits", "heed-types"]
members = ["lmdb-master-sys", "heed", "heed-traits", "heed-types"]
40 changes: 13 additions & 27 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# heed
A fully typed [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database) wrapper with minimum overhead, uses zerocopy internally.
A fully typed [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database) wrapper with minimum overhead, uses bytemuck internally.

[![License](https://img.shields.io/badge/license-MIT-green)](#LICENSE)
[![Crates.io](https://img.shields.io/crates/v/heed)](https://crates.io/crates/heed)
Expand All @@ -11,36 +11,22 @@ A fully typed [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Datab

This library is able to serialize all kinds of types, not just byte slices; even _Serde_ types are supported.

## Example Usage
Go check out [the examples](heed/examples/).

```rust
fs::create_dir_all("target/heed.mdb")?;
let env = EnvOpenOptions::new().open("target/heed.mdb")?;
## Building from Source

// We open the default unnamed database.
// Specifying the type of the newly created database.
// Here we specify that the key is an str and the value a simple integer.
let db: Database<Str, OwnedType<i32>> = env.create_database(None)?;
### Using the system LMDB if available

// We then open a write transaction and start writing into the database.
// All of those puts are type checked at compile time,
// therefore you cannot write an integer instead of a string.
let mut wtxn = env.write_txn()?;
db.put(&mut wtxn, "seven", &7)?;
db.put(&mut wtxn, "zero", &0)?;
db.put(&mut wtxn, "five", &5)?;
db.put(&mut wtxn, "three", &3)?;
wtxn.commit()?;
If you have not already cloned the repository, you can use this command:

// We open a read transaction to check if those values are available.
// When we read we also type check at compile time.
let rtxn = env.read_txn()?;
```bash
git clone --recursive https://github.com/meilisearch/heed.git
cd heed
cargo build
```

let ret = db.get(&rtxn, "zero")?;
assert_eq!(ret, Some(0));
However, if you already cloned it and forgot to initialise the submodules:

let ret = db.get(&rtxn, "five")?;
assert_eq!(ret, Some(5));
```bash
git submodule update --init
```

You want to see more about all the possibilities? Go check out [the examples](heed/examples/).
4 changes: 2 additions & 2 deletions heed-traits/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
[package]
name = "heed-traits"
version = "0.7.0"
version = "0.20.0-alpha.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
description = "The traits used inside of the fully typed LMDB wrapper, heed"
license = "MIT"
repository = "https://github.com/Kerollmops/heed"
readme = "../README.md"
edition = "2018"
edition = "2021"

[dependencies]
10 changes: 8 additions & 2 deletions heed-traits/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
use std::borrow::Cow;
use std::error::Error as StdError;

/// A boxed `Send + Sync + 'static` error.
pub type BoxedError = Box<dyn StdError + Send + Sync + 'static>;

/// A trait that represents an encoding structure.
pub trait BytesEncode<'a> {
type EItem: ?Sized + 'a;

fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>>;
fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError>;
}

/// A trait that represents a decoding structure.
pub trait BytesDecode<'a> {
type DItem: 'a;

fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem>;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError>;
}
18 changes: 11 additions & 7 deletions heed-types/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
[package]
name = "heed-types"
version = "0.7.2"
version = "0.20.0-alpha.0"
authors = ["Kerollmops <renault.cle@gmail.com>"]
description = "The types used with the fully typed LMDB wrapper, heed"
license = "MIT"
repository = "https://github.com/Kerollmops/heed"
readme = "../README.md"
edition = "2018"
edition = "2021"

[dependencies]
bincode = { version = "1.2.1", optional = true }
heed-traits = { version = "0.7.0", path = "../heed-traits" }
serde = { version = "1.0.117", optional = true }
serde_json = { version = "1.0.59", optional = true }
zerocopy = "0.3.0"
bincode = { version = "1.3.3", optional = true }
bytemuck = { version = "1.12.3", features = ["extern_crate_alloc", "extern_crate_std"] }
byteorder = "1.4.3"
heed-traits = { version = "0.20.0-alpha.0", path = "../heed-traits" }
serde = { version = "1.0.151", optional = true }
serde_json = { version = "1.0.91", optional = true }

[dev-dependencies]
rand = "0.8.5"

[features]
default = ["serde-bincode", "serde-json"]
Expand Down
50 changes: 11 additions & 39 deletions heed-types/src/cow_slice.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
use std::borrow::Cow;
use std::{mem, ptr};

use heed_traits::{BytesDecode, BytesEncode};
use zerocopy::{AsBytes, FromBytes, LayoutVerified};

use crate::aligned_to;
use bytemuck::{pod_collect_to_vec, try_cast_slice, AnyBitPattern, NoUninit, PodCastError};
use heed_traits::{BoxedError, BytesDecode, BytesEncode};

/// Describes a slice that must be [memory aligned] and
/// will be reallocated if it is not.
Expand All @@ -23,47 +20,22 @@ use crate::aligned_to;
/// [`OwnedSlice`]: crate::OwnedSlice
pub struct CowSlice<T>(std::marker::PhantomData<T>);

impl<'a, T: 'a> BytesEncode<'a> for CowSlice<T>
where
T: AsBytes,
{
impl<'a, T: NoUninit> BytesEncode<'a> for CowSlice<T> {
type EItem = [T];

fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<[u8]>> {
Some(Cow::Borrowed(<[T] as AsBytes>::as_bytes(item)))
fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
try_cast_slice(item).map(Cow::Borrowed).map_err(Into::into)
}
}

impl<'a, T: 'a> BytesDecode<'a> for CowSlice<T>
where
T: FromBytes + Copy,
{
impl<'a, T: AnyBitPattern + NoUninit> BytesDecode<'a> for CowSlice<T> {
type DItem = Cow<'a, [T]>;

fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
match LayoutVerified::<_, [T]>::new_slice(bytes) {
Some(layout) => Some(Cow::Borrowed(layout.into_slice())),
None => {
let len = bytes.len();
let elem_size = mem::size_of::<T>();

// ensure that it is the alignment that is wrong
// and the length is valid
if len % elem_size == 0 && !aligned_to(bytes, mem::align_of::<T>()) {
let elems = len / elem_size;
let mut vec = Vec::<T>::with_capacity(elems);

unsafe {
let dst = vec.as_mut_ptr() as *mut u8;
ptr::copy_nonoverlapping(bytes.as_ptr(), dst, len);
vec.set_len(elems);
}

return Some(Cow::Owned(vec));
}

None
}
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
match try_cast_slice(bytes) {
Ok(items) => Ok(Cow::Borrowed(items)),
Err(PodCastError::AlignmentMismatch) => Ok(Cow::Owned(pod_collect_to_vec(bytes))),
Err(error) => Err(error.into()),
}
}
}
Expand Down
49 changes: 14 additions & 35 deletions heed-types/src/cow_type.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
use std::borrow::Cow;
use std::{mem, ptr};

use heed_traits::{BytesDecode, BytesEncode};
use zerocopy::{AsBytes, FromBytes, LayoutVerified};

use crate::aligned_to;
use bytemuck::{bytes_of, bytes_of_mut, try_from_bytes, AnyBitPattern, NoUninit, PodCastError};
use heed_traits::{BoxedError, BytesDecode, BytesEncode};

/// Describes a type that must be [memory aligned] and
/// will be reallocated if it is not.
Expand All @@ -29,44 +26,26 @@ use crate::aligned_to;
/// [`CowSlice`]: crate::CowSlice
pub struct CowType<T>(std::marker::PhantomData<T>);

impl<'a, T: 'a> BytesEncode<'a> for CowType<T>
where
T: AsBytes,
{
impl<'a, T: NoUninit> BytesEncode<'a> for CowType<T> {
type EItem = T;

fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<[u8]>> {
Some(Cow::Borrowed(<T as AsBytes>::as_bytes(item)))
fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
Ok(Cow::Borrowed(bytes_of(item)))
}
}

impl<'a, T: 'a> BytesDecode<'a> for CowType<T>
where
T: FromBytes + Copy,
{
impl<'a, T: AnyBitPattern + NoUninit> BytesDecode<'a> for CowType<T> {
type DItem = Cow<'a, T>;

fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
match LayoutVerified::<_, T>::new(bytes) {
Some(layout) => Some(Cow::Borrowed(layout.into_ref())),
None => {
let len = bytes.len();
let elem_size = mem::size_of::<T>();

// ensure that it is the alignment that is wrong
// and the length is valid
if len == elem_size && !aligned_to(bytes, mem::align_of::<T>()) {
let mut data = mem::MaybeUninit::<T>::uninit();

unsafe {
let dst = data.as_mut_ptr() as *mut u8;
ptr::copy_nonoverlapping(bytes.as_ptr(), dst, len);
return Some(Cow::Owned(data.assume_init()));
}
}

None
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
match try_from_bytes(bytes) {
Ok(item) => Ok(Cow::Borrowed(item)),
Err(PodCastError::TargetAlignmentGreaterAndInputNotAligned) => {
let mut item = T::zeroed();
bytes_of_mut(&mut item).copy_from_slice(bytes);
Ok(Cow::Owned(item))
}
Err(error) => Err(error.into()),
}
}
}
Expand Down

0 comments on commit 50f4b89

Please sign in to comment.