From 6e96e8d73f2bedd1a8e8cfcdc7f79e656f7cfa4a Mon Sep 17 00:00:00 2001 From: Aaron Kelbsch Date: Fri, 22 Sep 2023 12:24:44 +0200 Subject: [PATCH 1/3] change: Reference, ExternalFun, InternalFun are now boxed in term --- src/convert.rs | 23 ++++++++++++++++++++--- src/lib.rs | 12 ++++++------ 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/convert.rs b/src/convert.rs index f182bb5..0fcffac 100644 --- a/src/convert.rs +++ b/src/convert.rs @@ -56,15 +56,32 @@ macro_rules! impl_term_try_into { } }; } +macro_rules! impl_term_try_into_boxed { + ($to:ident) => { + impl TryInto<$to> for Term { + type Error = Self; + + fn try_into(self) -> Result<$to, Self> + where + Self: Sized, + { + match self { + Term::$to(x) => Ok(*x), + _ => Err(self), + } + } + } + }; +} impl_term_try_into!(Atom); impl_term_try_into!(FixInteger); impl_term_try_into!(BigInteger); impl_term_try_into!(Float); impl_term_try_into!(Pid); impl_term_try_into!(Port); -impl_term_try_into!(Reference); -impl_term_try_into!(ExternalFun); -impl_term_try_into!(InternalFun); +impl_term_try_into_boxed!(Reference); +impl_term_try_into_boxed!(ExternalFun); +impl_term_try_into_boxed!(InternalFun); impl_term_try_into!(Binary); impl_term_try_into!(BitBinary); impl_term_try_into!(List); diff --git a/src/lib.rs b/src/lib.rs index 422676e..7daac4a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -50,9 +50,9 @@ pub enum Term { Float(Float), Pid(Pid), Port(Port), - Reference(Reference), - ExternalFun(ExternalFun), - InternalFun(InternalFun), + Reference(Box), + ExternalFun(Box), + InternalFun(Box), Binary(Binary), BitBinary(BitBinary), List(List), @@ -131,17 +131,17 @@ impl From for Term { } impl From for Term { fn from(x: Reference) -> Self { - Term::Reference(x) + Term::Reference(Box::new(x)) } } impl From for Term { fn from(x: ExternalFun) -> Self { - Term::ExternalFun(x) + Term::ExternalFun(Box::new(x)) } } impl From for Term { fn from(x: InternalFun) -> Self { - Term::InternalFun(x) + Term::InternalFun(Box::new(x)) } } impl From for Term { From 298e8de0b1a7579ff946854e950dcfec70204e12 Mon Sep 17 00:00:00 2001 From: Aaron Kelbsch Date: Fri, 22 Sep 2023 12:33:39 +0200 Subject: [PATCH 2/3] change: Map now uses HashMap internally --- src/codec.rs | 10 +++++----- src/lib.rs | 28 ++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/codec.rs b/src/codec.rs index 1d460fb..6469dd9 100644 --- a/src/codec.rs +++ b/src/codec.rs @@ -202,13 +202,13 @@ impl Decoder { } fn decode_map_ext(&mut self) -> DecodeResult { let count = self.reader.read_u32::()? as usize; - let mut entries = Vec::with_capacity(count); + let mut map = HashMap::::new(); for _ in 0..count { let k = self.decode_term()?; let v = self.decode_term()?; - entries.push((k, v)); + map.insert(k, v); } - Ok(Term::from(Map::from(entries))) + Ok(Term::from(Map::from(map))) } fn decode_binary_ext(&mut self) -> DecodeResult { let size = self.reader.read_u32::()? as usize; @@ -525,8 +525,8 @@ impl Encoder { } fn encode_map(&mut self, x: &Map) -> EncodeResult { self.writer.write_u8(MAP_EXT)?; - self.writer.write_u32::(x.entries.len() as u32)?; - for &(ref k, ref v) in &x.entries { + self.writer.write_u32::(x.map.len() as u32)?; + for (k, v) in x.map.iter() { self.encode_term(k)?; self.encode_term(v)?; } diff --git a/src/lib.rs b/src/lib.rs index 7daac4a..14e49ad 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,7 +29,9 @@ //! - [Erlang External Term Format](http://erlang.org/doc/apps/erts/erl_ext_dist.html) //! use num::bigint::BigInt; +use std::collections::HashMap; use std::fmt; +use std::hash::Hash; use std::io; mod codec; @@ -719,14 +721,14 @@ impl From> for Tuple { } /// Map. -#[derive(Debug, PartialEq, Eq, Hash, Clone)] +#[derive(Debug, PartialEq, Eq, Clone)] pub struct Map { - pub entries: Vec<(Term, Term)>, + pub map: HashMap, } impl fmt::Display for Map { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "#{{")?; - for (i, &(ref k, ref v)) in self.entries.iter().enumerate() { + for (i, (k, v)) in self.map.iter().enumerate() { if i != 0 { write!(f, ",")?; } @@ -736,6 +738,24 @@ impl fmt::Display for Map { Ok(()) } } +impl Hash for Map { + fn hash(&self, state: &mut H) { + for (k, v) in self.map.iter() { + k.hash(state); + v.hash(state); + } + } +} +impl From<[(Term,Term); N]> for Map{ + fn from(from: [(Term,Term); N]) -> Self { + Map{ map : HashMap::from(from) } + } +} +impl From> for Map{ + fn from(from_map: HashMap) -> Self { + Map{ map :from_map } + } +} impl From> for Map { fn from(entries: Vec<(Term, Term)>) -> Self { Map { entries } From c617f4acb14d25253fc72075bffcafb7b08192fb Mon Sep 17 00:00:00 2001 From: Aaron Kelbsch Date: Fri, 22 Sep 2023 12:34:54 +0200 Subject: [PATCH 3/3] change: added ByteList as an optimized list for STRING_EXT --- src/codec.rs | 20 +++++++----- src/convert.rs | 2 ++ src/lib.rs | 77 +++++++++++++++++++++++++++++++++++++++++++-- tests/lib.rs | 85 +++++++++++++++++++++++++++++++++++++------------- 4 files changed, 151 insertions(+), 33 deletions(-) diff --git a/src/codec.rs b/src/codec.rs index 6469dd9..e0a59bf 100644 --- a/src/codec.rs +++ b/src/codec.rs @@ -162,14 +162,10 @@ impl Decoder { Ok(Term::from(List::nil())) } fn decode_string_ext(&mut self) -> DecodeResult { - let size = self.reader.read_u16::()? as usize; - let mut elements = Vec::with_capacity(size); - for _ in 0..size { - elements.push(Term::from(FixInteger::from(i32::from( - self.reader.read_u8()?, - )))); - } - Ok(Term::from(List::from(elements))) + let size = self.reader.read_u16::()? as usize; + let mut bytes = vec![0; size]; + self.reader.read_exact(&mut bytes)?; + Ok(Term::from(ByteList::from(bytes))) } fn decode_list_ext(&mut self) -> DecodeResult { let count = self.reader.read_u32::()? as usize; @@ -465,6 +461,7 @@ impl Encoder { Term::ImproperList(ref x) => self.encode_improper_list(x), Term::Tuple(ref x) => self.encode_tuple(x), Term::Map(ref x) => self.encode_map(x), + Term::ByteList(ref x) => self.encode_byte_list(x.bytes.as_slice()) } } fn encode_nil(&mut self) -> EncodeResult { @@ -532,6 +529,13 @@ impl Encoder { } Ok(()) } + fn encode_byte_list(&mut self, x: &[u8]) -> EncodeResult{ + self.writer.write_u8(STRING_EXT)?; + self.writer.write_u16::(x.len() as u16)?; + self.writer.write_all(x)?; + + Ok(()) + } fn encode_binary(&mut self, x: &Binary) -> EncodeResult { self.writer.write_u8(BINARY_EXT)?; self.writer.write_u32::(x.bytes.len() as u32)?; diff --git a/src/convert.rs b/src/convert.rs index 0fcffac..e1b15e3 100644 --- a/src/convert.rs +++ b/src/convert.rs @@ -38,6 +38,7 @@ impl_term_try_as_ref!(List); impl_term_try_as_ref!(ImproperList); impl_term_try_as_ref!(Tuple); impl_term_try_as_ref!(Map); +impl_term_try_as_ref!(ByteList); macro_rules! impl_term_try_into { ($to:ident) => { @@ -88,6 +89,7 @@ impl_term_try_into!(List); impl_term_try_into!(ImproperList); impl_term_try_into!(Tuple); impl_term_try_into!(Map); +impl_term_try_into!(ByteList); pub trait AsOption { fn as_option(&self) -> Option<&Self>; diff --git a/src/lib.rs b/src/lib.rs index 14e49ad..4c44f66 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -57,6 +57,7 @@ pub enum Term { InternalFun(Box), Binary(Binary), BitBinary(BitBinary), + ByteList(ByteList), List(List), ImproperList(ImproperList), Tuple(Tuple), @@ -94,6 +95,7 @@ impl fmt::Display for Term { Term::InternalFun(ref x) => x.fmt(f), Term::Binary(ref x) => x.fmt(f), Term::BitBinary(ref x) => x.fmt(f), + Term::ByteList(ref x) => x.fmt(f), Term::List(ref x) => x.fmt(f), Term::ImproperList(ref x) => x.fmt(f), Term::Tuple(ref x) => x.fmt(f), @@ -156,6 +158,16 @@ impl From for Term { Term::BitBinary(x) } } +impl From for Term { + fn from(x: ByteList) -> Self { + Term::ByteList(x) + } +} +impl From for Term { + fn from(x: String) -> Self { + Term::ByteList(ByteList { bytes: x.into_bytes() }) + } +} impl From for Term { fn from(x: List) -> Self { Term::List(x) @@ -623,6 +635,55 @@ impl From<(Vec, u8)> for BitBinary { } } + +/// Erlang has a transport optimization for lists only containing u8 elements. \ +/// Since Strings in erlang are just lists with u8's they call this "STRING_EXT". +/// +/// This type does not exist in erlang and is to be seen as a subtype of List. +/// +/// See: https://erlang.org/doc/apps/erts/erl_ext_dist.html#STRING_EXT +/// +#[derive(Debug, PartialEq, Eq, Hash, Clone)] +pub struct ByteList { + pub bytes: Vec, +} +impl fmt::Display for ByteList { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "[")?; + for (i, b) in self.bytes.iter().enumerate() { + if i != 0 { + write!(f, ",")?; + } + write!(f, "{}", b)?; + } + write!(f, "]")?; + Ok(()) + } +} +impl From for ByteList { + fn from(string: String) -> Self { + ByteList { bytes : string.into_bytes()} + } +} +impl From<&str> for ByteList { + fn from(string: &str) -> Self { + ByteList { bytes :string.into() } + } +} +impl From> for ByteList { + fn from(bytes: Vec) -> Self { + ByteList { bytes } + } +} +impl From<&[u8;N]> for ByteList { + fn from(bytes: &[u8;N]) -> Self { + ByteList { + bytes: Vec::from(bytes.as_slice()), + } + } +} + + /// List. #[derive(Debug, PartialEq, Eq, Hash, Clone)] pub struct List { @@ -659,6 +720,12 @@ impl From> for List { List { elements } } } +impl From for List { + fn from(byte_list: ByteList) -> Self { + let elements = byte_list.bytes.into_iter().map(|value|Term::FixInteger(FixInteger { value: value as i32 })).collect(); + List { elements } + } +} /// Improper list. #[derive(Debug, PartialEq, Eq, Hash, Clone)] @@ -756,9 +823,13 @@ impl From> for Map{ Map{ map :from_map } } } -impl From> for Map { - fn from(entries: Vec<(Term, Term)>) -> Self { - Map { entries } +impl From> for Map{ + fn from(from_map: HashMap) -> Self { + let mut result_map = HashMap::::new(); + for (k,v) in from_map { + result_map.insert(Term::from(k),v); + } + Map{ map : result_map } } } diff --git a/tests/lib.rs b/tests/lib.rs index f2ee51d..12b5244 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -327,6 +327,52 @@ fn bit_binary_test() { ); } + + +#[test] +fn byte_list_test(){ + + // Display + assert_eq!( + ByteList::from(vec![1,2]) + .to_string(), + "[1,2]", + ); + + // 01 - Simple byte list + + let byte_list = ByteList::from(&[1,2]); + let bytes = vec![131, 107, 0, 2, 1, 2]; + + // Decode + assert_eq!( + decode(bytes.as_slice()).try_into(), + Ok(byte_list.clone()) + ); + + // Encode + assert_eq!( + encode(Term::from(byte_list)), + bytes.as_slice(), + ); + + // 02 - Byte List from String + let byte_list = ByteList::from("test"); + let bytes = vec![131,107,0,4,116,101,115,116]; + + // Decode + assert_eq!( + decode(bytes.as_slice()).try_into(), + Ok(byte_list.clone()) + ); + + // Encode + assert_eq!( + encode(Term::from(byte_list)), + bytes.as_slice(), + ); +} + #[test] fn list_test() { // Display @@ -342,13 +388,6 @@ fn list_test() { // Decode assert_eq!(Ok(List::nil()), decode(&[131, 106]).try_into()); // NIL_EXT - assert_eq!( - Ok(List::from(vec![ - Term::from(FixInteger::from(1)), - Term::from(FixInteger::from(2)) - ])), - decode(&[131, 107, 0, 2, 1, 2]).try_into() - ); // STRING_EXT assert_eq!( Ok(List::from(vec![Term::from(Atom::from("a"))])), decode(&[131, 108, 0, 0, 0, 1, 100, 0, 1, 97, 106]).try_into() @@ -356,13 +395,6 @@ fn list_test() { // Encode assert_eq!(vec![131, 106], encode(Term::from(List::nil()))); - assert_eq!( - vec![131, 107, 0, 2, 1, 2], - encode(Term::from(List::from(vec![ - Term::from(FixInteger::from(1)), - Term::from(FixInteger::from(2)) - ]))) - ); assert_eq!( vec![131, 108, 0, 0, 0, 1, 100, 0, 1, 97, 106], encode(Term::from(List::from(vec![Term::from(Atom::from("a"))]))) @@ -434,18 +466,20 @@ fn tuple_test() { #[test] fn map_test() { - let map = Map::from(vec![ + let map = Map::from([ + (Term::from(Atom::from("a")), Term::from(Atom::from("b"))), ( Term::from(FixInteger::from(1)), Term::from(FixInteger::from(2)), ), - (Term::from(Atom::from("a")), Term::from(Atom::from("b"))), ]); + // Display - assert_eq!("#{1=>2,'a'=>'b'}", map.to_string()); - - assert_eq!("#{}", Map::from(vec![]).to_string()); + let as_str = map.to_string(); + // Hashmap Iter is not deterministic, so we need to check both possible outputs + assert!("#{'a'=>'b',1=>2}" == as_str || "#{1=>2,'a'=>'b'}"== as_str) ; + assert_eq!("#{}", Map::from([]).to_string()); // Decode assert_eq!( @@ -454,10 +488,17 @@ fn map_test() { ); // Encode - assert_eq!( - vec![131, 116, 0, 0, 0, 2, 97, 1, 97, 2, 100, 0, 1, 97, 100, 0, 1, 98], - encode(Term::from(map)) + let buf = encode(Term::from(map.clone())); + // Hashmap Iter is not deterministic, so we need to check both possible outputs + assert!( + [131, 116, 0, 0, 0, 2, 97, 1, 97, 2, 100, 0, 1, 97, 100, 0, 1, 98] == buf.as_slice() || [131, 116, 0, 0, 0, 2, 100, 0, 1, 97, 100, 0, 1, 98, 97, 1, 97, 2] == buf.as_slice() ); + + + //Access + + assert_eq!(map.map.get(&Term::from(Atom::from("a"))), Some(&Term::from(Atom::from("b")))); + assert_eq!(map.map.get(&Term::from(FixInteger::from(1))), Some(&Term::from(FixInteger::from(2)))); } #[test]