diff --git a/src/io/csv.rs b/src/io/csv.rs index 7d730d942..6afaf2150 100644 --- a/src/io/csv.rs +++ b/src/io/csv.rs @@ -2,7 +2,6 @@ use crate::error::Error; use crate::physical::datatypes::{data_value::VecT, DataTypeName}; -use crate::physical::dictionary::Dictionary; use csv::Reader; /// Imports a csv file @@ -15,7 +14,6 @@ use csv::Reader; pub fn read( datatypes: &[Option], csv_reader: &mut Reader, - dict: &mut dyn Dictionary, ) -> Result, Error> where T: std::io::Read, @@ -28,7 +26,6 @@ where DataTypeName::U64 => VecT::U64(Vec::new()), DataTypeName::Float => VecT::Float(Vec::new()), DataTypeName::Double => VecT::Double(Vec::new()), - DataTypeName::String => VecT::String(Vec::new()), }) })); }); @@ -38,7 +35,7 @@ where if let Err(Error::RollBack(rollback)) = row.iter().enumerate().try_for_each(|(idx, item)| { if let Some(datatype) = datatypes[idx] { - match datatype.parse(item, dict) { + match datatype.parse(item) { Ok(val) => { result[idx].as_mut().map(|vect| { vect.push(&val); @@ -70,7 +67,6 @@ where #[cfg(test)] mod test { use super::*; - use crate::physical::dictionary::PrefixedStringDictionary; use csv::ReaderBuilder; use quickcheck_macros::quickcheck; use test_log::test; @@ -85,11 +81,7 @@ Boston;United States;4628910 .delimiter(b';') .from_reader(data.as_bytes()); - let x = read( - &[None, None, None], - &mut rdr, - &mut PrefixedStringDictionary::new(), - ); + let x = read(&[None, None, None], &mut rdr); assert!(x.is_ok()); assert_eq!(x.unwrap().len(), 0); } @@ -100,7 +92,7 @@ Boston;United States;4628910 let data = "\ 10;20;30;40;20;valid asdf;12.2;413;22.3;23;invalid -node01;22;33.33;12.333332;10;valid again +node01;22;33.33;12.333332;10;valid node02;1312;12.33;313;1431;valid node03;123;123;13;55;123;invalid "; @@ -116,40 +108,14 @@ node03;123;123;13;55;123;invalid Some(DataTypeName::Double), Some(DataTypeName::Float), Some(DataTypeName::U64), - Some(DataTypeName::String), + None, ], &mut rdr, - &mut PrefixedStringDictionary::new(), ); assert!(imported.is_ok()); - assert_eq!(imported.as_ref().unwrap().len(), 5); + assert_eq!(imported.as_ref().unwrap().len(), 4); assert_eq!(imported.as_ref().unwrap()[0].len(), 3); - log::debug!("imported: {:?}", imported); - assert_eq!( - imported.as_ref().unwrap()[4] - .get(0) - .map(|v| v.as_string().unwrap()), - Some(0usize) - ); - assert_eq!( - imported.as_ref().unwrap()[4] - .get(1) - .map(|v| v.as_string().unwrap()), - Some(1usize) - ); - assert_eq!( - imported.as_ref().unwrap()[4] - .get(2) - .map(|v| v.as_string().unwrap()), - Some(0usize) - ); - assert_eq!( - imported.as_ref().unwrap()[4] - .get(3) - .map(|v| v.as_string().unwrap()), - None - ); } #[quickcheck] @@ -190,7 +156,6 @@ node03;123;123;13;55;123;invalid Some(DataTypeName::Float), ], &mut rdr, - &mut PrefixedStringDictionary::new(), ); assert!(imported.is_ok()); diff --git a/src/physical/columns/adaptive_column_builder.rs b/src/physical/columns/adaptive_column_builder.rs index 85e2db500..68079c4e1 100644 --- a/src/physical/columns/adaptive_column_builder.rs +++ b/src/physical/columns/adaptive_column_builder.rs @@ -132,8 +132,6 @@ pub enum AdaptiveColumnBuilderT { Float(AdaptiveColumnBuilder), /// Case Double Double(AdaptiveColumnBuilder), - /// Case String - String(AdaptiveColumnBuilder), } impl AdaptiveColumnBuilderT { @@ -143,7 +141,6 @@ impl AdaptiveColumnBuilderT { DataTypeName::U64 => Self::U64(AdaptiveColumnBuilder::new()), DataTypeName::Float => Self::Float(AdaptiveColumnBuilder::new()), DataTypeName::Double => Self::Double(AdaptiveColumnBuilder::new()), - DataTypeName::String => Self::String(AdaptiveColumnBuilder::new()), } } @@ -171,13 +168,6 @@ impl AdaptiveColumnBuilderT { panic!("value does not match AdaptiveColumn type"); } } - Self::String(cb) => { - cb.add( - value - .as_string() - .expect("Value does not match AdaptiveColumn type"), - ); - } } } @@ -187,7 +177,6 @@ impl AdaptiveColumnBuilderT { Self::U64(cb) => cb.count(), Self::Float(cb) => cb.count(), Self::Double(cb) => cb.count(), - Self::String(cb) => cb.count(), } } } diff --git a/src/physical/columns/column.rs b/src/physical/columns/column.rs index 688652824..87ca5c737 100644 --- a/src/physical/columns/column.rs +++ b/src/physical/columns/column.rs @@ -80,8 +80,6 @@ pub enum ColumnT { Float(ColumnEnum), /// Case ColumnEnum Double(ColumnEnum), - /// Case ColumnEnum - String(ColumnEnum), } generate_datatype_forwarder!(forward_to_column_enum); diff --git a/src/physical/columns/interval_column.rs b/src/physical/columns/interval_column.rs index a79129a2f..ed061ad9c 100644 --- a/src/physical/columns/interval_column.rs +++ b/src/physical/columns/interval_column.rs @@ -98,8 +98,6 @@ pub enum IntervalColumnT { Float(IntervalColumnEnum), /// Case Double Double(IntervalColumnEnum), - /// Case String - String(IntervalColumnEnum), } generate_datatype_forwarder!(forward_to_interval_column_enum); diff --git a/src/physical/columns/ranged_column_scan.rs b/src/physical/columns/ranged_column_scan.rs index 3c666f188..85109fe9e 100644 --- a/src/physical/columns/ranged_column_scan.rs +++ b/src/physical/columns/ranged_column_scan.rs @@ -354,8 +354,6 @@ pub enum RangedColumnScanT<'a> { Float(RangedColumnScanCell<'a, Float>), /// Case Double Double(RangedColumnScanCell<'a, Double>), - /// Case String - String(RangedColumnScanCell<'a, usize>), } generate_datatype_forwarder!(forward_to_ranged_column_scan_cell); @@ -401,25 +399,15 @@ impl<'a> ColumnScan for RangedColumnScanT<'a> { match self { Self::U64(cs) => match value { Self::Item::U64(val) => cs.seek(val).map(DataValueT::U64), - Self::Item::Float(_) => None, - Self::Item::Double(_) => None, - Self::Item::String(_) => None, + Self::Item::Float(_) | Self::Item::Double(_) => None, }, Self::Float(cs) => match value { - Self::Item::U64(_) => None, + Self::Item::U64(_) | Self::Item::Double(_) => None, Self::Item::Float(val) => cs.seek(val).map(DataValueT::Float), - Self::Item::Double(_) => None, - Self::Item::String(_) => None, }, Self::Double(cs) => match value { - Self::Item::U64(_) => None, - Self::Item::Float(_) => None, + Self::Item::U64(_) | Self::Item::Float(_) => None, Self::Item::Double(val) => cs.seek(val).map(DataValueT::Double), - Self::Item::String(_) => None, - }, - Self::String(cs) => match value { - Self::Item::String(val) => cs.seek(val).map(DataValueT::String), - _ => None, // no type mixing allowed, so in any other case it should be [None] }, } } diff --git a/src/physical/datatypes/data_type_name.rs b/src/physical/datatypes/data_type_name.rs index 3169ec15a..d1e45412c 100644 --- a/src/physical/datatypes/data_type_name.rs +++ b/src/physical/datatypes/data_type_name.rs @@ -1,7 +1,6 @@ use crate::error::Error; use super::DataValueT; -use crate::physical::dictionary::Dictionary; /// Descriptors to refer to the possible data types at runtime. #[derive(Clone, Copy, Debug, Ord, PartialOrd, Eq, PartialEq)] @@ -12,18 +11,15 @@ pub enum DataTypeName { Float, /// Data type [`super::double::Double`] Double, - /// Data type `String`, uses [`usize`] and a [dictionary][crate::physical::dictionary::Dictionary] - String, } impl DataTypeName { /// Parses a string, based on the name of the Datatype - pub fn parse(&self, string: &str, dict: &mut dyn Dictionary) -> Result { + pub fn parse(&self, string: &str) -> Result { Ok(match self { DataTypeName::U64 => DataValueT::U64(string.parse::()?), DataTypeName::Float => DataValueT::Float(super::Float::new(string.parse::()?)?), DataTypeName::Double => DataValueT::Double(super::Double::new(string.parse::()?)?), - DataTypeName::String => DataValueT::String(dict.add(string.to_string())), }) } } diff --git a/src/physical/datatypes/data_value.rs b/src/physical/datatypes/data_value.rs index 2696d30f6..842bd7957 100644 --- a/src/physical/datatypes/data_value.rs +++ b/src/physical/datatypes/data_value.rs @@ -18,8 +18,6 @@ pub enum DataValueT { Float(Float), /// Case Double Double(Double), - /// Case String - String(usize), } impl DataValueT { @@ -47,21 +45,12 @@ impl DataValueT { } } - /// Returns an [`Option`] , answering whether the [`DataValueT`] is of this datatype - pub fn as_string(&self) -> Option { - match *self { - DataValueT::String(val) => Some(val), - _ => None, - } - } - /// Compares its value with another given [`DataValueT`] pub fn compare(&self, other: &Self) -> Option { match self { DataValueT::U64(val) => other.as_u64().map(|otherval| val.cmp(&otherval)), DataValueT::Float(val) => other.as_float().map(|otherval| val.cmp(&otherval)), DataValueT::Double(val) => other.as_double().map(|otherval| val.cmp(&otherval)), - DataValueT::String(val) => other.as_string().map(|otherval| val.cmp(&otherval)), } } @@ -71,7 +60,6 @@ impl DataValueT { Self::U64(_) => DataTypeName::U64, Self::Float(_) => DataTypeName::Float, Self::Double(_) => DataTypeName::Double, - Self::String(_) => DataTypeName::String, } } } @@ -82,7 +70,6 @@ impl std::fmt::Display for DataValueT { Self::U64(val) => write!(f, "{}", val), Self::Float(val) => write!(f, "{}", val), Self::Double(val) => write!(f, "{}", val), - Self::String(val) => write!(f, "str{}", val), } } } @@ -96,8 +83,6 @@ pub enum VecT { Float(Vec), /// Case Vec Double(Vec), - /// Case Vec - String(Vec), } generate_datatype_forwarder!(forward_to_vec); @@ -109,7 +94,6 @@ impl VecT { DataTypeName::U64 => Self::U64(Vec::new()), DataTypeName::Float => Self::Float(Vec::new()), DataTypeName::Double => Self::Double(Vec::new()), - DataTypeName::String => Self::String(Vec::new()), } } @@ -119,7 +103,6 @@ impl VecT { Self::U64(_) => DataTypeName::U64, Self::Float(_) => DataTypeName::Float, Self::Double(_) => DataTypeName::Double, - Self::String(_) => DataTypeName::String, } } @@ -134,7 +117,6 @@ impl VecT { VecT::U64(vec) => vec.get(index).copied().map(DataValueT::U64), VecT::Float(vec) => vec.get(index).copied().map(DataValueT::Float), VecT::Double(vec) => vec.get(index).copied().map(DataValueT::Double), - VecT::String(vec) => vec.get(index).copied().map(DataValueT::String), } } @@ -153,9 +135,6 @@ impl VecT { VecT::Double(vec) => vec.push(value.as_double().expect( "expecting VecT::Double and DataValueT::Double, but DataValueT does not match", )), - VecT::String(vec) => vec.push(value.as_string().expect( - "expecting VecT::String and DataValueT::String, but DataValueT does not match", - )), }; } @@ -182,9 +161,6 @@ impl VecT { VecT::Double(vec) => vec .get(idx_a) .and_then(|&val_a| vec.get(idx_b).map(|val_b| val_a.cmp(val_b))), - VecT::String(vec) => vec - .get(idx_a) - .and_then(|&val_a| vec.get(idx_b).map(|val_b| val_a.cmp(val_b))), } } } diff --git a/src/physical/tables/materialize.rs b/src/physical/tables/materialize.rs index e894453f8..f9642d57e 100644 --- a/src/physical/tables/materialize.rs +++ b/src/physical/tables/materialize.rs @@ -34,8 +34,6 @@ pub fn materialize(trie_scan: &mut TrieScanEnum) -> Trie { .push(AdaptiveColumnBuilderT::Float(AdaptiveColumnBuilder::new())), DataTypeName::Double => data_column_builders .push(AdaptiveColumnBuilderT::Double(AdaptiveColumnBuilder::new())), - DataTypeName::String => data_column_builders - .push(AdaptiveColumnBuilderT::String(AdaptiveColumnBuilder::new())), } } diff --git a/src/physical/tables/trie.rs b/src/physical/tables/trie.rs index e55fadff6..2cc4a1834 100644 --- a/src/physical/tables/trie.rs +++ b/src/physical/tables/trie.rs @@ -181,7 +181,7 @@ impl Table for Trie { .map(|_| { let empty_data_col = AdaptiveColumnBuilderT::new(DataTypeName::U64); let empty_interval_col = AdaptiveColumnBuilder::::new(); - build_interval_column!(empty_data_col, empty_interval_col; U64; Float; Double; String) + build_interval_column!(empty_data_col, empty_interval_col; U64; Float; Double) }) .collect(), ); @@ -201,9 +201,6 @@ impl Table for Trie { VecT::Double(vec) => VecT::Double(permutator.permutate(vec).expect( "length matches since permutator is constructed from these vectores", )), - VecT::String(vec) => VecT::String(permutator.permutate(vec).expect( - "length matches since permutator is constructed from these vectors", - )), }) .collect(); @@ -294,7 +291,7 @@ impl Table for Trie { condensed_data_builders .into_iter() .zip(condensed_interval_starts_builders) - .map(|(col, iv)| build_interval_column!(col, iv; U64; Float; Double; String)) + .map(|(col, iv)| build_interval_column!(col, iv; U64; Float; Double)) .collect(), ) } diff --git a/src/physical/tables/trie_difference.rs b/src/physical/tables/trie_difference.rs index 6457fa071..ca99c5346 100644 --- a/src/physical/tables/trie_difference.rs +++ b/src/physical/tables/trie_difference.rs @@ -67,7 +67,6 @@ impl<'a> TrieDifference<'a> { DataTypeName::U64 => init_scans_for_datatype!(U64), DataTypeName::Float => init_scans_for_datatype!(Float), DataTypeName::Double => init_scans_for_datatype!(Double), - DataTypeName::String => init_scans_for_datatype!(String), }; } @@ -158,7 +157,6 @@ impl<'a> TrieScan<'a> for TrieDifference<'a> { DataTypeName::U64 => down_for_datatype!(U64), DataTypeName::Float => down_for_datatype!(Float), DataTypeName::Double => down_for_datatype!(Double), - DataTypeName::String => down_for_datatype!(String), } } else { self.difference_scans[next_layer].get_mut().reset(); diff --git a/src/physical/tables/trie_join.rs b/src/physical/tables/trie_join.rs index 2e4167716..e277c0f0d 100644 --- a/src/physical/tables/trie_join.rs +++ b/src/physical/tables/trie_join.rs @@ -88,7 +88,6 @@ impl<'a> TrieJoin<'a> { DataTypeName::U64 => merge_join_for_datatype!(U64, u64), DataTypeName::Float => merge_join_for_datatype!(Float, Float), DataTypeName::Double => merge_join_for_datatype!(Double, Double), - DataTypeName::String => merge_join_for_datatype!(String, usize), } } @@ -138,10 +137,9 @@ impl<'a> TrieScan<'a> for TrieJoin<'a> { debug_assert!(self.current_variable.is_some()); match self.target_schema.get_type(self.current_variable?) { - DataTypeName::U64 - | DataTypeName::Float - | DataTypeName::Double - | DataTypeName::String => Some(&self.merge_joins[self.current_variable?]), + DataTypeName::U64 | DataTypeName::Float | DataTypeName::Double => { + Some(&self.merge_joins[self.current_variable?]) + } } } diff --git a/src/physical/tables/trie_project.rs b/src/physical/tables/trie_project.rs index 4c4947449..6507721d6 100644 --- a/src/physical/tables/trie_project.rs +++ b/src/physical/tables/trie_project.rs @@ -35,7 +35,6 @@ fn shrink_position(column: &IntervalColumnT, pos: usize) -> usize { IntervalColumnT::U64(col) => shrink_position_t(col, pos), IntervalColumnT::Float(col) => shrink_position_t(col, pos), IntervalColumnT::Double(col) => shrink_position_t(col, pos), - IntervalColumnT::String(col) => shrink_position_t(col, pos), } } @@ -88,7 +87,6 @@ impl<'a> TrieProject<'a> { DataTypeName::U64 => init_scans_for_datatype!(U64), DataTypeName::Float => init_scans_for_datatype!(Float), DataTypeName::Double => init_scans_for_datatype!(Double), - DataTypeName::String => init_scans_for_datatype!(String), } } let target_schema = TrieSchema::new(target_attributes); @@ -190,7 +188,6 @@ impl<'a> TrieScan<'a> for TrieProject<'a> { IntervalColumnT::U64(_) => down_for_datatype!(U64), IntervalColumnT::Float(_) => down_for_datatype!(Float), IntervalColumnT::Double(_) => down_for_datatype!(Double), - IntervalColumnT::String(_) => down_for_datatype!(String), } self.current_layer = Some(next_layer); diff --git a/src/physical/tables/trie_select.rs b/src/physical/tables/trie_select.rs index 062ee7e6a..97f0123aa 100644 --- a/src/physical/tables/trie_select.rs +++ b/src/physical/tables/trie_select.rs @@ -46,7 +46,6 @@ impl<'a> TrieSelectEqual<'a> { DataTypeName::U64 => init_scans_for_datatype!(U64), DataTypeName::Float => init_scans_for_datatype!(Float), DataTypeName::Double => init_scans_for_datatype!(Double), - DataTypeName::String => init_scans_for_datatype!(String), }; } @@ -88,7 +87,6 @@ impl<'a> TrieSelectEqual<'a> { DataTypeName::U64 => init_scans_for_datatype!(U64), DataTypeName::Float => init_scans_for_datatype!(Float), DataTypeName::Double => init_scans_for_datatype!(Double), - DataTypeName::String => init_scans_for_datatype!(String), } } } @@ -184,7 +182,6 @@ impl<'a> TrieSelectValue<'a> { DataTypeName::U64 => init_scans_for_datatype!(U64), DataTypeName::Float => init_scans_for_datatype!(Float), DataTypeName::Double => init_scans_for_datatype!(Double), - DataTypeName::String => init_scans_for_datatype!(String), } } @@ -220,7 +217,6 @@ impl<'a> TrieSelectValue<'a> { DataTypeName::U64 => init_scans_for_datatype!(U64), DataTypeName::Float => init_scans_for_datatype!(Float), DataTypeName::Double => init_scans_for_datatype!(Double), - DataTypeName::String => init_scans_for_datatype!(String), } } diff --git a/src/physical/tables/trie_union.rs b/src/physical/tables/trie_union.rs index afc56df26..161ec551e 100644 --- a/src/physical/tables/trie_union.rs +++ b/src/physical/tables/trie_union.rs @@ -58,7 +58,6 @@ impl<'a> TrieUnion<'a> { DataTypeName::U64 => init_scans_for_datatype!(U64, u64), DataTypeName::Float => init_scans_for_datatype!(Float, Float), DataTypeName::Double => init_scans_for_datatype!(Double, Double), - DataTypeName::String => init_scans_for_datatype!(String, usize), }; } diff --git a/src/physical/util.rs b/src/physical/util.rs index 7ce3c2143..3b2d1a586 100644 --- a/src/physical/util.rs +++ b/src/physical/util.rs @@ -92,7 +92,6 @@ macro_rules! generate_datatype_forwarder { $crate::generate_forwarder!($name; U64, Float, - Double, - String); + Double); } }