Skip to content

Commit

Permalink
Revert "Add String datatype to stage2 (#68)"
Browse files Browse the repository at this point in the history
This reverts commit 910d36c.

As per discussion (#70), revert the string type.
  • Loading branch information
mmarx committed Nov 24, 2022
1 parent 910d36c commit bf974a2
Show file tree
Hide file tree
Showing 15 changed files with 15 additions and 123 deletions.
45 changes: 5 additions & 40 deletions src/io/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

use crate::error::Error;
use crate::physical::datatypes::{data_value::VecT, DataTypeName};
use crate::physical::dictionary::Dictionary;
use csv::Reader;

/// Imports a csv file
Expand All @@ -15,7 +14,6 @@ use csv::Reader;
pub fn read<T>(
datatypes: &[Option<DataTypeName>],
csv_reader: &mut Reader<T>,
dict: &mut dyn Dictionary,
) -> Result<Vec<VecT>, Error>
where
T: std::io::Read,
Expand All @@ -28,7 +26,6 @@ where
DataTypeName::U64 => VecT::U64(Vec::new()),
DataTypeName::Float => VecT::Float(Vec::new()),
DataTypeName::Double => VecT::Double(Vec::new()),
DataTypeName::String => VecT::String(Vec::new()),
})
}));
});
Expand All @@ -38,7 +35,7 @@ where
if let Err(Error::RollBack(rollback)) =
row.iter().enumerate().try_for_each(|(idx, item)| {
if let Some(datatype) = datatypes[idx] {
match datatype.parse(item, dict) {
match datatype.parse(item) {
Ok(val) => {
result[idx].as_mut().map(|vect| {
vect.push(&val);
Expand Down Expand Up @@ -70,7 +67,6 @@ where
#[cfg(test)]
mod test {
use super::*;
use crate::physical::dictionary::PrefixedStringDictionary;
use csv::ReaderBuilder;
use quickcheck_macros::quickcheck;
use test_log::test;
Expand All @@ -85,11 +81,7 @@ Boston;United States;4628910
.delimiter(b';')
.from_reader(data.as_bytes());

let x = read(
&[None, None, None],
&mut rdr,
&mut PrefixedStringDictionary::new(),
);
let x = read(&[None, None, None], &mut rdr);
assert!(x.is_ok());
assert_eq!(x.unwrap().len(), 0);
}
Expand All @@ -100,7 +92,7 @@ Boston;United States;4628910
let data = "\
10;20;30;40;20;valid
asdf;12.2;413;22.3;23;invalid
node01;22;33.33;12.333332;10;valid again
node01;22;33.33;12.333332;10;valid
node02;1312;12.33;313;1431;valid
node03;123;123;13;55;123;invalid
";
Expand All @@ -116,40 +108,14 @@ node03;123;123;13;55;123;invalid
Some(DataTypeName::Double),
Some(DataTypeName::Float),
Some(DataTypeName::U64),
Some(DataTypeName::String),
None,
],
&mut rdr,
&mut PrefixedStringDictionary::new(),
);

assert!(imported.is_ok());
assert_eq!(imported.as_ref().unwrap().len(), 5);
assert_eq!(imported.as_ref().unwrap().len(), 4);
assert_eq!(imported.as_ref().unwrap()[0].len(), 3);
log::debug!("imported: {:?}", imported);
assert_eq!(
imported.as_ref().unwrap()[4]
.get(0)
.map(|v| v.as_string().unwrap()),
Some(0usize)
);
assert_eq!(
imported.as_ref().unwrap()[4]
.get(1)
.map(|v| v.as_string().unwrap()),
Some(1usize)
);
assert_eq!(
imported.as_ref().unwrap()[4]
.get(2)
.map(|v| v.as_string().unwrap()),
Some(0usize)
);
assert_eq!(
imported.as_ref().unwrap()[4]
.get(3)
.map(|v| v.as_string().unwrap()),
None
);
}

#[quickcheck]
Expand Down Expand Up @@ -190,7 +156,6 @@ node03;123;123;13;55;123;invalid
Some(DataTypeName::Float),
],
&mut rdr,
&mut PrefixedStringDictionary::new(),
);

assert!(imported.is_ok());
Expand Down
11 changes: 0 additions & 11 deletions src/physical/columns/adaptive_column_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,6 @@ pub enum AdaptiveColumnBuilderT {
Float(AdaptiveColumnBuilder<Float>),
/// Case Double
Double(AdaptiveColumnBuilder<Double>),
/// Case String
String(AdaptiveColumnBuilder<usize>),
}

impl AdaptiveColumnBuilderT {
Expand All @@ -143,7 +141,6 @@ impl AdaptiveColumnBuilderT {
DataTypeName::U64 => Self::U64(AdaptiveColumnBuilder::new()),
DataTypeName::Float => Self::Float(AdaptiveColumnBuilder::new()),
DataTypeName::Double => Self::Double(AdaptiveColumnBuilder::new()),
DataTypeName::String => Self::String(AdaptiveColumnBuilder::new()),
}
}

Expand Down Expand Up @@ -171,13 +168,6 @@ impl AdaptiveColumnBuilderT {
panic!("value does not match AdaptiveColumn type");
}
}
Self::String(cb) => {
cb.add(
value
.as_string()
.expect("Value does not match AdaptiveColumn type"),
);
}
}
}

Expand All @@ -187,7 +177,6 @@ impl AdaptiveColumnBuilderT {
Self::U64(cb) => cb.count(),
Self::Float(cb) => cb.count(),
Self::Double(cb) => cb.count(),
Self::String(cb) => cb.count(),
}
}
}
Expand Down
2 changes: 0 additions & 2 deletions src/physical/columns/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,6 @@ pub enum ColumnT {
Float(ColumnEnum<Float>),
/// Case ColumnEnum<Double>
Double(ColumnEnum<Double>),
/// Case ColumnEnum<String>
String(ColumnEnum<usize>),
}

generate_datatype_forwarder!(forward_to_column_enum);
Expand Down
2 changes: 0 additions & 2 deletions src/physical/columns/interval_column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,6 @@ pub enum IntervalColumnT {
Float(IntervalColumnEnum<Float>),
/// Case Double
Double(IntervalColumnEnum<Double>),
/// Case String
String(IntervalColumnEnum<usize>),
}

generate_datatype_forwarder!(forward_to_interval_column_enum);
Expand Down
18 changes: 3 additions & 15 deletions src/physical/columns/ranged_column_scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -354,8 +354,6 @@ pub enum RangedColumnScanT<'a> {
Float(RangedColumnScanCell<'a, Float>),
/// Case Double
Double(RangedColumnScanCell<'a, Double>),
/// Case String
String(RangedColumnScanCell<'a, usize>),
}

generate_datatype_forwarder!(forward_to_ranged_column_scan_cell);
Expand Down Expand Up @@ -401,25 +399,15 @@ impl<'a> ColumnScan for RangedColumnScanT<'a> {
match self {
Self::U64(cs) => match value {
Self::Item::U64(val) => cs.seek(val).map(DataValueT::U64),
Self::Item::Float(_) => None,
Self::Item::Double(_) => None,
Self::Item::String(_) => None,
Self::Item::Float(_) | Self::Item::Double(_) => None,
},
Self::Float(cs) => match value {
Self::Item::U64(_) => None,
Self::Item::U64(_) | Self::Item::Double(_) => None,
Self::Item::Float(val) => cs.seek(val).map(DataValueT::Float),
Self::Item::Double(_) => None,
Self::Item::String(_) => None,
},
Self::Double(cs) => match value {
Self::Item::U64(_) => None,
Self::Item::Float(_) => None,
Self::Item::U64(_) | Self::Item::Float(_) => None,
Self::Item::Double(val) => cs.seek(val).map(DataValueT::Double),
Self::Item::String(_) => None,
},
Self::String(cs) => match value {
Self::Item::String(val) => cs.seek(val).map(DataValueT::String),
_ => None, // no type mixing allowed, so in any other case it should be [None]
},
}
}
Expand Down
6 changes: 1 addition & 5 deletions src/physical/datatypes/data_type_name.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use crate::error::Error;

use super::DataValueT;
use crate::physical::dictionary::Dictionary;

/// Descriptors to refer to the possible data types at runtime.
#[derive(Clone, Copy, Debug, Ord, PartialOrd, Eq, PartialEq)]
Expand All @@ -12,18 +11,15 @@ pub enum DataTypeName {
Float,
/// Data type [`super::double::Double`]
Double,
/// Data type `String`, uses [`usize`] and a [dictionary][crate::physical::dictionary::Dictionary]
String,
}

impl DataTypeName {
/// Parses a string, based on the name of the Datatype
pub fn parse(&self, string: &str, dict: &mut dyn Dictionary) -> Result<DataValueT, Error> {
pub fn parse(&self, string: &str) -> Result<DataValueT, Error> {
Ok(match self {
DataTypeName::U64 => DataValueT::U64(string.parse::<u64>()?),
DataTypeName::Float => DataValueT::Float(super::Float::new(string.parse::<f32>()?)?),
DataTypeName::Double => DataValueT::Double(super::Double::new(string.parse::<f64>()?)?),
DataTypeName::String => DataValueT::String(dict.add(string.to_string())),
})
}
}
24 changes: 0 additions & 24 deletions src/physical/datatypes/data_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ pub enum DataValueT {
Float(Float),
/// Case Double
Double(Double),
/// Case String
String(usize),
}

impl DataValueT {
Expand Down Expand Up @@ -47,21 +45,12 @@ impl DataValueT {
}
}

/// Returns an [`Option<usize>`] , answering whether the [`DataValueT`] is of this datatype
pub fn as_string(&self) -> Option<usize> {
match *self {
DataValueT::String(val) => Some(val),
_ => None,
}
}

/// Compares its value with another given [`DataValueT`]
pub fn compare(&self, other: &Self) -> Option<Ordering> {
match self {
DataValueT::U64(val) => other.as_u64().map(|otherval| val.cmp(&otherval)),
DataValueT::Float(val) => other.as_float().map(|otherval| val.cmp(&otherval)),
DataValueT::Double(val) => other.as_double().map(|otherval| val.cmp(&otherval)),
DataValueT::String(val) => other.as_string().map(|otherval| val.cmp(&otherval)),
}
}

Expand All @@ -71,7 +60,6 @@ impl DataValueT {
Self::U64(_) => DataTypeName::U64,
Self::Float(_) => DataTypeName::Float,
Self::Double(_) => DataTypeName::Double,
Self::String(_) => DataTypeName::String,
}
}
}
Expand All @@ -82,7 +70,6 @@ impl std::fmt::Display for DataValueT {
Self::U64(val) => write!(f, "{}", val),
Self::Float(val) => write!(f, "{}", val),
Self::Double(val) => write!(f, "{}", val),
Self::String(val) => write!(f, "str{}", val),
}
}
}
Expand All @@ -96,8 +83,6 @@ pub enum VecT {
Float(Vec<Float>),
/// Case Vec<Double>
Double(Vec<Double>),
/// Case Vec<String>
String(Vec<usize>),
}

generate_datatype_forwarder!(forward_to_vec);
Expand All @@ -109,7 +94,6 @@ impl VecT {
DataTypeName::U64 => Self::U64(Vec::new()),
DataTypeName::Float => Self::Float(Vec::new()),
DataTypeName::Double => Self::Double(Vec::new()),
DataTypeName::String => Self::String(Vec::new()),
}
}

Expand All @@ -119,7 +103,6 @@ impl VecT {
Self::U64(_) => DataTypeName::U64,
Self::Float(_) => DataTypeName::Float,
Self::Double(_) => DataTypeName::Double,
Self::String(_) => DataTypeName::String,
}
}

Expand All @@ -134,7 +117,6 @@ impl VecT {
VecT::U64(vec) => vec.get(index).copied().map(DataValueT::U64),
VecT::Float(vec) => vec.get(index).copied().map(DataValueT::Float),
VecT::Double(vec) => vec.get(index).copied().map(DataValueT::Double),
VecT::String(vec) => vec.get(index).copied().map(DataValueT::String),
}
}

Expand All @@ -153,9 +135,6 @@ impl VecT {
VecT::Double(vec) => vec.push(value.as_double().expect(
"expecting VecT::Double and DataValueT::Double, but DataValueT does not match",
)),
VecT::String(vec) => vec.push(value.as_string().expect(
"expecting VecT::String and DataValueT::String, but DataValueT does not match",
)),
};
}

Expand All @@ -182,9 +161,6 @@ impl VecT {
VecT::Double(vec) => vec
.get(idx_a)
.and_then(|&val_a| vec.get(idx_b).map(|val_b| val_a.cmp(val_b))),
VecT::String(vec) => vec
.get(idx_a)
.and_then(|&val_a| vec.get(idx_b).map(|val_b| val_a.cmp(val_b))),
}
}
}
2 changes: 0 additions & 2 deletions src/physical/tables/materialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ pub fn materialize(trie_scan: &mut TrieScanEnum) -> Trie {
.push(AdaptiveColumnBuilderT::Float(AdaptiveColumnBuilder::new())),
DataTypeName::Double => data_column_builders
.push(AdaptiveColumnBuilderT::Double(AdaptiveColumnBuilder::new())),
DataTypeName::String => data_column_builders
.push(AdaptiveColumnBuilderT::String(AdaptiveColumnBuilder::new())),
}
}

Expand Down
7 changes: 2 additions & 5 deletions src/physical/tables/trie.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ impl Table for Trie {
.map(|_| {
let empty_data_col = AdaptiveColumnBuilderT::new(DataTypeName::U64);
let empty_interval_col = AdaptiveColumnBuilder::<usize>::new();
build_interval_column!(empty_data_col, empty_interval_col; U64; Float; Double; String)
build_interval_column!(empty_data_col, empty_interval_col; U64; Float; Double)
})
.collect(),
);
Expand All @@ -201,9 +201,6 @@ impl Table for Trie {
VecT::Double(vec) => VecT::Double(permutator.permutate(vec).expect(
"length matches since permutator is constructed from these vectores",
)),
VecT::String(vec) => VecT::String(permutator.permutate(vec).expect(
"length matches since permutator is constructed from these vectors",
)),
})
.collect();

Expand Down Expand Up @@ -294,7 +291,7 @@ impl Table for Trie {
condensed_data_builders
.into_iter()
.zip(condensed_interval_starts_builders)
.map(|(col, iv)| build_interval_column!(col, iv; U64; Float; Double; String))
.map(|(col, iv)| build_interval_column!(col, iv; U64; Float; Double))
.collect(),
)
}
Expand Down
2 changes: 0 additions & 2 deletions src/physical/tables/trie_difference.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ impl<'a> TrieDifference<'a> {
DataTypeName::U64 => init_scans_for_datatype!(U64),
DataTypeName::Float => init_scans_for_datatype!(Float),
DataTypeName::Double => init_scans_for_datatype!(Double),
DataTypeName::String => init_scans_for_datatype!(String),
};
}

Expand Down Expand Up @@ -158,7 +157,6 @@ impl<'a> TrieScan<'a> for TrieDifference<'a> {
DataTypeName::U64 => down_for_datatype!(U64),
DataTypeName::Float => down_for_datatype!(Float),
DataTypeName::Double => down_for_datatype!(Double),
DataTypeName::String => down_for_datatype!(String),
}
} else {
self.difference_scans[next_layer].get_mut().reset();
Expand Down

0 comments on commit bf974a2

Please sign in to comment.