Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arithmetic Expressions in Projections #30

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Expand Up @@ -35,7 +35,7 @@ vec_map = { version = "0.8.0", features = ["eders"] }
hurdles = "1.0.0"
arrayvec = "0.4.0"

nom_sql = { git = "https://github.com/ms705/nom-sql.git", rev = "7758136babe72f470b7ea9fea384fca4f47db647"}
nom_sql = { git = "https://github.com/ms705/nom-sql.git", rev = "b01998fc34a5d473387987110724a395298fc6c0"}

# for benchmarks
# cli
Expand Down
137 changes: 135 additions & 2 deletions src/flow/core/data.rs
Expand Up @@ -8,9 +8,10 @@ use nom_sql::Literal;
use serde_json::Value;

use std::hash::{Hash, Hasher};
use std::ops::{Deref, DerefMut};
use std::ops::{Add, Deref, DerefMut, Div, Mul, Sub};
use std::fmt;

const FLOAT_PRECISION: f64 = 1000_000_000.0;
const TINYTEXT_WIDTH: usize = 15;

/// The main type used for user data throughout the codebase.
Expand Down Expand Up @@ -128,7 +129,7 @@ impl From<f64> for DataType {
}

let mut i = f.trunc() as i32;
let mut frac = (f.fract() * 1000_000_000.0).round() as i32;
let mut frac = (f.fract() * FLOAT_PRECISION).round() as i32;
if frac == 1000_000_000 {
i += 1;
frac = 0;
Expand Down Expand Up @@ -202,6 +203,17 @@ impl Into<i64> for DataType {
}
}

/// Converts a numeric `DataType` into an `f64`.
///
/// `Int` and `BigInt` are cast directly. `Real(i, f)` is rebuilt as
/// `i + f / FLOAT_PRECISION`.
///
/// # Panics
///
/// Every other variant reaches `unreachable!()`.
impl Into<f64> for DataType {
    fn into(self) -> f64 {
        match self {
            DataType::Int(n) => n as f64,
            DataType::BigInt(n) => n as f64,
            DataType::Real(whole, frac) => {
                // Scale the stored fractional component back down before adding it on.
                let fractional = (frac as f64) / FLOAT_PRECISION;
                whole as f64 + fractional
            }
            _ => unreachable!(),
        }
    }
}

impl From<String> for DataType {
fn from(s: String) -> Self {
let len = s.as_bytes().len();
Expand All @@ -225,6 +237,67 @@ impl<'a> From<&'a str> for DataType {
}
}

// Performs an arithmetic operation on two numeric DataTypes,
// returning a new DataType as the result.
//
// Arguments:
//   $op     - the binary operator token to apply (e.g. `+`, `-`, `*`, `/`).
//   $first  - identifier bound to the owned left-hand DataType.
//   $second - identifier bound to the owned right-hand DataType.
//
// Behaviour:
//   * Int/BigInt operands use integer arithmetic; a mixed Int/BigInt pair is
//     widened to i64 first.
//   * Any pairing of a Real with an Int, BigInt or Real converts both operands
//     to f64 (via `Into<f64>`), applies the operator and converts the result
//     back with `.into()`. A BigInt is cast to f64 on this path, so very large
//     values may lose precision.
//   * Every other combination panics with "can't <op> a <first> and <second>".
macro_rules! arithmetic_operation (
    ($op:tt, $first:ident, $second:ident) => (
        match ($first, $second) {
            (DataType::Int(a), DataType::Int(b)) => (a $op b).into(),
            (DataType::BigInt(a), DataType::BigInt(b)) => (a $op b).into(),
            (DataType::Int(a), DataType::BigInt(b)) => ((a as i64) $op b).into(),
            (DataType::BigInt(a), DataType::Int(b)) => (a $op (b as i64)).into(),

            (first @ DataType::Int(..), second @ DataType::Real(..)) |
            (first @ DataType::BigInt(..), second @ DataType::Real(..)) |
            (first @ DataType::Real(..), second @ DataType::Int(..)) |
            (first @ DataType::Real(..), second @ DataType::BigInt(..)) |
            (first @ DataType::Real(..), second @ DataType::Real(..)) => {
                let a: f64 = first.into();
                let b: f64 = second.into();
                (a $op b).into()
            }
            // Pass the format string straight to `panic!` instead of wrapping it
            // in `format!`; the resulting message is unchanged.
            (first, second) => panic!(
                "can't {} a {:?} and {:?}",
                stringify!($op),
                first,
                second
            ),
        }
    );
);

/// `DataType + DataType`, evaluated by `arithmetic_operation!`.
///
/// # Panics
///
/// Panics for operand combinations that `arithmetic_operation!` does not handle.
impl Add for DataType {
    type Output = DataType;

    fn add(self, rhs: Self) -> DataType {
        arithmetic_operation!(+, self, rhs)
    }
}

/// `DataType - DataType`, evaluated by `arithmetic_operation!`.
///
/// # Panics
///
/// Panics for operand combinations that `arithmetic_operation!` does not handle.
impl Sub for DataType {
    type Output = DataType;

    fn sub(self, rhs: Self) -> DataType {
        arithmetic_operation!(-, self, rhs)
    }
}

/// `DataType * DataType`, evaluated by `arithmetic_operation!`.
///
/// # Panics
///
/// Panics for operand combinations that `arithmetic_operation!` does not handle.
impl Mul for DataType {
    type Output = DataType;

    fn mul(self, rhs: Self) -> DataType {
        arithmetic_operation!(*, self, rhs)
    }
}

/// `DataType / DataType`, evaluated by `arithmetic_operation!`.
///
/// Integer operand pairs go through Rust's integer `/`, so the quotient is
/// truncated and a zero divisor panics.
///
/// # Panics
///
/// Panics for operand combinations that `arithmetic_operation!` does not handle.
impl Div for DataType {
    type Output = DataType;

    fn div(self, rhs: Self) -> DataType {
        arithmetic_operation!(/, self, rhs)
    }
}

impl fmt::Debug for DataType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Expand Down Expand Up @@ -442,6 +515,66 @@ mod tests {
assert_eq!(c.to_json(), json!(-0.012345678));
}

#[test]
fn real_to_float() {
    // Round-trip an f64 through DataType and back; the value must survive intact.
    let expected: f64 = 2.5;
    let wrapped = DataType::from(expected);
    let round_tripped: f64 = wrapped.into();
    assert_eq!(expected, round_tripped);
}

#[test]
fn add_data_types() {
    // Each row is (left operand, right operand, expected sum).
    let cases: Vec<(DataType, DataType, DataType)> = vec![
        (DataType::from(1), DataType::from(2), 3.into()),
        (DataType::from(1.5), DataType::from(2), (3.5).into()),
        (DataType::from(2), DataType::from(1.5), (3.5).into()),
        (DataType::from(1.5), DataType::from(2.5), (4.0).into()),
        (DataType::BigInt(1), DataType::BigInt(2), 3.into()),
        (DataType::from(1), DataType::BigInt(2), 3.into()),
        (DataType::BigInt(2), DataType::from(1), 3.into()),
    ];

    for (lhs, rhs, expected) in cases {
        assert_eq!(lhs + rhs, expected);
    }
}

#[test]
fn subtract_data_types() {
    // Each row is (minuend, subtrahend, expected difference).
    let cases: Vec<(DataType, DataType, DataType)> = vec![
        (DataType::from(2), DataType::from(1), 1.into()),
        (DataType::from(3.5), DataType::from(2), (1.5).into()),
        (DataType::from(2), DataType::from(1.5), (0.5).into()),
        (DataType::from(3.5), DataType::from(2.0), (1.5).into()),
        (DataType::BigInt(1), DataType::BigInt(2), (-1).into()),
        (DataType::from(1), DataType::BigInt(2), (-1).into()),
        (DataType::BigInt(2), DataType::from(1), 1.into()),
    ];

    for (lhs, rhs, expected) in cases {
        assert_eq!(lhs - rhs, expected);
    }
}

#[test]
fn multiply_data_types() {
    // Each row is (left factor, right factor, expected product).
    let cases: Vec<(DataType, DataType, DataType)> = vec![
        (DataType::from(2), DataType::from(1), 2.into()),
        (DataType::from(3.5), DataType::from(2), (7.0).into()),
        (DataType::from(2), DataType::from(1.5), (3.0).into()),
        (DataType::from(3.5), DataType::from(2.0), (7.0).into()),
        (DataType::BigInt(1), DataType::BigInt(2), 2.into()),
        (DataType::from(1), DataType::BigInt(2), 2.into()),
        (DataType::BigInt(2), DataType::from(1), 2.into()),
    ];

    for (lhs, rhs, expected) in cases {
        assert_eq!(lhs * rhs, expected);
    }
}

#[test]
fn divide_data_types() {
    // Each row is (dividend, divisor, expected quotient).
    let cases: Vec<(DataType, DataType, DataType)> = vec![
        (DataType::from(2), DataType::from(1), 2.into()),
        (DataType::from(7.5), DataType::from(2), (3.75).into()),
        (DataType::from(7), DataType::from(2.5), (2.8).into()),
        (DataType::from(3.5), DataType::from(2.0), (1.75).into()),
        (DataType::BigInt(4), DataType::BigInt(2), 2.into()),
        (DataType::from(4), DataType::BigInt(2), 2.into()),
        (DataType::BigInt(4), DataType::from(2), 2.into()),
    ];

    for (lhs, rhs, expected) in cases {
        assert_eq!(lhs / rhs, expected);
    }
}

#[test]
#[should_panic(expected = "can't + a TinyText(\"hi\") and Int(5)")]
fn add_invalid_types() {
    // Adding text to an integer is not a supported combination and must panic.
    let text = DataType::from("hi");
    let number = DataType::from(5);
    let _ = text + number;
}

#[test]
fn data_type_debug() {
let tiny_text: DataType = "hi".into();
Expand Down
32 changes: 25 additions & 7 deletions src/mir/node.rs
@@ -1,4 +1,4 @@
use nom_sql::{Column, ColumnSpecification, Operator, OrderType};
use nom_sql::{ArithmeticExpression, Column, ColumnSpecification, Operator, OrderType};
use std::cell::RefCell;
use std::fmt::{Debug, Display, Error, Formatter};
use std::rc::Rc;
Expand Down Expand Up @@ -477,6 +477,7 @@ impl MirNode {
MirNodeType::Project {
ref emit,
ref literals,
ref arithmetic,
} => {
assert_eq!(self.ancestors.len(), 1);
let parent = self.ancestors[0].clone();
Expand All @@ -485,6 +486,7 @@ impl MirNode {
parent,
self.columns.as_slice(),
emit,
arithmetic,
literals,
mig,
)
Expand Down Expand Up @@ -598,6 +600,7 @@ pub enum MirNodeType {
/// emit columns
Project {
emit: Vec<Column>,
arithmetic: Vec<(String, ArithmeticExpression)>,
literals: Vec<(String, DataType)>,
},
/// emit columns
Expand Down Expand Up @@ -772,11 +775,13 @@ impl MirNodeType {
MirNodeType::Project {
emit: ref our_emit,
literals: ref our_literals,
arithmetic: ref our_arithmetic,
} => match *other {
MirNodeType::Project {
ref emit,
ref literals,
} => our_emit == emit && our_literals == literals,
ref arithmetic,
} => our_emit == emit && our_literals == literals && our_arithmetic == arithmetic,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is quite conservative (though probably fine for v0 of arithmetic expression support): it will only allow reuse of projections that have exactly identical arithmetic expressions. For example, with multiple elements in the arithmetic vector, it would only reuse the projection if the vectors have the same elements and they're all identical. However, it is also acceptable to reuse the projection if the new our_arithmetic is a strict subset of arithmetic.

Now, you might observe that we are similarly unnecessarily strict about emit, and you'd be right ;-)

_ => false,
},
MirNodeType::Reuse { node: ref us } => {
Expand Down Expand Up @@ -994,14 +999,29 @@ impl Debug for MirNodeType {
MirNodeType::Project {
ref emit,
ref literals,
ref arithmetic,
} => write!(
f,
"π [{}{}]",
"π [{}{}{}]",
emit.iter()
.map(|c| c.name.as_str())
.collect::<Vec<_>>()
.join(", "),
if literals.len() > 0 {
if arithmetic.is_empty() {
format!("")
} else {
format!(
", {}",
arithmetic
.iter()
.map(|&(ref n, ref e)| format!("{}: {:?}", n, e))
.collect::<Vec<_>>()
.join(", ")
)
},
if literals.is_empty() {
format!("")
} else {
format!(
", lit: {}",
literals
Expand All @@ -1010,9 +1030,7 @@ impl Debug for MirNodeType {
.collect::<Vec<_>>()
.join(", ")
)
} else {
format!("")
}
},
),
MirNodeType::Reuse { ref node } => write!(
f,
Expand Down
1 change: 1 addition & 0 deletions src/mir/reuse.rs
Expand Up @@ -314,6 +314,7 @@ mod tests {
vec![Column::from("aa")],
MirNodeType::Project {
emit: vec![Column::from("aa")],
arithmetic: vec![],
literals: vec![],
},
vec![c.clone()],
Expand Down
32 changes: 30 additions & 2 deletions src/mir/to_flow.rs
@@ -1,4 +1,5 @@
use nom_sql::{Column, ColumnConstraint, ColumnSpecification, Operator, OrderType};
use nom_sql::{ArithmeticBase, ArithmeticExpression, Column, ColumnConstraint, ColumnSpecification,
Operator, OrderType};
use std::collections::HashMap;

use flow::Migration;
Expand All @@ -9,7 +10,7 @@ use mir::node::GroupedNodeType;
use ops;
use ops::join::{Join, JoinType};
use ops::latest::Latest;
use ops::project::Project;
use ops::project::{Project, ProjectExpression, ProjectExpressionBase};

#[derive(Clone, Debug)]
pub enum FlowNode {
Expand Down Expand Up @@ -360,11 +361,26 @@ pub(crate) fn make_latest_node(
FlowNode::New(na)
}

/// Translates one operand of a `nom_sql` arithmetic expression into the
/// `ProjectExpressionBase` form used by the projection operator.
///
/// * `ArithmeticBase::Column` is resolved to a column id by asking `parent`
///   (`column_id_for_column`).
/// * `ArithmeticBase::Scalar` has its literal converted into a `DataType`.
fn generate_projection_base(parent: &MirNodeRef, base: &ArithmeticBase) -> ProjectExpressionBase {
    match base {
        &ArithmeticBase::Scalar(ref value) => {
            let converted: DataType = value.into();
            ProjectExpressionBase::Literal(converted)
        }
        &ArithmeticBase::Column(ref col) => {
            // Keep the RefCell borrow confined to this one statement.
            let id = parent.borrow().column_id_for_column(col);
            ProjectExpressionBase::Column(id)
        }
    }
}

pub(crate) fn make_project_node(
name: &str,
parent: MirNodeRef,
columns: &[Column],
emit: &Vec<Column>,
arithmetic: &Vec<(String, ArithmeticExpression)>,
literals: &Vec<(String, DataType)>,
mig: &mut Migration,
) -> FlowNode {
Expand All @@ -377,13 +393,25 @@ pub(crate) fn make_project_node(

let (_, literal_values): (Vec<_>, Vec<_>) = literals.iter().cloned().unzip();

let projected_arithmetic: Vec<ProjectExpression> = arithmetic
.iter()
.map(|&(_, ref e)| {
ProjectExpression::new(
e.op.clone(),
generate_projection_base(&parent, &e.left),
generate_projection_base(&parent, &e.right),
)
})
.collect();

let n = mig.add_ingredient(
String::from(name),
column_names.as_slice(),
Project::new(
parent_na,
projected_column_ids.as_slice(),
Some(literal_values),
Some(projected_arithmetic),
),
);
FlowNode::New(n)
Expand Down