Skip to content

Commit

Permalink
Remove the `enum_dispatch` dependency
Browse files (browse the repository at this point in the history)
  • Loading branch information
ritchie46 committed Oct 3, 2020
1 parent 241ea20 commit 42e325c
Show file tree
Hide file tree
Showing 7 changed files with 220 additions and 208 deletions.
1 change: 0 additions & 1 deletion polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ unsafe_unwrap = "^0.1.0"
rayon = "^1.3.1"
prettytable-rs = { version="^0.8.0", features=["win_crlf"], optional = true, default_features = false}
chrono = {version = "^0.4.13", optional = true}
enum_dispatch = "^0.3.2"
parquet = {version = "1", optional = true}
rand = {version = "0.7", optional = true}
rand_distr = {version = "0.3", optional = true}
Expand Down
82 changes: 52 additions & 30 deletions polars/src/frame/group_by.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use crate::frame::select::Selection;
use crate::prelude::*;
use crate::utils::Xob;
use arrow::array::{PrimitiveBuilder, StringBuilder};
use enum_dispatch::enum_dispatch;
use fnv::FnvBuildHasher;
use itertools::Itertools;
use num::{Num, NumCast, ToPrimitive, Zero};
Expand All @@ -31,7 +30,6 @@ where
.collect()
}

#[enum_dispatch(Series)]
trait IntoGroupTuples {
fn group_tuples(&self) -> Vec<(usize, Vec<usize>)> {
unimplemented!()
Expand Down Expand Up @@ -168,7 +166,10 @@ impl DataFrame {
let selected_keys = self.select_series(by)?;

let groups = match selected_keys.len() {
1 => selected_keys[0].group_tuples(),
1 => {
let series = &selected_keys[0];
apply_method_all_series!(series, group_tuples,)
}
2 => {
let iter = selected_keys[0]
.as_groupable_iter()?
Expand Down Expand Up @@ -274,7 +275,6 @@ pub struct GroupBy<'df, 'selection_str> {
selected_agg: Option<Vec<&'selection_str str>>,
}

#[enum_dispatch(Series)]
trait NumericAggSync {
fn agg_mean(&self, _groups: &Vec<(usize, Vec<usize>)>) -> Option<Series> {
None
Expand Down Expand Up @@ -415,7 +415,6 @@ where
}
}

#[enum_dispatch(Series)]
trait AggFirst {
fn agg_first(&self, _groups: &Vec<(usize, Vec<usize>)>) -> Series;
}
Expand Down Expand Up @@ -451,7 +450,6 @@ impl AggFirst for LargeListChunked {
}
}

#[enum_dispatch(Series)]
trait AggLast {
fn agg_last(&self, _groups: &Vec<(usize, Vec<usize>)>) -> Series;
}
Expand Down Expand Up @@ -487,7 +485,6 @@ impl AggLast for LargeListChunked {
}
}

#[enum_dispatch(Series)]
trait AggNUnique {
fn agg_n_unique(&self, _groups: &Vec<(usize, Vec<usize>)>) -> Option<UInt32Chunked> {
None
Expand Down Expand Up @@ -547,7 +544,6 @@ impl AggNUnique for Utf8Chunked {
}
}

#[enum_dispatch(Series)]
trait AggQuantile {
fn agg_quantile(&self, _groups: &Vec<(usize, Vec<usize>)>, _quantile: f64) -> Option<Series> {
None
Expand Down Expand Up @@ -665,7 +661,7 @@ impl<'df, 'selection_str> GroupBy<'df, 'selection_str> {

for agg_col in agg_cols {
let new_name = format!["{}_mean", agg_col.name()];
let opt_agg = agg_col.agg_mean(&self.groups);
let opt_agg = apply_method_all_series!(agg_col, agg_mean, &self.groups);
if let Some(mut agg) = opt_agg {
agg.rename(&new_name);
cols.push(agg);
Expand Down Expand Up @@ -704,7 +700,7 @@ impl<'df, 'selection_str> GroupBy<'df, 'selection_str> {

for agg_col in agg_cols {
let new_name = format!["{}_sum", agg_col.name()];
let opt_agg = agg_col.agg_sum(&self.groups);
let opt_agg = apply_method_all_series!(agg_col, agg_sum, &self.groups);
if let Some(mut agg) = opt_agg {
agg.rename(&new_name);
cols.push(agg);
Expand Down Expand Up @@ -742,7 +738,7 @@ impl<'df, 'selection_str> GroupBy<'df, 'selection_str> {
let (mut cols, agg_cols) = self.prepare_agg()?;
for agg_col in agg_cols {
let new_name = format!["{}_min", agg_col.name()];
let opt_agg = agg_col.agg_min(&self.groups);
let opt_agg = apply_method_all_series!(agg_col, agg_min, &self.groups);
if let Some(mut agg) = opt_agg {
agg.rename(&new_name);
cols.push(agg);
Expand Down Expand Up @@ -780,7 +776,7 @@ impl<'df, 'selection_str> GroupBy<'df, 'selection_str> {
let (mut cols, agg_cols) = self.prepare_agg()?;
for agg_col in agg_cols {
let new_name = format!["{}_max", agg_col.name()];
let opt_agg = agg_col.agg_max(&self.groups);
let opt_agg = apply_method_all_series!(agg_col, agg_max, &self.groups);
if let Some(mut agg) = opt_agg {
agg.rename(&new_name);
cols.push(agg);
Expand Down Expand Up @@ -818,7 +814,7 @@ impl<'df, 'selection_str> GroupBy<'df, 'selection_str> {
let (mut cols, agg_cols) = self.prepare_agg()?;
for agg_col in agg_cols {
let new_name = format!["{}_first", agg_col.name()];
let mut agg = agg_col.agg_first(&self.groups);
let mut agg = apply_method_all_series!(agg_col, agg_first, &self.groups);
agg.rename(&new_name);
cols.push(agg);
}
Expand Down Expand Up @@ -854,7 +850,7 @@ impl<'df, 'selection_str> GroupBy<'df, 'selection_str> {
let (mut cols, agg_cols) = self.prepare_agg()?;
for agg_col in agg_cols {
let new_name = format!["{}_last", agg_col.name()];
let mut agg = agg_col.agg_last(&self.groups);
let mut agg = apply_method_all_series!(agg_col, agg_last, &self.groups);
agg.rename(&new_name);
cols.push(agg);
}
Expand Down Expand Up @@ -890,7 +886,7 @@ impl<'df, 'selection_str> GroupBy<'df, 'selection_str> {
let (mut cols, agg_cols) = self.prepare_agg()?;
for agg_col in agg_cols {
let new_name = format!["{}_n_unique", agg_col.name()];
let opt_agg = agg_col.agg_n_unique(&self.groups);
let opt_agg = apply_method_all_series!(agg_col, agg_n_unique, &self.groups);
if let Some(mut agg) = opt_agg {
agg.rename(&new_name);
cols.push(agg.into_series());
Expand Down Expand Up @@ -918,7 +914,7 @@ impl<'df, 'selection_str> GroupBy<'df, 'selection_str> {
let (mut cols, agg_cols) = self.prepare_agg()?;
for agg_col in agg_cols {
let new_name = format!["{}_quantile_{:.2}", agg_col.name(), quantile];
let opt_agg = agg_col.agg_quantile(&self.groups, quantile);
let opt_agg = apply_method_all_series!(agg_col, agg_quantile, &self.groups, quantile);
if let Some(mut agg) = opt_agg {
agg.rename(&new_name);
cols.push(agg.into_series());
Expand All @@ -941,7 +937,7 @@ impl<'df, 'selection_str> GroupBy<'df, 'selection_str> {
let (mut cols, agg_cols) = self.prepare_agg()?;
for agg_col in agg_cols {
let new_name = format!["{}_median", agg_col.name()];
let opt_agg = agg_col.agg_median(&self.groups);
let opt_agg = apply_method_all_series!(agg_col, agg_median, &self.groups);
if let Some(mut agg) = opt_agg {
agg.rename(&new_name);
cols.push(agg.into_series());
Expand Down Expand Up @@ -1063,7 +1059,7 @@ impl<'df, 'selection_str> GroupBy<'df, 'selection_str> {
macro_rules! finish_agg_opt {
($self:ident, $name_fmt:expr, $agg_fn:ident, $agg_col:ident, $cols:ident) => {{
let new_name = format![$name_fmt, $agg_col.name()];
let opt_agg = $agg_col.$agg_fn(&$self.groups);
let opt_agg = apply_method_all_series!($agg_col, $agg_fn, &$self.groups);
if let Some(mut agg) = opt_agg {
agg.rename(&new_name);
$cols.push(agg.into_series());
Expand All @@ -1073,14 +1069,14 @@ impl<'df, 'selection_str> GroupBy<'df, 'selection_str> {
macro_rules! finish_agg {
($self:ident, $name_fmt:expr, $agg_fn:ident, $agg_col:ident, $cols:ident) => {{
let new_name = format![$name_fmt, $agg_col.name()];
let mut agg = $agg_col.$agg_fn(&$self.groups);
let mut agg = apply_method_all_series!($agg_col, $agg_fn, &$self.groups);
agg.rename(&new_name);
$cols.push(agg.into_series());
}};
}

let (mut cols, agg_cols) = self.prepare_agg()?;
for agg_col in agg_cols {
for agg_col in &agg_cols {
if let Some(&aggregations) = map.get(agg_col.name()) {
for aggregation_f in aggregations.as_ref() {
match aggregation_f.as_ref() {
Expand Down Expand Up @@ -1271,7 +1267,6 @@ pub struct Pivot<'df, 'selection_str> {
values_column: &'selection_str str,
}

#[enum_dispatch(Series)]
trait ChunkPivot {
fn pivot(
&self,
Expand Down Expand Up @@ -1476,55 +1471,82 @@ impl<'df, 'sel_str> Pivot<'df, 'sel_str> {
pub fn first(&self) -> Result<DataFrame> {
let pivot_series = self.gb.df.column(self.pivot_column)?;
let values_series = self.gb.df.column(self.values_column)?;
values_series.pivot(
apply_method_all_series!(
values_series,
pivot,
pivot_series,
self.gb.keys(),
&self.gb.groups,
PivotAgg::First,
PivotAgg::First
)
}

/// Aggregate the pivot results by taking the sum of all duplicates.
pub fn sum(&self) -> Result<DataFrame> {
let pivot_series = self.gb.df.column(self.pivot_column)?;
let values_series = self.gb.df.column(self.values_column)?;
values_series.pivot(pivot_series, self.gb.keys(), &self.gb.groups, PivotAgg::Sum)
apply_method_all_series!(
values_series,
pivot,
pivot_series,
self.gb.keys(),
&self.gb.groups,
PivotAgg::Sum
)
}

/// Aggregate the pivot results by taking the minimal value of all duplicates.
pub fn min(&self) -> Result<DataFrame> {
let pivot_series = self.gb.df.column(self.pivot_column)?;
let values_series = self.gb.df.column(self.values_column)?;
values_series.pivot(pivot_series, self.gb.keys(), &self.gb.groups, PivotAgg::Min)
apply_method_all_series!(
values_series,
pivot,
pivot_series,
self.gb.keys(),
&self.gb.groups,
PivotAgg::Min
)
}

/// Aggregate the pivot results by taking the maximum value of all duplicates.
pub fn max(&self) -> Result<DataFrame> {
let pivot_series = self.gb.df.column(self.pivot_column)?;
let values_series = self.gb.df.column(self.values_column)?;
values_series.pivot(pivot_series, self.gb.keys(), &self.gb.groups, PivotAgg::Max)
apply_method_all_series!(
values_series,
pivot,
pivot_series,
self.gb.keys(),
&self.gb.groups,
PivotAgg::Max
)
}

/// Aggregate the pivot results by taking the mean value of all duplicates.
pub fn mean(&self) -> Result<DataFrame> {
let pivot_series = self.gb.df.column(self.pivot_column)?;
let values_series = self.gb.df.column(self.values_column)?;
values_series.pivot(
apply_method_all_series!(
values_series,
pivot,
pivot_series,
self.gb.keys(),
&self.gb.groups,
PivotAgg::Mean,
PivotAgg::Mean
)
}
/// Aggregate the pivot results by taking the median value of all duplicates.
pub fn median(&self) -> Result<DataFrame> {
let pivot_series = self.gb.df.column(self.pivot_column)?;
let values_series = self.gb.df.column(self.values_column)?;
values_series.pivot(
apply_method_all_series!(
values_series,
pivot,
pivot_series,
self.gb.keys(),
&self.gb.groups,
PivotAgg::Median,
PivotAgg::Median
)
}
}
Expand Down
5 changes: 2 additions & 3 deletions polars/src/frame/hash_join.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use crate::prelude::*;
use crate::utils::Xob;
use enum_dispatch::enum_dispatch;
use fnv::{FnvBuildHasher, FnvHashMap};
#[cfg(feature = "parallel")]
use rayon::prelude::*;
Expand Down Expand Up @@ -564,7 +563,6 @@ impl HashJoin<Utf8Type> for Utf8Chunked {
}
}

#[enum_dispatch(Series)]
trait ZipOuterJoinColumn {
fn zip_outer_join_column(
&self,
Expand Down Expand Up @@ -781,7 +779,8 @@ impl DataFrame {
)
},
);
let mut s = s_left.zip_outer_join_column(s_right, &opt_join_tuples);
let mut s =
apply_method_all_series!(s_left, zip_outer_join_column, s_right, &opt_join_tuples);
s.rename(left_on);
df_left.hstack(&[s])?;
self.finish_join(df_left, df_right)
Expand Down
3 changes: 1 addition & 2 deletions polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,6 @@
#![feature(iterator_fold_self)]
#![feature(doc_cfg)]
#[macro_use]
pub mod series;
#[macro_use]
pub(crate) mod utils;
pub mod chunked_array;
pub mod datatypes;
Expand All @@ -199,4 +197,5 @@ pub mod error;
mod fmt;
pub mod frame;
pub mod prelude;
pub mod series;
pub mod testing;

0 comments on commit 42e325c

Please sign in to comment.