Skip to content

Commit

Permalink
fix asofjoin dispatch of dates
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 21, 2021
1 parent adf241c commit 2794f04
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::prelude::*;
use crate::utils::CustomIterTools;
use num::Bounded;
use polars_arrow::trusted_len::PushUnchecked;
use std::borrow::Cow;

impl<T> ChunkedArray<T>
where
Expand Down Expand Up @@ -104,6 +105,13 @@ impl DataFrame {
assert!((*idx as usize) < other.height())
}

// drop right join column
let other = if left_on == right_on {
Cow::Owned(other.drop(right_on)?)
} else {
Cow::Borrowed(other)
};

// Safety:
// join tuples are in bounds
let right_df = unsafe {
Expand Down
3 changes: 0 additions & 3 deletions polars/polars-core/src/frame/hash_join/multiple_keys.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,9 +242,6 @@ pub fn private_left_join_multiple_keys(a: &DataFrame, b: &DataFrame) -> Vec<(u32
}

pub(crate) fn left_join_multiple_keys(a: &DataFrame, b: &DataFrame) -> Vec<(u32, Option<u32>)> {
// we assume that the b DataFrame is the shorter relation.
// b will be used for the build phase.

let n_threads = POOL.current_num_threads();
let dfs_a = split_df(a, n_threads).unwrap();
let dfs_b = split_df(b, n_threads).unwrap();
Expand Down
3 changes: 2 additions & 1 deletion polars/polars-core/src/series/implementations/dates_time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ macro_rules! impl_dyn_series {

#[cfg(feature = "asof_join")]
fn join_asof(&self, other: &Series) -> Result<Vec<Option<u32>>> {
self.0.deref().join_asof(other)
let other = other.to_physical_repr();
self.0.deref().join_asof(&other)
}

fn set_sorted(&mut self, reverse: bool) {
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/series/series_trait.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ pub(crate) mod private {

#[cfg(feature = "asof_join")]
fn join_asof(&self, _other: &Series) -> Result<Vec<Option<u32>>> {
unimplemented!()
invalid_operation!(self)
}

fn set_sorted(&mut self, _reverse: bool) {
Expand Down
72 changes: 70 additions & 2 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -913,10 +913,10 @@ def test_asof_cross_join():

# only test dispatch of asof join
out = left.join(right, on="a", how="asof")
assert out.shape == (3, 4)
assert out.shape == (3, 3)

left.lazy().join(right.lazy(), on="a", how="asof").collect()
assert out.shape == (3, 4)
assert out.shape == (3, 3)

# only test dispatch of cross join
out = left.join(right, how="cross")
Expand Down Expand Up @@ -1059,3 +1059,71 @@ def test_groupby_cat_list():
["b"]
]"""
)


def test_asof_join():
fmt = "%F %T%.3f"
dates = """2016-05-25 13:30:00.023
2016-05-25 13:30:00.023
2016-05-25 13:30:00.030
2016-05-25 13:30:00.041
2016-05-25 13:30:00.048
2016-05-25 13:30:00.049
2016-05-25 13:30:00.072
2016-05-25 13:30:00.075""".split(
"\n"
)
dates

ticker = """GOOG
MSFT
MSFT
MSFT
GOOG
AAPL
GOOG
MSFT""".split(
"\n"
)

quotes = pl.DataFrame(
{
"dates": pl.Series(dates).str.strptime(pl.Datetime, fmt=fmt),
"ticker": ticker,
"bid": [720.5, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],
}
)

dates = """2016-05-25 13:30:00.023
2016-05-25 13:30:00.038
2016-05-25 13:30:00.048
2016-05-25 13:30:00.048
2016-05-25 13:30:00.048""".split(
"\n"
)

ticker = """MSFT
MSFT
GOOG
GOOG
AAPL""".split(
"\n"
)

trades = pl.DataFrame(
{
"dates": pl.Series(dates).str.strptime(pl.Datetime, fmt=fmt),
"ticker": ticker,
"bid": [51.95, 51.95, 720.77, 720.92, 98.0],
}
)

out = trades.join(quotes, on="dates", how="asof")
assert out.columns == ["dates", "ticker", "bid", "ticker_right", "bid_right"]
assert out["dates"].cast(int).to_list() == [
1464183000023,
1464183000038,
1464183000048,
1464183000048,
1464183000048,
]

0 comments on commit 2794f04

Please sign in to comment.