Skip to content

Commit

Permalink
🔧 hotfix for rounding error
Browse files Browse the repository at this point in the history
  • Loading branch information
jonasvdd committed May 19, 2022
1 parent df9f98d commit 45bb304
Show file tree
Hide file tree
Showing 2 changed files with 175 additions and 6 deletions.
19 changes: 16 additions & 3 deletions plotly_resampler/aggregation/aggregators.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,22 @@ def _aggregate(self, s: pd.Series, n_out: int) -> pd.Series:
if s_i.dtype.type == np.datetime64:
# lttbc does not support this datatype -> convert to int
# (where the time is represented in ns)
s_i = s_i.astype(int)
idx, data = lttbc.downsample(s_i, s_v, n_out)
idx = pd.to_datetime(idx, unit="ns", utc=True).tz_convert(s.index.tz)
# REMARK:
# -> additional logic is needed to mitigate rounding errors
# First, the start offset is subtracted, after which the input series
# is set in the already requested format, i.e. np.float64

# NOTE -> Rounding errors can still persist, but this approach is already
# significantly less prone to them than the previous implementation.
s_i0 = s_i[0].astype(np.int64)
idx, data = lttbc.downsample(
(s_i.astype(np.int64) - s_i0).astype(np.float64), s_v, 1000
)

# add the start-offset and convert back to datetime
idx = pd.to_datetime(
idx.astype(np.int64) + s_i0, unit="ns", utc=True
).tz_convert(s.index.tz)
else:
idx, data = lttbc.downsample(s_i, s_v, n_out)
idx = idx.astype(s_i.dtype)
Expand Down
162 changes: 159 additions & 3 deletions tests/test_figurewidget_resampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
__author__ = "Jonas Van Der Donckt, Jeroen Van Der Donckt, Emiel Deprost"


import pytest
from copy import copy
from datetime import datetime

import numpy as np
import pandas as pd
from copy import copy
import plotly.graph_objects as go
import pytest
from plotly.subplots import make_subplots
from plotly_resampler import FigureWidgetResampler, EfficientLTTB, EveryNthPoint
from plotly_resampler import EfficientLTTB, EveryNthPoint, FigureWidgetResampler


def test_add_trace_kwarg_space(float_series, bool_series, cat_series):
Expand Down Expand Up @@ -1375,3 +1377,157 @@ def test_fwr_adjust_series_text_input():

# text === -hovertext -> so the sum should equal their length
assert (text == -hovertext).sum() == 1000


def test_fwr_time_based_data_ns():
    """Check resampling of traces whose datetime index has a nanosecond step.

    Three series of 100_000 samples are added to a single figure, each with a
    step drawn via ``np.random.randint(5, 100_000)`` nanoseconds. Every
    resulting trace must expose exactly 1000 x / y / text / hovertext values,
    and each text value must still be the negation of its hovertext value.
    """
    n_samples = 100_000
    n_shown = 1000
    fwr = FigureWidgetResampler(
        default_n_shown_samples=n_shown,
        verbose=True,
        default_downsampler=EfficientLTTB(),
    )

    for trace_idx in range(3):
        # Build a high-frequency series starting "now" with a random ns step.
        start = datetime.now()
        step = np.random.randint(5, 100_000)
        series = pd.Series(
            index=pd.date_range(start, freq=f"{step}ns", periods=n_samples),
            data=np.arange(n_samples),
        )
        labels = series.astype(str)
        negated_labels = (-series).astype(str)

        fwr.add_trace(
            go.Scatter(name="hf_text"),
            hf_x=series.index,
            hf_y=series,
            hf_text=labels,
            hf_hovertext=negated_labels,
        )

        trace = fwr.data[trace_idx]
        assert len(trace["x"]) == n_shown
        assert len(trace["y"]) == n_shown

        shown_text = trace["text"].astype(int)
        shown_hovertext = trace["hovertext"].astype(int)
        assert len(shown_hovertext) == n_shown
        assert len(shown_text) == n_shown

        # Each text value is the negation of its hovertext counterpart, so
        # the number of matches must equal the number of shown samples.
        assert (shown_text == -shown_hovertext).sum() == n_shown


def test_fwr_time_based_data_us():
    """Check resampling of traces whose datetime index has a microsecond step.

    Three series of 100_000 samples are added to a single figure, each with a
    step drawn via ``np.random.randint(5, 100_000)`` microseconds. Every
    resulting trace must expose exactly 1000 x / y / text / hovertext values,
    and each text value must still be the negation of its hovertext value.
    """
    n_samples = 100_000
    n_shown = 1000
    fwr = FigureWidgetResampler(
        default_n_shown_samples=n_shown,
        verbose=True,
        default_downsampler=EfficientLTTB(),
    )

    for trace_idx in range(3):
        # Build a high-frequency series starting "now" with a random us step.
        start = datetime.now()
        step = np.random.randint(5, 100_000)
        series = pd.Series(
            index=pd.date_range(start, freq=f"{step}us", periods=n_samples),
            data=np.arange(n_samples),
        )
        labels = series.astype(str)
        negated_labels = (-series).astype(str)

        fwr.add_trace(
            go.Scatter(name="hf_text"),
            hf_x=series.index,
            hf_y=series,
            hf_text=labels,
            hf_hovertext=negated_labels,
        )

        trace = fwr.data[trace_idx]
        assert len(trace["x"]) == n_shown
        assert len(trace["y"]) == n_shown

        shown_text = trace["text"].astype(int)
        shown_hovertext = trace["hovertext"].astype(int)
        assert len(shown_hovertext) == n_shown
        assert len(shown_text) == n_shown

        # Each text value is the negation of its hovertext counterpart, so
        # the number of matches must equal the number of shown samples.
        assert (shown_text == -shown_hovertext).sum() == n_shown


def test_fwr_time_based_data_ms():
    """Check resampling of traces whose datetime index has a millisecond step.

    Three series of 100_000 samples are added to a single figure, each with a
    step drawn via ``np.random.randint(5, 10_000)`` milliseconds. Every
    resulting trace must expose exactly 1000 x / y / text / hovertext values,
    and each text value must still be the negation of its hovertext value.
    """
    n_samples = 100_000
    n_shown = 1000
    fwr = FigureWidgetResampler(
        default_n_shown_samples=n_shown,
        verbose=True,
        default_downsampler=EfficientLTTB(),
    )

    for trace_idx in range(3):
        # Build a high-frequency series starting "now" with a random ms step.
        start = datetime.now()
        step = np.random.randint(5, 10_000)
        series = pd.Series(
            index=pd.date_range(start, freq=f"{step}ms", periods=n_samples),
            data=np.arange(n_samples),
        )
        labels = series.astype(str)
        negated_labels = (-series).astype(str)

        fwr.add_trace(
            go.Scatter(name="hf_text"),
            hf_x=series.index,
            hf_y=series,
            hf_text=labels,
            hf_hovertext=negated_labels,
        )

        trace = fwr.data[trace_idx]
        assert len(trace["x"]) == n_shown
        assert len(trace["y"]) == n_shown

        shown_text = trace["text"].astype(int)
        shown_hovertext = trace["hovertext"].astype(int)
        assert len(shown_hovertext) == n_shown
        assert len(shown_text) == n_shown

        # Each text value is the negation of its hovertext counterpart, so
        # the number of matches must equal the number of shown samples.
        assert (shown_text == -shown_hovertext).sum() == n_shown


def test_fwr_time_based_data_s():
    """Check resampling of traces whose datetime index has a step in seconds.

    Three series of 100_000 samples are added to a single figure. Each step is
    ``abs(np.random.randn()) * 1000`` seconds, rounded to 4 decimals and
    wrapped in a ``pd.Timedelta``. Every resulting trace must expose exactly
    1000 x / y / text / hovertext values, and each text value must still be
    the negation of its hovertext value.
    """
    n_samples = 100_000
    n_shown = 1000
    fwr = FigureWidgetResampler(
        default_n_shown_samples=n_shown,
        verbose=True,
        default_downsampler=EfficientLTTB(),
    )

    for trace_idx in range(3):
        # Build a series starting "now" with a random (fractional) second step.
        start = datetime.now()
        step_seconds = round(np.abs(np.random.randn()) * 1000, 4)
        step = pd.Timedelta(f"{step_seconds}s")
        series = pd.Series(
            index=pd.date_range(start, freq=step, periods=n_samples),
            data=np.arange(n_samples),
        )
        labels = series.astype(str)
        negated_labels = (-series).astype(str)

        fwr.add_trace(
            go.Scatter(name="hf_text"),
            hf_x=series.index,
            hf_y=series,
            hf_text=labels,
            hf_hovertext=negated_labels,
        )

        trace = fwr.data[trace_idx]
        assert len(trace["x"]) == n_shown
        assert len(trace["y"]) == n_shown

        shown_text = trace["text"].astype(int)
        shown_hovertext = trace["hovertext"].astype(int)
        assert len(shown_hovertext) == n_shown
        assert len(shown_text) == n_shown

        # Each text value is the negation of its hovertext counterpart, so
        # the number of matches must equal the number of shown samples.
        assert (shown_text == -shown_hovertext).sum() == n_shown

0 comments on commit 45bb304

Please sign in to comment.