Skip to content

Commit

Permalink
Fix bugs and documentation for plot_backtest and `plot_backtest_int…
Browse files Browse the repository at this point in the history
…eractive` (#700)

* Fix plot_backtest

* Fix plot_backtest_interactive

* Update changelog

* Fixes after merge

* Improve _validate_intersecting_segments performance

* Change opacity
  • Loading branch information
Mr-Geekman committed May 19, 2022
1 parent 93fe247 commit be1b719
Show file tree
Hide file tree
Showing 3 changed files with 262 additions and 70 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Expand Up @@ -50,6 +50,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fix bugs and documentation for cross_corr_plot ([#691](https://github.com/tinkoff-ai/etna/pull/691))
-
-
- Fix bugs and documentation for plot_backtest and plot_backtest_interactive ([#700](https://github.com/tinkoff-ai/etna/pull/700))
-
-
-
-
Expand Down
252 changes: 182 additions & 70 deletions etna/analysis/plotters.py
Expand Up @@ -245,17 +245,33 @@ def plot_forecast(
ax[i].legend(loc="upper left")


def _validate_intersecting_segments(fold_numbers: pd.Series):
"""Validate if segments aren't intersecting."""
fold_info = []
for fold_number in fold_numbers.unique():
fold_start = fold_numbers[fold_numbers == fold_number].index.min()
fold_end = fold_numbers[fold_numbers == fold_number].index.max()
fold_info.append({"fold_start": fold_start, "fold_end": fold_end})

fold_info.sort(key=lambda x: x["fold_start"])

for fold_info_1, fold_info_2 in zip(fold_info[:-1], fold_info[1:]):
if fold_info_2["fold_start"] <= fold_info_1["fold_end"]:
raise ValueError("Folds are intersecting")


def plot_backtest(
forecast_df: pd.DataFrame,
ts: "TSDataset",
segments: Optional[List[str]] = None,
folds: Optional[List[int]] = None,
columns_num: int = 2,
history_len: int = 0,
history_len: Union[int, Literal["all"]] = 0,
figsize: Tuple[int, int] = (10, 5),
):
"""Plot targets and forecast for backtest pipeline.
This function doesn't support intersecting folds.
Parameters
----------
forecast_df:
Expand All @@ -264,62 +280,107 @@ def plot_backtest(
dataframe of timeseries that was used for backtest
segments:
segments to plot
folds:
folds to plot
columns_num:
number of subplots columns
history_len:
length of pre-backtest history to plot
length of pre-backtest history to plot, if value is "all" then plot all the history
figsize:
size of the figure per subplot with one segment in inches
Raises
------
ValueError:
if ``history_len`` is negative
ValueError:
if folds are intersecting
"""
if history_len != "all" and history_len < 0:
raise ValueError("Parameter history_len should be non-negative or 'all'")

if segments is None:
segments = sorted(ts.segments)
df = ts.df

_, ax = prepare_axes(num_plots=len(segments), columns_num=columns_num, figsize=figsize)

if not folds:
folds = sorted(set(forecast_df[segments[0]]["fold_number"]))
fold_numbers = forecast_df[segments[0]]["fold_number"]
_validate_intersecting_segments(fold_numbers)
folds = sorted(set(fold_numbers))

# prepare dataframes
df = ts.df
forecast_start = forecast_df.index.min()
history_df = df[df.index < forecast_start]
backtest_df = df[df.index >= forecast_start]
freq_timedelta = df.index[1] - df.index[0]

# prepare colors
default_colors = plt.rcParams["axes.prop_cycle"].by_key()["color"]
color_cycle = itertools.cycle(default_colors)
lines_colors = {line_name: next(color_cycle) for line_name in ["history", "test", "forecast"]}

_, ax = prepare_axes(num_plots=len(segments), columns_num=columns_num, figsize=figsize)
for i, segment in enumerate(segments):
segment_backtest_df = backtest_df[segment]
segment_history_df = history_df[segment]
segment_forecast_df = forecast_df[segment]
is_full_folds = set(segment_backtest_df.index) == set(segment_forecast_df.index)

if history_len:
plot_df = segment_history_df.tail(history_len)
# plot history
if history_len == "all":
plot_df = segment_history_df.append(segment_backtest_df)
elif history_len > 0:
plot_df = segment_history_df.tail(history_len).append(segment_backtest_df)
else:
plot_df = segment_backtest_df
ax[i].plot(plot_df.index, plot_df.target, color=lines_colors["history"])

ax[i].plot(plot_df.index, plot_df.target, label="history")
ax[i].plot(segment_backtest_df.index, segment_backtest_df.target, label="test")

segment_forecast_df = forecast_df[segment]
for fold_number in folds:
forecast_df_slice_fold = segment_forecast_df[segment_forecast_df.fold_number == fold_number]
start_fold = fold_numbers[fold_numbers == fold_number].index.min()
end_fold = fold_numbers[fold_numbers == fold_number].index.max()
end_fold_exclusive = end_fold + freq_timedelta

# draw test
backtest_df_slice_fold = segment_backtest_df[start_fold:end_fold_exclusive]
ax[i].plot(backtest_df_slice_fold.index, backtest_df_slice_fold.target, color=lines_colors["test"])

if is_full_folds:
# draw forecast
forecast_df_slice_fold = segment_forecast_df[start_fold:end_fold_exclusive]
ax[i].plot(forecast_df_slice_fold.index, forecast_df_slice_fold.target, color=lines_colors["forecast"])
else:
forecast_df_slice_fold = segment_forecast_df[start_fold:end_fold]
backtest_df_slice_fold = backtest_df_slice_fold.loc[forecast_df_slice_fold.index]

# draw points on test
ax[i].scatter(backtest_df_slice_fold.index, backtest_df_slice_fold.target, color=lines_colors["test"])

# draw forecast
ax[i].scatter(
forecast_df_slice_fold.index, forecast_df_slice_fold.target, color=lines_colors["forecast"]
)

# draw borders of current fold
opacity = 0.075 * ((fold_number + 1) % 2) + 0.075
ax[i].axvspan(
forecast_df_slice_fold.index.min(),
forecast_df_slice_fold.index.max(),
alpha=0.15 * (int(forecast_df_slice_fold.fold_number.max() + 1) % 2),
start_fold,
end_fold_exclusive,
alpha=opacity,
color="skyblue",
)

ax[i].plot(segment_forecast_df.index, segment_forecast_df.target, label="forecast")
# plot legend
legend_handles = [
Line2D([0], [0], marker="o", color=color, label=label) for label, color in lines_colors.items()
]
ax[i].legend(handles=legend_handles)

ax[i].set_title(segment)
ax[i].legend()
ax[i].tick_params("x", rotation=45)


def plot_backtest_interactive(
forecast_df: pd.DataFrame,
ts: "TSDataset",
segments: Optional[List[str]] = None,
folds: Optional[List[int]] = None,
history_len: int = 0,
history_len: Union[int, Literal["all"]] = 0,
figsize: Tuple[int, int] = (900, 600),
) -> go.Figure:
"""Plot targets and forecast for backtest pipeline using plotly.
Expand All @@ -332,94 +393,145 @@ def plot_backtest_interactive(
dataframe of timeseries that was used for backtest
segments:
segments to plot
folds:
folds to plot
history_len:
length of pre-backtest history to plot
length of pre-backtest history to plot, if value is "all" then plot all the history
figsize:
size of the figure in pixels
Returns
-------
go.Figure:
result of plotting
Raises
------
ValueError:
if ``history_len`` is negative
ValueError:
if folds are intersecting
"""
if history_len != "all" and history_len < 0:
raise ValueError("Parameter history_len should be non-negative or 'all'")

if segments is None:
segments = sorted(ts.segments)
df = ts.df

if not folds:
folds = sorted(set(forecast_df[segments[0]]["fold_number"]))

fig = go.Figure()
colors = plotly.colors.qualitative.Dark24
fold_numbers = forecast_df[segments[0]]["fold_number"]
_validate_intersecting_segments(fold_numbers)
folds = sorted(set(fold_numbers))

# prepare dataframes
df = ts.df
forecast_start = forecast_df.index.min()
history_df = df[df.index < forecast_start]
backtest_df = df[df.index >= forecast_start]
freq_timedelta = df.index[1] - df.index[0]

# prepare colors
colors = plotly.colors.qualitative.Dark24

fig = go.Figure()
for i, segment in enumerate(segments):
segment_backtest_df = backtest_df[segment]
segment_history_df = history_df[segment]
segment_forecast_df = forecast_df[segment]
is_full_folds = set(segment_backtest_df.index) == set(segment_forecast_df.index)

if history_len:
plot_df = segment_history_df.tail(history_len)
# plot history
if history_len == "all":
plot_df = segment_history_df.append(segment_backtest_df)
elif history_len > 0:
plot_df = segment_history_df.tail(history_len).append(segment_backtest_df)
else:
plot_df = segment_backtest_df

# history
fig.add_trace(
go.Scattergl(
x=plot_df.index,
y=plot_df.target,
legendgroup=f"{segment}",
name=f"{segment}",
mode="lines",
marker_color=colors[i % len(colors)],
showlegend=True,
line=dict(width=2, dash="solid"),
line=dict(width=2, dash="dash"),
)
)

# test
fig.add_trace(
go.Scattergl(
x=segment_backtest_df.index,
y=segment_backtest_df.target,
legendgroup=f"{segment}",
name=f"Test: {segment}",
marker_color=colors[i % len(colors)],
showlegend=False,
line=dict(width=2, dash="dot"),
for fold_number in folds:
start_fold = fold_numbers[fold_numbers == fold_number].index.min()
end_fold = fold_numbers[fold_numbers == fold_number].index.max()
end_fold_exclusive = end_fold + freq_timedelta

# draw test
backtest_df_slice_fold = segment_backtest_df[start_fold:end_fold_exclusive]
fig.add_trace(
go.Scattergl(
x=backtest_df_slice_fold.index,
y=backtest_df_slice_fold.target,
legendgroup=f"{segment}",
name=f"Test: {segment}",
mode="lines",
marker_color=colors[i % len(colors)],
showlegend=False,
line=dict(width=2, dash="solid"),
)
)
)

# folds
segment_forecast_df = forecast_df[segment]
if i == 0:
for fold_number in folds:
forecast_df_slice_fold = segment_forecast_df[segment_forecast_df.fold_number == fold_number]
opacity = 0.15 * (int(forecast_df_slice_fold.fold_number.max() + 1) % 2)
if is_full_folds:
# draw forecast
forecast_df_slice_fold = segment_forecast_df[start_fold:end_fold_exclusive]
fig.add_trace(
go.Scattergl(
x=forecast_df_slice_fold.index,
y=forecast_df_slice_fold.target,
legendgroup=f"{segment}",
name=f"Forecast: {segment}",
mode="lines",
marker_color=colors[i % len(colors)],
showlegend=False,
line=dict(width=2, dash="dot"),
)
)
else:
forecast_df_slice_fold = segment_forecast_df[start_fold:end_fold]
backtest_df_slice_fold = backtest_df_slice_fold.loc[forecast_df_slice_fold.index]

# draw points on test
fig.add_trace(
go.Scattergl(
x=backtest_df_slice_fold.index,
y=backtest_df_slice_fold.target,
legendgroup=f"{segment}",
name=f"Test: {segment}",
mode="markers",
marker_color=colors[i % len(colors)],
showlegend=False,
)
)

# draw forecast
fig.add_trace(
go.Scattergl(
x=forecast_df_slice_fold.index,
y=forecast_df_slice_fold.target,
legendgroup=f"{segment}",
name=f"Forecast: {segment}",
mode="markers",
marker_color=colors[i % len(colors)],
showlegend=False,
)
)

if i == 0:
opacity = 0.075 * ((fold_number + 1) % 2) + 0.075
fig.add_vrect(
x0=forecast_df_slice_fold.index.min(),
x1=forecast_df_slice_fold.index.max(),
x0=start_fold,
x1=end_fold_exclusive,
line_width=0,
fillcolor="blue",
opacity=opacity,
)

# forecast
fig.add_trace(
go.Scattergl(
x=segment_forecast_df.index,
y=segment_forecast_df.target,
legendgroup=f"{segment}",
name=f"Forecast: {segment}",
marker_color=colors[i % len(colors)],
showlegend=False,
line=dict(width=2, dash="dash"),
)
)

fig.update_layout(
height=figsize[1],
width=figsize[0],
Expand Down

1 comment on commit be1b719

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.