NEW: reset global seed before every test case (#155)
martins0n committed Oct 11, 2021
1 parent 7219c3d commit 50f2074
Showing 13 changed files with 40 additions and 76 deletions.
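
At its core, the commit replaces ad-hoc seeding scattered across individual tests with a single autouse pytest fixture (added in tests/conftest.py below): pytest runs an autouse fixture before every test in its scope, so each test starts from the same global random state. A minimal, self-contained sketch of that pattern, assuming numpy and torch are installed (the fixture and test names here are illustrative):

```python
import random

import numpy as np
import pytest
import torch


@pytest.fixture(autouse=True)
def reset_global_seed():
    """Reset every global RNG before each test (mirrors the random_seed fixture below)."""
    seed = 121
    random.seed(seed)        # Python's built-in RNG
    np.random.seed(seed)     # numpy's legacy global RNG
    torch.manual_seed(seed)  # torch RNG (CPU and, if present, CUDA)


def test_draws_are_reproducible():
    # The autouse fixture has already run, so the first draw from the global
    # RNG matches the first draw from a fresh RandomState with the same seed.
    assert np.random.rand() == pytest.approx(np.random.RandomState(121).rand())
```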
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -32,6 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Colorbar scaling in Correlation heatmap plotter ([#143](https://github.com/tinkoff-ai/etna-ts/pull/143))
- Add Correlation heatmap in EDA notebook ([#144](https://github.com/tinkoff-ai/etna-ts/pull/144))
- Add `__repr__` for Pipeline ([#151](https://github.com/tinkoff-ai/etna-ts/pull/151))
- Defined random state for every test case ([#155](https://github.com/tinkoff-ai/etna-ts/pull/155))

### Fixed

8 changes: 5 additions & 3 deletions etna/datasets/tsdataset.py
@@ -69,7 +69,6 @@ class TSDataset:
"""

idx = pd.IndexSlice
np.random.seed(0)

def __init__(self, df: pd.DataFrame, freq: str, df_exog: Optional[pd.DataFrame] = None):
"""Init TSDataset.
@@ -313,7 +312,7 @@ def regressors(self) -> List[str]:
"""
return self._regressors

def plot(self, n_segments: int = 10, column: str = "target", segments: Optional[Sequence] = None):
def plot(self, n_segments: int = 10, column: str = "target", segments: Optional[Sequence] = None, seed: int = 1):
"""Plot of random or chosen segments.
Parameters
@@ -324,6 +323,8 @@ def plot(self, n_segments: int = 10, column: str = "target", segments: Optional[
feature to plot
segments:
segments to plot
seed:
seed for local random state
"""
if not segments:
segments = self.segments
@@ -332,7 +333,8 @@ def plot(self, n_segments: int = 10, column: str = "target", segments: Optional[
rows_num = math.ceil(k / columns_num)
_, ax = plt.subplots(rows_num, columns_num, figsize=(20, 5 * rows_num), squeeze=False)
ax = ax.ravel()
for i, segment in enumerate(sorted(np.random.choice(segments, size=k, replace=False))):
rnd_state = np.random.RandomState(seed)
for i, segment in enumerate(sorted(rnd_state.choice(segments, size=k, replace=False))):
df_slice = self[:, segment, column]
ax[i].plot(df_slice.index, df_slice.values)
ax[i].set_title(segment)
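
The plot change above removes the module-level np.random.seed(0), which mutated the global RNG as a side effect of defining the class, and instead draws segments from a RandomState that is local to the call. A rough sketch of the difference, using only numpy (segment names are made up):

```python
import numpy as np

segments = [f"segment_{i}" for i in range(10)]

# Before: the sample depended on, and advanced, numpy's global RNG.
np.random.seed(0)
global_pick = np.random.choice(segments, size=3, replace=False)

# After: a RandomState keyed by the plot's `seed` argument gives a
# reproducible sample without touching global state at all.
rnd_state = np.random.RandomState(seed=1)
local_pick = rnd_state.choice(segments, size=3, replace=False)
print(sorted(local_pick))  # same three segments on every run
```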
27 changes: 20 additions & 7 deletions tests/conftest.py
@@ -5,8 +5,21 @@
from etna.datasets.tsdataset import TSDataset


@pytest.fixture(autouse=True)
def random_seed():
"Fixture to fix random state for every test case"
import random

import torch

SEED = 121 # noqa: N806
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)


@pytest.fixture()
def example_df():
def example_df(random_seed):
df1 = pd.DataFrame()
df1["timestamp"] = pd.date_range(start="2020-01-01", end="2020-02-01", freq="H")
df1["segment"] = "segment_1"
@@ -21,7 +34,7 @@ def example_df():


@pytest.fixture
def two_dfs_with_different_timestamps():
def two_dfs_with_different_timestamps(random_seed):
"""Generate two dataframes with the same segments and different timestamps"""

def generate_df(start_time):
@@ -44,7 +57,7 @@ def generate_df(start_time):


@pytest.fixture
def two_dfs_with_different_segments_sets():
def two_dfs_with_different_segments_sets(random_seed):
"""Generate two dataframes with the same timestamps and different segments"""

def generate_df(n_segments):
@@ -67,7 +80,7 @@ def generate_df(n_segments):


@pytest.fixture
def train_test_dfs():
def train_test_dfs(random_seed):
"""Generate two dataframes with the same segments and the same timestamps"""

def generate_df():
@@ -131,7 +144,7 @@ def outliers_df():


@pytest.fixture
def example_df_() -> pd.DataFrame:
def example_df_(random_seed) -> pd.DataFrame:
periods = 100
df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
df1["segment"] = ["segment_1"] * periods
@@ -150,7 +163,7 @@ def example_df_() -> pd.DataFrame:


@pytest.fixture
def example_tsds() -> TSDataset:
def example_tsds(random_seed) -> TSDataset:
periods = 100
df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
df1["segment"] = "segment_1"
@@ -168,7 +181,7 @@ def example_tsds() -> TSDataset:


@pytest.fixture
def example_reg_tsds() -> TSDataset:
def example_reg_tsds(random_seed) -> TSDataset:
periods = 100
df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
df1["segment"] = "segment_1"
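
Even though random_seed is autouse, the data-generating fixtures above also list it as an explicit parameter; depending on it directly guarantees the seed has been reset before the fixture body draws any random numbers. A small sketch of how a fixture would opt in, with a hypothetical fixture and test name:

```python
import numpy as np
import pytest


@pytest.fixture()
def noisy_series(random_seed):
    # `random_seed` (from conftest.py) runs first, so this draw is the same
    # on every test run and in every test ordering.
    return np.random.normal(loc=0.0, scale=1.0, size=100)


def test_series_shape(noisy_series):
    assert noisy_series.shape == (100,)
```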
7 changes: 3 additions & 4 deletions tests/test_analysis/test_outliers/test_hist_outliers.py
@@ -50,11 +50,10 @@ def test_v_optimal_hist(series: np.array, bins_number: int, expected: np.array):
np.testing.assert_almost_equal(error, expected)


@pytest.mark.parametrize(
"series,k", ((np.random.random(100), 10), (np.random.random(100), 20), (np.random.random(10), 4))
)
def test_compute_f_format(series: np.array, k: int):
@pytest.mark.parametrize("series_len,k", ((100, 10), (100, 20), (10, 4)))
def test_compute_f_format(random_seed, series_len: int, k: int):
"""Check that computeF produce the correct size output."""
series = np.random.random(size=series_len)
p, pp = np.empty_like(series), np.empty_like(series)
p[0] = series[0]
pp[0] = series[0] ** 2
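
The rewrite of test_compute_f_format above is not just cosmetic: arguments to @pytest.mark.parametrize are evaluated at collection time, before any fixture runs, so a np.random.random(100) placed in the decorator cannot be controlled by the new autouse fixture. Parametrizing only the length and drawing the series inside the test moves the draw after the seed reset. A schematic comparison (test names are illustrative):

```python
import numpy as np
import pytest

# Anti-pattern: the array is built while pytest collects tests,
# i.e. before the autouse random_seed fixture has seeded anything.
@pytest.mark.parametrize("series", [np.random.random(100)])
def test_uses_collection_time_data(series):
    assert series.shape == (100,)


# Fixed: only the size is parametrized; the random draw happens inside the
# test body, after random_seed has reset the global state.
@pytest.mark.parametrize("series_len", [100, 10])
def test_draws_after_seeding(random_seed, series_len):
    series = np.random.random(size=series_len)
    assert series.shape == (series_len,)
```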
2 changes: 1 addition & 1 deletion tests/test_clustering/test_clustering.py
@@ -9,7 +9,7 @@


@pytest.fixture
def eucl_ts() -> TSDataset:
def eucl_ts(random_seed) -> TSDataset:
df = pd.DataFrame()
for i in range(1, 8):
date_range = pd.date_range("2020-01-01", "2020-05-01")
2 changes: 1 addition & 1 deletion tests/test_datasets/test_dataset.py
@@ -10,7 +10,7 @@


@pytest.fixture()
def tsdf_with_exog() -> TSDataset:
def tsdf_with_exog(random_seed) -> TSDataset:
df_1 = pd.DataFrame.from_dict({"timestamp": pd.date_range("2021-02-01", "2021-07-01", freq="1d")})
df_2 = pd.DataFrame.from_dict({"timestamp": pd.date_range("2021-02-01", "2021-07-01", freq="1d")})
df_1["segment"] = "Moscow"
8 changes: 4 additions & 4 deletions tests/test_model_selection/test_backtest.py
@@ -25,7 +25,7 @@


@pytest.fixture
def imbalanced_tsdf() -> TSDataset:
def imbalanced_tsdf(random_seed) -> TSDataset:
"""Generate two series with big time range difference"""
df1 = pd.DataFrame({"timestamp": pd.date_range("2021-01-25", "2021-02-01", freq="D")})
df1["segment"] = "segment_1"
@@ -43,7 +43,7 @@ def imbalanced_tsdf() -> TSDataset:


@pytest.fixture()
def big_daily_example_tsdf() -> TSDataset:
def big_daily_example_tsdf(random_seed) -> TSDataset:
df1 = pd.DataFrame()
df1["timestamp"] = pd.date_range(start="2019-01-01", end="2020-04-01", freq="D")
df1["segment"] = "segment_1"
@@ -62,7 +62,7 @@ def big_daily_example_tsdf() -> TSDataset:


@pytest.fixture()
def example_tsdf() -> TSDataset:
def example_tsdf(random_seed) -> TSDataset:
df1 = pd.DataFrame()
df1["timestamp"] = pd.date_range(start="2020-01-01", end="2020-02-01", freq="H")
df1["segment"] = "segment_1"
@@ -81,7 +81,7 @@ def example_tsdf() -> TSDataset:


@pytest.fixture()
def big_example_tsdf() -> TSDataset:
def big_example_tsdf(random_seed) -> TSDataset:
df1 = pd.DataFrame()
df1["timestamp"] = pd.date_range(start="2020-01-01", end="2021-02-01", freq="D")
df1["segment"] = "segment_1"
8 changes: 0 additions & 8 deletions tests/test_models/nn/test_deepar.py
@@ -1,8 +1,4 @@
import random

import numpy as np
import pytest
import torch
from pytorch_forecasting.data import GroupNormalizer

from etna.datasets.tsdataset import TSDataset
@@ -39,10 +35,6 @@ def test_deepar_model_run_weekly_overfit(weekly_period_df, horizon):
When:
Then: I get {horizon} periods per dataset as a forecast and they are "the same" as the past
"""
SEED = 121 # noqa: N806
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)

ts_start = sorted(set(weekly_period_df.timestamp))[-horizon]
train, test = (
9 changes: 0 additions & 9 deletions tests/test_models/nn/test_tft.py
@@ -1,8 +1,4 @@
import random

import numpy as np
import pytest
import torch

from etna.datasets.tsdataset import TSDataset
from etna.metrics import MAE
@@ -41,11 +37,6 @@ def test_tft_model_run_weekly_overfit(weekly_period_df, horizon):
Then: I get {horizon} periods per dataset as a forecast and they are "the same" as the past
"""

SEED = 121 # noqa: N806
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)

ts_start = sorted(set(weekly_period_df.timestamp))[-horizon]
train, test = (
weekly_period_df[lambda x: x.timestamp < ts_start],
6 changes: 2 additions & 4 deletions tests/test_models/test_linear_model.py
@@ -35,18 +35,16 @@ def linear_segments_by_parameters(alpha_values, intercept_values):


@pytest.fixture()
def linear_segments_ts_unique():
def linear_segments_ts_unique(random_seed):
"""Create TSDataset that represents 3 segments with unique linear dependency on lags in each."""
np.random.seed(42)
alpha_values = [np.random.rand() * 4 - 2 for _ in range(3)]
intercept_values = [np.random.rand() * 4 + 1 for _ in range(3)]
return linear_segments_by_parameters(alpha_values, intercept_values)


@pytest.fixture()
def linear_segments_ts_common():
def linear_segments_ts_common(random_seed):
"""Create TSDataset that represents 3 segments with common linear dependency on lags in each."""
np.random.seed(42)
alpha_values = [np.random.rand() * 4 - 2] * 3
intercept_values = [np.random.rand() * 4 + 1 for _ in range(3)]
return linear_segments_by_parameters(alpha_values, intercept_values)
32 changes: 0 additions & 32 deletions tests/test_models/test_sarimax_model.py
@@ -1,9 +1,4 @@
import numpy as np

from etna.datasets import TSDataset
from etna.metrics import MAE
from etna.models import SARIMAXModel
from etna.transforms import TheilSenTrendTransform


def test_sarimax_forecaster_run(example_tsds):
@@ -39,30 +34,3 @@ def test_sarimax_forecaster_run_with_reg(example_reg_tsds):

assert not res.isnull().values.any()
assert len(res) == 14


def test_compare_sarimax_vanilla_reg(example_reg_tsds):
horizon = 24
example_tsds = TSDataset(example_reg_tsds[:, :, "target"], freq="D")
train, test = example_tsds.train_test_split(
train_start=None, train_end="2020-01-31", test_start="2020-02-01", test_end="2020-02-24"
)
model = SARIMAXModel()
model.fit(train)
future_ts = train.make_future(future_steps=horizon)
vanilla_result = model.forecast(future_ts)

train, test = example_reg_tsds.train_test_split(
train_start=None, train_end="2020-01-31", test_start="2020-02-01", test_end="2020-02-24"
)
prep = TheilSenTrendTransform(in_column="target")
train.fit_transform([prep])
model = SARIMAXModel()
model.fit(train)
future_ts = train.make_future(future_steps=horizon)
reg_result = model.forecast(future_ts)

van_acc = np.array(list(MAE()(test, vanilla_result).values()))
reg_acc = np.array(list(MAE()(test, reg_result).values()))

assert np.all(van_acc < reg_acc)
2 changes: 1 addition & 1 deletion tests/test_transforms/test_impute_transform.py
@@ -56,7 +56,7 @@ def test_wrong_init_two_segments(all_date_present_df_two_segments):


@pytest.fixture()
def df_with_missing_value_x_index(all_date_present_df: pd.DataFrame) -> Tuple[pd.DataFrame, int]:
def df_with_missing_value_x_index(random_seed, all_date_present_df: pd.DataFrame) -> Tuple[pd.DataFrame, int]:
"""Create pd.DataFrame that contains some target on given range of dates with one gap."""
# index cannot be first or last value,
# because Imputer should know starting and ending dates
4 changes: 2 additions & 2 deletions tests/test_transforms/test_log_transform.py
@@ -8,7 +8,7 @@


@pytest.fixture
def non_positive_df_() -> pd.DataFrame:
def non_positive_df_(random_seed) -> pd.DataFrame:
"""Generate dataset with non-positive target."""
periods = 100
df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})
@@ -26,7 +26,7 @@ def non_positive_df_() -> pd.DataFrame:


@pytest.fixture
def positive_df_() -> pd.DataFrame:
def positive_df_(random_seed) -> pd.DataFrame:
"""Generate dataset with positive target."""
periods = 100
df1 = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", periods=periods)})