Skip to content

Commit

Permalink
Add RelevanceTable (#249)
Browse files Browse the repository at this point in the history
* Add class

* Add fix

* Upd

* Add relevance

* Add repr

* Fix fixture typing
  • Loading branch information
julia-shenshina committed Nov 2, 2021
1 parent a51a363 commit 35db08a
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 2 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Backtest cli ([#223](https://github.com/tinkoff-ai/etna-ts/pull/223))
- TreeFeatureSelectionTransform ([#229](https://github.com/tinkoff-ai/etna-ts/pull/229))
- Feature relevance table calculation ([#227](https://github.com/tinkoff-ai/etna-ts/pull/227))
- Feature relevance table calculation ([#227](https://github.com/tinkoff-ai/etna-ts/pull/227), [#249](https://github.com/tinkoff-ai/etna-ts/pull/249))
- Method flatten to TSDataset ([#241](https://github.com/tinkoff-ai/etna-ts/pull/241))

### Changed
Expand Down
2 changes: 2 additions & 0 deletions etna/analysis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from etna.analysis.eda_utils import cross_corr_plot
from etna.analysis.eda_utils import distribution_plot
from etna.analysis.eda_utils import sample_pacf_plot
from etna.analysis.feature_relevance.relevance import RelevanceTable
from etna.analysis.feature_relevance.relevance import StatisticsRelevanceTable
from etna.analysis.feature_relevance.relevance_table import get_statistics_relevance_table
from etna.analysis.outliers.confidence_interval_outliers import get_anomalies_confidence_interval
from etna.analysis.outliers.density_outliers import get_anomalies_density
Expand Down
2 changes: 2 additions & 0 deletions etna/analysis/feature_relevance/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
from etna.analysis.feature_relevance.relevance import RelevanceTable
from etna.analysis.feature_relevance.relevance import StatisticsRelevanceTable
from etna.analysis.feature_relevance.relevance_table import get_statistics_relevance_table
52 changes: 52 additions & 0 deletions etna/analysis/feature_relevance/relevance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from abc import ABC
from abc import abstractmethod

import pandas as pd

from etna.analysis.feature_relevance.relevance_table import get_statistics_relevance_table
from etna.core.mixins import BaseMixin


class RelevanceTable(ABC, BaseMixin):
    """Base interface for objects that compute a feature relevance table.

    Concrete subclasses implement ``__call__`` and state, through
    ``greater_is_better``, how the values of the produced table are ordered.
    """

    def __init__(self, greater_is_better: bool):
        """Remember the ordering convention of the relevance values.

        Parameters
        ----------
        greater_is_better:
            bool flag; when True, the biggest value in the relevance table
            corresponds to the most important exog feature
        """
        self.greater_is_better = greater_is_better

    @abstractmethod
    def __call__(self, df: pd.DataFrame, df_exog: pd.DataFrame, **kwargs) -> pd.DataFrame:
        """Build the relevance table.

        For every series in ``df`` the relevance of the corresponding series
        in ``df_exog`` is computed.

        Parameters
        ----------
        df:
            dataframe with the series that are used as target
        df_exog:
            dataframe with the series whose relevance for ``df`` is computed
        **kwargs:
            additional implementation-specific arguments

        Returns
        -------
        relevance table: pd.DataFrame
            dataframe of shape n_segment x n_exog_series, where
            relevance_table[i][j] holds the relevance of the j-th ``df_exog``
            series to the i-th ``df`` series
        """


class StatisticsRelevanceTable(RelevanceTable):
    """Relevance table backed by ``get_statistics_relevance_table`` (tsfresh statistics)."""

    def __init__(self):
        """Create the table builder with ``greater_is_better`` set to False."""
        # NOTE(review): presumably the table holds p-value-like scores where
        # smaller means more relevant -- confirm against the helper.
        super().__init__(greater_is_better=False)

    def __call__(self, df: pd.DataFrame, df_exog: pd.DataFrame, **kwargs) -> pd.DataFrame:
        """Compute the feature relevance table.

        The work is delegated to ``etna.analysis.get_statistics_relevance_table``.

        Parameters
        ----------
        df:
            dataframe with the series that are used as target
        df_exog:
            dataframe with the series whose relevance for ``df`` is computed
        **kwargs:
            accepted for interface compatibility; not passed on to the helper

        Returns
        -------
        relevance table: pd.DataFrame
            the table produced by ``get_statistics_relevance_table``
        """
        return get_statistics_relevance_table(df=df, df_exog=df_exog)
4 changes: 3 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Tuple

import numpy as np
import pandas as pd
import pytest
Expand Down Expand Up @@ -377,7 +379,7 @@ def big_example_tsdf(random_seed) -> TSDataset:


@pytest.fixture
def simple_df_relevance() -> TSDataset:
def simple_df_relevance() -> Tuple[pd.DataFrame, pd.DataFrame]:
timestamp = pd.date_range("2021-01-01", "2021-02-01")
tmp = np.random.random(len(timestamp))

Expand Down
8 changes: 8 additions & 0 deletions tests/test_analysis/test_feature_relevance/test_relevance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from etna.analysis.feature_relevance import StatisticsRelevanceTable


def test_statistics_relevance_table(simple_df_relevance):
    """Check the ordering flag and the output shape of StatisticsRelevanceTable."""
    target_df, exog_df = simple_df_relevance
    relevance_table = StatisticsRelevanceTable()
    assert not relevance_table.greater_is_better
    computed = relevance_table(df=target_df, df_exog=exog_df)
    assert computed.shape == (2, 2)

0 comments on commit 35db08a

Please sign in to comment.