Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve setup #24

Merged
merged 8 commits into from
Apr 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion ceruleo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@
CACHE_PATH.mkdir(parents=True, exist_ok=True)


__version__ = "2.0.3"
__version__ = "2.0.4"
9 changes: 4 additions & 5 deletions ceruleo/dataset/analysis/numerical_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,14 @@
import antropy as ant
import numpy as np
import pandas as pd

from pyparsing import col
from scipy.stats import spearmanr
from ceruleo.dataset.transformed import TransformedDataset
from tqdm.auto import tqdm
from sklearn.feature_selection import mutual_info_regression
from tqdm.auto import tqdm
from uncertainties import ufloat

from ceruleo.dataset.transformed import TransformedDataset
from ceruleo.dataset.ts_dataset import AbstractLivesDataset
from ceruleo.dataset.utils import iterate_over_features_and_target
from uncertainties import ufloat


def entropy(s: np.ndarray)-> float:
Expand Down
21 changes: 13 additions & 8 deletions ceruleo/graphics/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,18 @@
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

import seaborn as sns

from ceruleo.dataset.transformed import TransformedDataset
from ceruleo.graphics.utils.curly_brace import curlyBrace
from ceruleo.results.results import (FittedLife, PredictionResult,
models_cv_results, split_lives,
unexpected_breaks, unexploited_lifetime)
from ceruleo.results.results import (
FittedLife,
PredictionResult,
models_cv_results,
split_lives,
unexpected_breaks,
unexploited_lifetime,
)


def plot_lives(ds: TransformedDataset):
Expand Down Expand Up @@ -116,7 +121,7 @@ def set_box_color(bp, color):

max_x = np.max(ticks) + 1
ax.set_xlabel("RUL" + ("" if x_axis_label is None else x_axis_label))
ax.set_ylabel("$y - \hat{y}$" + ("" if y_axis_label is None else y_axis_label))
ax.set_ylabel(r"$y - \hat{y}$" + ("" if y_axis_label is None else y_axis_label))
ax.set_xticks(ticks)
ax.set_xticklabels(labels)
ax.legend()
Expand Down Expand Up @@ -263,7 +268,7 @@ def _cv_barplot_errors_wrt_RUL_multiple_models(
dx += bar_group_width + group_separation

ax.set_xlabel("RUL" + ("" if x_axis_label is None else x_axis_label))
ax.set_ylabel("$y - \hat{y}$" + ("" if y_axis_label is None else y_axis_label))
ax.set_ylabel(r"$y - \hat{y}$" + ("" if y_axis_label is None else y_axis_label))
ax.set_xticks(ticks)
ax.set_xticklabels(labels)
ax.legend()
Expand Down Expand Up @@ -370,7 +375,7 @@ def _cv_shadedline_plot_errors_wrt_RUL_multiple_models(
ticks.append(x)

ax.set_xlabel("RUL" + ("" if x_axis_label is None else x_axis_label))
ax.set_ylabel("$y - \hat{y}$" + ("" if y_axis_label is None else y_axis_label))
ax.set_ylabel(r"$y - \hat{y}$" + ("" if y_axis_label is None else y_axis_label))
ax.set_xticks(ticks)
ax.set_xticklabels(labels)
ax.legend()
Expand Down Expand Up @@ -737,7 +742,7 @@ def plot_predictions(
ax.set_ylabel(units)
ax.set_xlabel(units)
legend = ax.legend()
for l in legend.legendHandles:
for l in legend.legend_handles:
l.set_markersize(6)


Expand Down
3 changes: 1 addition & 2 deletions ceruleo/results/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,9 @@
"""
import logging
from dataclasses import dataclass
from typing import Callable, Dict, List, Optional, Tuple, Union
from typing import Dict, List, Optional, Tuple, Union

import numpy as np
import pandas as pd
from ceruleo.results.picewise_regression import (PiecewesieLinearFunction,
PiecewiseLinearRegression)
from sklearn.metrics import mean_absolute_error as mae
Expand Down
24 changes: 14 additions & 10 deletions ceruleo/transformation/features/denoising.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@
class SavitzkyGolayTransformer(TransformerStep):
"""Filter each feature using LOESS

Parameters:
Parameters:
window: window size of the filter
order: Order of the filter, by default 2
name: Step name
"""

def __init__(self, window: int, order: int = 2, name: Optional[str] = None):

super().__init__(name=name)
self.window = window
self.order = order
Expand Down Expand Up @@ -66,16 +65,17 @@ def __init__(
min_periods: int = 15,
name: Optional[str] = None,
):

super().__init__(name=name)
self.window = window
self.min_periods = min_periods
self.center = center

def transform(self, X: pd.DataFrame, y=None) -> pd.DataFrame:
return X.rolling(
self.window, min_periods=self.min_periods, center=self.center
).mean(skip_na=True)
self.window,
min_periods=self.min_periods,
center=self.center,
).mean(numeric_only=True)


class MedianFilter(TransformerStep):
Expand All @@ -97,7 +97,9 @@ def __init__(self, window: int, min_periods: int = 15, name: Optional[str] = Non
self.min_periods = min_periods

def transform(self, X: pd.DataFrame, y=None) -> pd.DataFrame:
return X.rolling(self.window, min_periods=self.min_periods).median(skip_na=True)
return X.rolling(self.window, min_periods=self.min_periods).median(
numeric_only=True
)


class OneDimensionalKMeans(TransformerStep):
Expand All @@ -117,7 +119,9 @@ def __init__(self, n_clusters: int = 5, name: Optional[str] = None):
def partial_fit(self, X):
if len(self.clusters) == 0:
for c in X.columns:
self.clusters[c] = MiniBatchKMeans(n_clusters=self.n_clusters)
self.clusters[c] = MiniBatchKMeans(
n_clusters=self.n_clusters, n_init="auto"
)

for c in X.columns:
self.clusters[c].partial_fit(np.atleast_2d(X[c]).T)
Expand Down Expand Up @@ -160,7 +164,7 @@ class MultiDimensionalKMeans(TransformerStep):
def __init__(self, n_clusters: int = 5, name: Optional[str] = None):
super().__init__(name=name)
self.n_clusters = n_clusters
self.clusters = MiniBatchKMeans(n_clusters=self.n_clusters)
self.clusters = MiniBatchKMeans(n_clusters=self.n_clusters, n_init="auto")

def partial_fit(self, X):
self.clusters.partial_fit(X)
Expand Down Expand Up @@ -203,7 +207,7 @@ def __init__(self, span: float, name: Optional[str] = None):
self.span = span

def transform(self, X: pd.DataFrame, y=None) -> pd.DataFrame:
return X.ewm(span=self.span).mean(skip_na=True)
return X.ewm(span=self.span, ignore_na=True).mean()


class GaussianFilter(TransformerStep):
Expand All @@ -228,7 +232,7 @@ def __init__(
min_points: int = 1,
center: bool = False,
*args,
**kwargs
**kwargs,
):
super().__init__(**kwargs)
self.window_size = window_size
Expand Down