Skip to content

Commit

Permalink
Merge pull request #38 from naveenkaushik2504/master
Browse files Browse the repository at this point in the history
Issue #27 changes
  • Loading branch information
shaypal5 committed Oct 1, 2020
2 parents 32e41e8 + 541bf90 commit 75b15e0
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 44 deletions.
14 changes: 9 additions & 5 deletions pdpipe/col_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,8 @@ def _transform(self, df, verbose):
except KeyError: # pragma: no cover
raise PipelineApplicationError((
"Missing encoder for column {} when applying a fitted "
"OneHotEncode pipeline stage!").format(colname))
"OneHotEncode pipeline stage by class {} !")
.format(colname, self.__class__))
res_cols = col.apply(encoder)
for res_col in res_cols:
assign_map[res_col] = res_cols[res_col]
Expand Down Expand Up @@ -692,8 +693,9 @@ def _transform(self, df, verbose):
new_col = self._func(df)
except Exception:
raise PipelineApplicationError(
"Exception raised applying function{} to dataframe.".format(
self._func_desc
"Exception raised applying function {} to "
"dataframe by class {}.".format(
self._func_desc, self.__class__
)
)
if self._follow_column:
Expand Down Expand Up @@ -885,7 +887,8 @@ def _transform(self, df, verbose):
except KeyError: # pragma: no cover
raise PipelineApplicationError((
"Missig column {} when applying a fitted "
"Log pipeline stage!").format(colname))
"Log pipeline stage by class {} !").format(
colname, self.__class__))
loc = df.columns.get_loc(colname) + 1
new_name = colname + "_log"
if self._drop:
Expand All @@ -900,7 +903,8 @@ def _transform(self, df, verbose):
else: # pragma: no cover
raise PipelineApplicationError((
"Missig fitted parameter for column {} when applying a"
" fitted Log pipeline stage!").format(colname))
" fitted Log pipeline stage by class {}!").format(
colname, self.__class__))
# must check not None as neg numbers eval to False
if self._const_shift is not None:
new_col = new_col + self._const_shift
Expand Down
97 changes: 63 additions & 34 deletions pdpipe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@
from .exceptions import (
FailedPreconditionError,
UnfittedPipelineStageError,
PipelineApplicationError
)


Expand Down Expand Up @@ -674,16 +675,23 @@ def __timed_fit_transform(self, X, y=None, exraise=None, verbose=None):
inter_x = X
times = []
prev = time.time()
for stage in self._stages:
inter_x = stage.fit_transform(
X=inter_x,
y=None,
exraise=exraise,
verbose=verbose,
)
now = time.time()
times.append(now - prev)
prev = now
for i, stage in enumerate(self._stages):
try:
inter_x = stage.fit_transform(
X=inter_x,
y=None,
exraise=exraise,
verbose=verbose,
)
now = time.time()
times.append(now - prev)
prev = now
except Exception:
raise PipelineApplicationError(
"Exception raised in stage [ {}] {}".format(
i, stage
)
)
self.is_fitted = True
print("\nPipeline total application time: {:.3f}s.\n Details:".format(
sum(times)))
Expand Down Expand Up @@ -723,13 +731,20 @@ def fit_transform(self, X, y=None, exraise=None, verbose=None, time=False):
return self.__timed_fit_transform(
X=X, y=y, exraise=exraise, verbose=verbose)
inter_x = X
for stage in self._stages:
inter_x = stage.fit_transform(
X=inter_x,
y=None,
exraise=exraise,
verbose=verbose,
)
for i, stage in enumerate(self._stages):
try:
inter_x = stage.fit_transform(
X=inter_x,
y=None,
exraise=exraise,
verbose=verbose,
)
except Exception:
raise PipelineApplicationError(
"Exception raised in stage [ {}] {}".format(
i, stage
)
)
self.is_fitted = True
return inter_x

Expand Down Expand Up @@ -775,16 +790,23 @@ def __timed_transform(self, X, y=None, exraise=None, verbose=None):
inter_x = X
times = []
prev = time.time()
for stage in self._stages:
inter_x = stage.transform(
X=inter_x,
y=None,
exraise=exraise,
verbose=verbose,
)
now = time.time()
times.append(now - prev)
prev = now
for i, stage in enumerate(self._stages):
try:
inter_x = stage.transform(
X=inter_x,
y=None,
exraise=exraise,
verbose=verbose,
)
now = time.time()
times.append(now - prev)
prev = now
except Exception:
raise PipelineApplicationError(
"Exception raised in stage [ {}] {}".format(
i, stage
)
)
self.is_fitted = True
print("\nPipeline total application time: {:.3f}s.\n Details:".format(
sum(times)))
Expand Down Expand Up @@ -832,13 +854,20 @@ def transform(self, X, y=None, exraise=None, verbose=None, time=False):
return self.__timed_transform(
X=X, y=y, exraise=exraise, verbose=verbose)
inter_df = X
for stage in self._stages:
inter_df = stage.transform(
X=inter_df,
y=None,
exraise=exraise,
verbose=verbose,
)
for i, stage in enumerate(self._stages):
try:
inter_df = stage.transform(
X=inter_df,
y=None,
exraise=exraise,
verbose=verbose,
)
except Exception:
raise PipelineApplicationError(
"Exception raised in stage [ {}] {}".format(
i, stage
)
)
return inter_df

__call__ = apply
Expand Down
10 changes: 6 additions & 4 deletions pdpipe/sklearn_stages.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,9 @@ def _fit_transform(self, df, verbose):
)
except Exception:
raise PipelineApplicationError(
"Exception raised when Scale applied to columns {}".format(
self._columns_to_scale
"Exception raised when Scale applied to columns"
" {} by class {}".format(
self._columns_to_scale, self.__class__
)
)
if len(unscaled_cols) > 0:
Expand All @@ -235,8 +236,9 @@ def _transform(self, df, verbose):
)
except Exception:
raise PipelineApplicationError(
"Exception raised when Scale applied to columns {}".format(
self._columns_to_scale
"Exception raised when Scale applied to columns"
" {} by class {}".format(
self._columns_to_scale, self.__class__
)
)
if len(unscaled_cols) > 0:
Expand Down
25 changes: 24 additions & 1 deletion tests/core/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
PdPipelineStage,
PdPipeline
)
from pdpipe import make_pdpipeline
from pdpipe import make_pdpipeline, ColByFrameFunc, ColDrop
from pdpipe.exceptions import PipelineApplicationError


def _test_df():
Expand Down Expand Up @@ -185,3 +186,25 @@ def test_pipeline_slice():
assert 'num1' not in res_df.columns
assert 'num2' not in res_df.columns
assert 'char' in res_df.columns

@pytest.mark.parametrize("time", [True, False])
def test_pipeline_error(time):
    """Check that stage failures surface as PipelineApplicationError.

    A two-stage pipeline is built whose first stage compares column
    'num1' with column 'num3' (presumably absent from the frame returned
    by ``_test_df`` -- confirm against that helper). Each of ``fit``,
    ``transform`` and ``fit_transform`` is then called on a fresh frame,
    with and without the ``time`` flag, and must raise
    ``PipelineApplicationError``.
    """
    failing_stage = ColByFrameFunc(
        "Equality", lambda df: df['num1'] == df['num3'])
    pipeline = PdPipeline([failing_stage, ColDrop("B")])

    # Same order as the pipeline API is usually exercised: fit first,
    # then transform, then the combined fit_transform.
    for method_name in ("fit", "transform", "fit_transform"):
        frame = _test_df()
        with pytest.raises(PipelineApplicationError):
            getattr(pipeline, method_name)(frame, verbose=True, time=time)

0 comments on commit 75b15e0

Please sign in to comment.