Skip to content
This repository has been archived by the owner on Jan 13, 2024. It is now read-only.

Commit

Permalink
add logs to categories_to_integers
Browse files Browse the repository at this point in the history
  • Loading branch information
sdpython committed Nov 20, 2016
1 parent 314a767 commit 0c4a754
Showing 1 changed file with 9 additions and 1 deletion.
10 changes: 9 additions & 1 deletion src/ensae_teaching_cs/ml/categories_to_integers.py
Expand Up @@ -40,14 +40,18 @@ class CategoriesToIntegers(BaseEstimator, TransformerMixin):
print(newdf)
"""

def __init__(self, columns=None, remove=None, skip_errors=False, single=False):
def __init__(self, columns=None, remove=None, skip_errors=False, single=False, fLOG=None):
"""
constructor
@param columns specify a selection of columns
@param remove modalities to remove
@param skip_errors skip when a new category appears (no 1)
@param single use a single column per category, do not multiply them for each value
@param fLOG logging function
The logging function displays a message when a big dense matrix
is created where a sparse one should be used. A sparse matrix should be allocated instead.
"""
BaseEstimator.__init__(self)
TransformerMixin.__init__(self)
Expand All @@ -56,6 +60,7 @@ def __init__(self, columns=None, remove=None, skip_errors=False, single=False):
self._p_skip_errors = skip_errors
self._p_remove = remove
self._p_single = single
self.fLOG = fLOG

def __repr__(self):
"""
Expand Down Expand Up @@ -193,6 +198,9 @@ def transform(v, vec):
sch, pos, new_vector = self._schema
vec = new_vector

new_size = X.shape[0] * len(sch)
if new_size >= 2e30 and self.fLOG:
self.fLOG("Allocating {0} floats.".format(new_size))
res = numpy.zeros((X.shape[0], len(sch)))
res.fill(numpy.nan)
b = not self._p_skip_errors
Expand Down

0 comments on commit 0c4a754

Please sign in to comment.