-
Notifications
You must be signed in to change notification settings - Fork 184
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix incorrect propagation of dtype in Series normalize and other methods #46
Changes from 2 commits
a68bb18
7925a2b
f4573c1
688cd6d
a9ef01f
5876e5c
a71869a
2013f97
c20978d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,7 +43,7 @@ def populateParamsFromFirstRecord(self): | |
self._dtype = str(record[1].dtype) | ||
return record | ||
|
||
def __finalize__(self, other): | ||
def __finalize__(self, other, nopropagate=()): | ||
""" | ||
Lazily propagate attributes from other to self, only if attributes | ||
are not already defined in self | ||
|
@@ -53,11 +53,15 @@ def __finalize__(self, other): | |
other : the object from which to get the attributes that we are going | ||
to propagate | ||
|
||
nopropagate : iterable of string attribute names (with underscores), default empty tuple | ||
attributes found in nopropagate will *not* have their values propagated forward from self, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Isn't it "to self" from other? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Doh. You're right. By the time |
||
but will keep their existing values, even if these are None | ||
""" | ||
if isinstance(other, Data): | ||
for name in self._metadata: | ||
if (getattr(other, name, None) is not None) and (getattr(self, name, None) is None): | ||
object.__setattr__(self, name, getattr(other, name, None)) | ||
if not name in nopropagate: | ||
if (getattr(other, name, None) is not None) and (getattr(self, name, None) is None): | ||
object.__setattr__(self, name, getattr(other, name, None)) | ||
return self | ||
|
||
@property | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -486,7 +486,7 @@ def subtract(self, val): | |
|
||
return self.apply(lambda x: x - val) | ||
|
||
def apply(self, func): | ||
def apply(self, func, expectedDtype=None): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Might prefer to call this There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I was trying to make clear with the parameter name that passing in a |
||
""" | ||
Apply a function to all images / volumes, | ||
otherwise preserving attributes | |
|
@@ -495,8 +495,20 @@ def apply(self, func): | |
---------- | ||
func : function | ||
Function to apply | ||
expectedDtype : numpy dtype or dtype specifier, or None (default), or string 'unset' or 'same' | ||
Numpy dtype expected from output of func. This will be set as the dtype attribute | ||
of the output Data object. If 'same', then the resulting `dtype` will be the same as that of `self`. If | ||
the string 'unset' or None is passed, the `dtype` of the output will be lazily determined as needed. Note | ||
that this argument, if passed, does not *enforce* that the function output will actually be of the given | ||
dtype. If in doubt, leaving this as None is the safest thing to do. | ||
""" | ||
return self._constructor(self.rdd.mapValues(func)).__finalize__(self) | ||
rdd = self.rdd.mapValues(func) | ||
if isinstance(expectedDtype, basestring): | ||
if expectedDtype == 'same': | ||
expectedDtype = self._dtype | ||
elif expectedDtype == 'unset': | ||
expectedDtype = None | ||
return self._constructor(rdd, dtype=expectedDtype).__finalize__(self, nopropagate=('_dtype',)) | ||
|
||
|
||
class _BlockMemoryAsSequence(object): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are we formally requiring that attributes have underscores in their names?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Eh. Was just trying to indicate here that the string attribute name to be passed in should be the "private" (e.g. `_dtype`) version rather than the public (`dtype`) version. Seems like a potential point of confusion. Can try to clarify this.