Skip to content

Commit

Permalink
BUG: DataFrame can handle lists of tuples just like Series, a bit of …
Browse files Browse the repository at this point in the history
…refactoring for code reuse. GH #293
  • Loading branch information
wesm committed Nov 4, 2011
1 parent 5abb534 commit e63cbd7
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 55 deletions.
20 changes: 4 additions & 16 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3199,6 +3199,8 @@ def _rec_to_dict(arr):
return columns, sdict

def _homogenize(data, index, columns, dtype=None):
from pandas.core.series import _sanitize_array

homogenized = {}

if dtype is not None:
Expand All @@ -3225,23 +3227,9 @@ def _homogenize(data, index, columns, dtype=None):
else:
if isinstance(v, dict):
v = [v.get(i, nan) for i in index]
elif np.isscalar(v):
_v = np.empty(len(index), dtype=_infer_dtype(v))
_v.fill(v)
v = _v
else:
assert(len(v) == len(index))

# only *attempt* to cast to dtype
try:
arr = np.asarray(v, dtype=dtype)

# prevent NumPy from casting things to string when it shouldn't
if issubclass(arr.dtype.type, basestring):
arr = np.array(v, dtype=object, copy=False)
v = arr
except Exception:
v = np.asarray(v)
v = _sanitize_array(v, index, dtype=dtype, copy=False,
raise_cast_failure=False)

homogenized[k] = v

Expand Down
88 changes: 49 additions & 39 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,49 +104,15 @@ def __new__(cls, data, index=None, dtype=None, name=None, copy=False):
index = Index(sorted(data.keys()))
data = [data.get(idx, np.nan) for idx in index]

try:
subarr = np.array(data, dtype=dtype, copy=copy)
except ValueError:
if dtype:
raise
else: # pragma: no cover
subarr = np.array(data, dtype=object)

if subarr.ndim == 0:
if isinstance(data, list): # pragma: no cover
subarr = np.array(data, dtype=object)
elif index is not None:
value = data

# If we create an empty array using a string to infer
# the dtype, NumPy will only allocate one character per entry
# so this is kind of bad. Alternately we could use np.repeat
# instead of np.empty (but then you still don't want things
# coming out as np.str_!
if isinstance(value, basestring) and dtype is None:
dtype = np.object_

if dtype is None:
subarr = np.empty(len(index), dtype=type(value))
else:
subarr = np.empty(len(index), dtype=dtype)
subarr.fill(value)
else:
return subarr.item()
elif subarr.ndim > 1:
if isinstance(data, np.ndarray):
raise Exception('Data must be 1-dimensional')
else:
subarr = _asarray_tuplesafe(data, dtype=dtype)
subarr = _sanitize_array(data, index, dtype, copy,
raise_cast_failure=True)

if not isinstance(subarr, np.ndarray):
return subarr

if index is None:
index = _default_index(len(subarr))

# This is to prevent mixed-type Series getting all casted to
# NumPy string type, e.g. NaN --> '-1#IND'.
if issubclass(subarr.dtype.type, basestring):
subarr = np.array(data, dtype=object, copy=copy)

# Change the class of the array to be the subclass type.
subarr = subarr.view(cls)
subarr.index = index
Expand Down Expand Up @@ -2001,6 +1967,50 @@ def remove_na(arr):
return arr[notnull(arr)]


def _sanitize_array(data, index, dtype=None, copy=False,
                    raise_cast_failure=False):
    """
    Convert ``data`` to an ndarray suitable for use as 1-dimensional values.

    Parameters
    ----------
    data : scalar, list, ndarray or other array-like
        Values to convert.
    index : sized object or None
        Only consulted when ``data`` converts to a 0-dimensional array (a
        scalar); the scalar is then broadcast to ``len(index)`` entries.
    dtype : dtype-like, optional
        Requested dtype, passed to ``np.array``.
    copy : bool, default False
        Passed through to ``np.array``.
    raise_cast_failure : bool, default False
        If True and ``dtype`` was given, a ValueError/TypeError raised by the
        conversion is re-raised. Otherwise the data is converted to an
        object-dtype array instead.

    Returns
    -------
    ndarray, or the unwrapped scalar when ``data`` is 0-dimensional, is not
    a list, and ``index`` is None.

    Raises
    ------
    Exception
        If ``data`` is an ndarray with more than one dimension.
    """
    try:
        subarr = np.array(data, dtype=dtype, copy=copy)
    except (ValueError, TypeError):
        # Test against None explicitly: np.dtype instances describing
        # non-structured types have length 0 and can evaluate as false,
        # which would silently swallow the cast failure.
        if dtype is not None and raise_cast_failure:
            raise
        else:  # pragma: no cover
            subarr = np.array(data, dtype=object)

    if subarr.ndim == 0:
        if isinstance(data, list):  # pragma: no cover
            subarr = np.array(data, dtype=object)
        elif index is not None:
            value = data

            # If we create an empty array using a string to infer the
            # dtype, NumPy will only allocate one character per entry, so
            # fall back to object dtype. Alternately we could use np.repeat
            # instead of np.empty (but then you still don't want things
            # coming out as np.str_!).
            if isinstance(value, basestring) and dtype is None:
                dtype = np.object_

            if dtype is None:
                subarr = np.empty(len(index), dtype=type(value))
            else:
                subarr = np.empty(len(index), dtype=dtype)
            subarr.fill(value)
        else:
            # Scalar input and nothing to broadcast against: hand back the
            # plain Python scalar rather than an array.
            return subarr.item()
    elif subarr.ndim > 1:
        if isinstance(data, np.ndarray):
            raise Exception('Data must be 1-dimensional')
        else:
            # e.g. a list of tuples: rebuild via the tuple-safe helper
            # instead of keeping the multi-dimensional conversion.
            subarr = _asarray_tuplesafe(data, dtype=dtype)

    # This is to prevent mixed-type Series getting all casted to
    # NumPy string type, e.g. NaN --> '-1#IND'.
    if issubclass(subarr.dtype.type, basestring):
        subarr = np.array(data, dtype=object, copy=copy)

    return subarr

def _get_rename_function(mapper):
if isinstance(mapper, (dict, Series)):
def f(x):
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1119,6 +1119,11 @@ def test_constructor_mixed_dict_and_Series(self):
result = DataFrame(data)
self.assert_(result.index.is_monotonic)

def test_constructor_tuples(self):
result = DataFrame({'A': [(1, 2), (3, 4)]})
expected = DataFrame({'A': Series([(1, 2), (3, 4)])})
assert_frame_equal(result, expected)

def test_astype(self):
casted = self.frame.astype(int)
expected = DataFrame(self.frame.values.astype(int),
Expand Down

0 comments on commit e63cbd7

Please sign in to comment.