Skip to content

Commit

Permalink
Fixing Head and Tail Functions (#136)
Browse files (browse the repository at this point in the history)
* fixes for head and tail

* simple fix for empty dataframes

* dataframe dtype fix

* formatting

* passing empty Index for empty dataframes

* Minor formatting change
  • Loading branch information
osalpekar authored and devin-petersohn committed Oct 8, 2018
1 parent 9203516 commit 520ebab
Showing 1 changed file with 19 additions and 4 deletions.
modin/data_management/data_manager.py — 23 changes: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
Expand Up @@ -1751,6 +1751,8 @@ def head(self, n):
# We grab the front if it is transposed and flag as transposed so that
# we are not physically updating the data from this manager. This
# allows the implementation to stay modular and reduces data copying.
if n < 0:
n = max(0, len(self.index) + n)
if self._is_transposed:
# Transpose the blocks back to their original orientation first to
# ensure that we extract the correct data on each node. The index
Expand Down Expand Up @@ -1779,17 +1781,23 @@ def tail(self, n):
DataManager containing the last n rows of the original DataManager.
"""
# See head for an explanation of the transposed behavior
if n < 0:
n = max(0, len(self.index) + n)
if n == 0:
index = pandas.Index([])
else:
index = self.index[-n:]
if self._is_transposed:
result = self.__constructor__(
self.data.transpose().take(1, -n).transpose(),
self.index[-n:],
index,
self.columns,
self._dtype_cache,
)
result._is_transposed = True
else:
result = self.__constructor__(
self.data.take(0, -n), self.index[-n:], self.columns, self._dtype_cache
self.data.take(0, -n), index, self.columns, self._dtype_cache
)

return result
Expand Down Expand Up @@ -1867,8 +1875,15 @@ def to_pandas(self):
Pandas DataFrame of the DataManager.
"""
df = self.data.to_pandas(is_transposed=self._is_transposed)
df.index = self.index
df.columns = self.columns
if df.empty:
dtype_dict = {
col_name: pandas.Series(dtype=self.dtypes[col_name])
for col_name in self.columns
}
df = pandas.DataFrame(dtype_dict, self.index)
else:
df.index = self.index
df.columns = self.columns
return df

@classmethod
Expand Down

0 comments on commit 520ebab

Please sign in to comment.