Changing the way we convert to pandas (#152)
* Fixing off partitioning issue

* Removing numpy transpose
devin-petersohn authored and osalpekar committed Oct 11, 2018
1 parent 357e2fc · commit 80339c8
Showing 1 changed file with 8 additions and 8 deletions.

modin/data_management/partitioning/partition_collections.py
@@ -424,26 +424,26 @@ def to_pandas(self, is_transposed=False):
                 for row in retrieved_objects
                 for part in row
             ):
-                axis = 0
+                axis = 1
                 retrieved_objects = np.array(retrieved_objects).T
             elif all(
                 isinstance(part, pandas.DataFrame)
                 for row in retrieved_objects
                 for part in row
             ):
-                axis = 1
+                axis = 0
             else:
                 raise ValueError(
                     "Some partitions contain Series and some contain DataFrames"
                 )
-            df_rows = [
-                pandas.concat([part for part in row], axis=axis)
-                for row in retrieved_objects
+            df_columns = [
+                pandas.concat([part for part in col], axis=axis)
+                for col in zip(*retrieved_objects)
             ]
-            if len(df_rows) == 0:
+            if len(df_columns) == 0:
                 return pandas.DataFrame()
             else:
-                return pandas.concat(df_rows)
+                return pandas.concat(df_columns, axis=1)

     @classmethod
     def from_pandas(cls, df):
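To make the concatenation order concrete, here is a minimal standalone sketch (toy data and names, not Modin code) of the column-wise reassembly that the df_columns path above performs for a grid of DataFrame partitions: each grid column is stacked vertically with axis=0, then the resulting column blocks are glued side by side with axis=1.

import numpy as np
import pandas

# A small frame and a pretend 2x2 grid of block partitions cut out of it.
full = pandas.DataFrame(np.arange(16).reshape(4, 4), columns=list("abcd"))
grid = [
    [full.iloc[:2, :2], full.iloc[:2, 2:]],
    [full.iloc[2:, :2], full.iloc[2:, 2:]],
]

# zip(*grid) walks the grid column by column; each column is concatenated
# along axis=0 and the resulting column blocks are joined along axis=1.
df_columns = [pandas.concat(list(col), axis=0) for col in zip(*grid)]
reassembled = pandas.concat(df_columns, axis=1)

assert reassembled.equals(full)

For a grid of plain DataFrame blocks this grouping rebuilds the same frame as the earlier row-wise version; the change is in which direction of the partition grid gets concatenated first.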
@@ -551,7 +551,7 @@ def _get_blocks_containing_index(self, axis, index):
             cumulative_column_widths = np.array(self.block_widths).cumsum()
             block_idx = int(np.digitize(index, cumulative_column_widths))
             if block_idx == len(cumulative_column_widths):
-                block_idx -= 1
+                block_idx = np.argmax(cumulative_column_widths)
             # Compute the internal index based on the previous lengths. This
             # is a global index, so we must subtract the lengths first.
             internal_idx = (
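For the second hunk, here is a minimal sketch (toy block widths, not Modin code) of how np.digitize maps a global column index to a block, and how the np.argmax clamp behaves when the trailing block happens to be empty, compared with the previous block_idx -= 1:

import numpy as np

block_widths = [3, 3, 2, 0]  # column-block widths; the last block is empty
cumulative_column_widths = np.array(block_widths).cumsum()  # array([3, 6, 8, 8])

index = 8  # a global index at the very end of the frame
block_idx = int(np.digitize(index, cumulative_column_widths))  # 4, one past the last block

if block_idx == len(cumulative_column_widths):
    # block_idx -= 1 would land on block 3, which holds no columns.
    # np.argmax returns the first position where the cumulative width reaches
    # its maximum, i.e. block 2, the last block that actually holds columns.
    block_idx = int(np.argmax(cumulative_column_widths))

print(block_idx)  # 2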
