Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Set Ray dependency to 0.5.2 #78

Merged
merged 9 commits into from
Sep 17, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 7 additions & 53 deletions .travis/install-dependencies.sh
Original file line number Diff line number Diff line change
@@ -1,62 +1,16 @@
#!/usr/bin/env bash

ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)

echo "PYTHON is $PYTHON"

platform="unknown"
unamestr="$(uname)"
if [[ "$unamestr" == "Linux" ]]; then
echo "Platform is linux."
platform="linux"
elif [[ "$unamestr" == "Darwin" ]]; then
echo "Platform is macosx."
platform="macosx"
else
echo "Unrecognized platform."
exit 1
fi

if [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "linux" ]]; then
# Install miniconda.
if [[ "$PYTHON" == "2.7" ]]; then
wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh -nv
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
pip install -q pandas==0.22 feather-format lxml openpyxl xlrd
# Install ray from its latest wheels
pip install -q -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp27-cp27mu-manylinux1_x86_64.whl
elif [[ "$PYTHON" == "3.6" ]] && [[ "$platform" == "linux" ]]; then
# Install miniconda.
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh -nv
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
pip install -q pandas==0.22 feather-format lxml openpyxl xlrd
# Install ray from its latest wheels
pip install -q -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp36-cp36m-manylinux1_x86_64.whl
elif [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "macosx" ]]; then
# Install miniconda.
wget https://repo.continuum.io/miniconda/Miniconda2-latest-MacOSX-x86_64.sh -O miniconda.sh -nv
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
pip install -q pandas==0.22 feather-format lxml openpyxl xlrd
# Install ray from its latest wheels
pip install -q -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp27-cp27m-macosx_10_6_intel.whl
elif [[ "$PYTHON" == "3.6" ]] && [[ "$platform" == "macosx" ]]; then
# Install miniconda.
wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh -nv
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
pip install -q pandas==0.22 feather-format lxml openpyxl xlrd
# Install ray from its latest wheels
pip install -q -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp36-cp36m-macosx_10_6_intel.whl
elif [[ "$LINT" == "1" ]]; then
# Install miniconda.

elif [[ "$PYTHON" == "3.6" ]] || [[ "$LINT" == "1" ]]; then
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh -nv
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
# Install Python linting tools.
pip install -q flake8 flake8-comprehensions yapf
else
echo "Unrecognized environment."
exit 1
conda install -y python==3.6.5
fi

pip install -r requirements.txt
pip install -q pytest flake8 flake8-comprehensions yapf feather-format lxml openpyxl xlrd numpy
4 changes: 2 additions & 2 deletions modin/pandas/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ def name_incrementer(i):
# Put all of the DataFrames into Ray format
# TODO just partition the DataFrames instead of building a new Ray DF.
objs = [
DataFrame(obj)
if isinstance(obj, (pandas.DataFrame, pandas.Series)) else obj
DataFrame(obj) if isinstance(obj, (pandas.DataFrame,
pandas.Series)) else obj
for obj in objs
]

Expand Down
29 changes: 16 additions & 13 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,8 +375,9 @@ def col_dots_builder(full_front, full_back):
col_dots = pandas.Series(["..." for _ in range(len(full_front))])
col_dots.index = index_of_head
col_dots.name = "..."
return pandas.concat(
[full_front, col_dots, full_back], axis=1, copy=False)
return pandas.concat([full_front, col_dots, full_back],
axis=1,
copy=False)

# If we don't exceed the maximum number of values on either dimension
if len(self.index) <= 60 and len(self.columns) <= 20:
Expand Down Expand Up @@ -524,8 +525,8 @@ def _arithmetic_helper(self, remote_func, axis, level=None):

oid_series = ray.get(
_map_partitions(
remote_func, self._col_partitions
if axis == 0 else self._row_partitions))
remote_func,
self._col_partitions if axis == 0 else self._row_partitions))

if axis == 0:
# We use the index to get the internal index.
Expand All @@ -538,8 +539,9 @@ def _arithmetic_helper(self, remote_func, axis, level=None):
df.index = \
this_partition[this_partition.isin(df.index)].index

result_series = pandas.concat(
[obj[0] for obj in oid_series], axis=0, copy=False)
result_series = pandas.concat([obj[0] for obj in oid_series],
axis=0,
copy=False)
else:
result_series = pandas.concat(oid_series, axis=0, copy=False)
result_series.index = self.index
Expand Down Expand Up @@ -1507,9 +1509,9 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs):
raise KeyError("Only a column name can be used for the key in"
"a dtype mappings argument.")
columns = list(dtype.keys())
col_idx = [(self.columns.get_loc(columns[i]), columns[i]) if
columns[i] in self.columns else (columns[i], columns[i])
for i in range(len(columns))]
col_idx = [(self.columns.get_loc(columns[i]),
columns[i]) if columns[i] in self.columns else
(columns[i], columns[i]) for i in range(len(columns))]
new_dict = {}
for idx, key in col_idx:
new_dict[idx] = dtype[key]
Expand Down Expand Up @@ -3667,8 +3669,9 @@ def check_bad_dtype(t):
if next((True for t in self.dtypes if check_bad_dtype(t)),
False):
dtype = next(t for t in self.dtypes if check_bad_dtype(t))
raise ValueError("Cannot compare type '{}' with type '{}'"
.format(type(dtype), float))
raise ValueError(
"Cannot compare type '{}' with type '{}'".format(
type(dtype), float))
else:
# Normally pandas returns this near the end of the quantile, but we
# can't afford the overhead of running the entire operation before
Expand Down Expand Up @@ -5896,8 +5899,8 @@ def __neg__(self):
for t in self.dtypes:
if not (is_bool_dtype(t) or is_numeric_dtype(t)
or is_timedelta64_dtype(t)):
raise TypeError("Unary negative expects numeric dtype, not {}"
.format(t))
raise TypeError(
"Unary negative expects numeric dtype, not {}".format(t))

new_block_partitions = np.array([
_map_partitions(lambda df: df.__neg__(), block)
Expand Down
4 changes: 2 additions & 2 deletions modin/pandas/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,8 +568,8 @@ def _apply_df_function(self, f, concat_axis=None):
new_df._block_partitions = np.array([
_reindex_helper._submit(
args=tuple([
new_df.columns, self._columns, 0,
new_df._block_partitions.shape[1]
new_df.columns, self._columns, 0, new_df.
_block_partitions.shape[1]
] + block.tolist()),
num_return_vals=new_df._block_partitions.shape[1])
for block in new_df._block_partitions
Expand Down
4 changes: 2 additions & 2 deletions modin/pandas/index_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,8 @@ def groupby(self,
return assignments_df

def partition_series(self, partition):
return self[self._coord_df['partition'] == partition,
'index_within_partition']
return self[self._coord_df['partition'] ==
partition, 'index_within_partition']

def __len__(self):
return int(sum(self._lengths))
Expand Down
13 changes: 7 additions & 6 deletions modin/pandas/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,9 @@ def _write_items(self, row_lookup, col_lookup, item):
row_idx = row_data['index_within_partition']
col_idx = col_data['index_within_partition']

item_to_write = item[row_item_index:row_item_index + row_len,
col_item_index:col_item_index + col_len]
item_to_write = item[row_item_index:row_item_index +
row_len, col_item_index:col_item_index +
col_len]

result_oid = writer.remote(block_oid, row_idx, col_idx,
item_to_write)
Expand Down Expand Up @@ -396,15 +397,15 @@ def _enlarge_axis(self, locator, axis):
]])
nan_blks = nan_blks.T if not row_based_bool else nan_blks

self.block_oids = np.concatenate(
[self.block_oids, nan_blks], axis=0 if row_based_bool else 1)
self.block_oids = np.concatenate([self.block_oids, nan_blks],
axis=0 if row_based_bool else 1)

# 3. Prepare metadata to return
nan_coord_df = pandas.DataFrame(data=[
{
'': name,
'partition': blk_part_n_row
if row_based_bool else blk_part_n_col,
'partition':
blk_part_n_row if row_based_bool else blk_part_n_col,
'index_within_partition': i
} for name, i in zip(nan_labels, np.arange(num_nan_labels))
]).set_index('')
Expand Down
20 changes: 10 additions & 10 deletions modin/pandas/test/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1870,13 +1870,12 @@ def test_fillna_dataframe():
ray_df = pd.DataFrame(frame_data, index=list('VWXYZ'))

# df2 may have different index and columns
df2 = pandas.DataFrame(
{
'a': [np.nan, 10, 20, 30, 40],
'b': [50, 60, 70, 80, 90],
'foo': ['bar'] * 5
},
index=list('VWXuZ'))
df2 = pandas.DataFrame({
'a': [np.nan, 10, 20, 30, 40],
'b': [50, 60, 70, 80, 90],
'foo': ['bar'] * 5
},
index=list('VWXuZ'))

# only those columns and indices which are shared get filled
assert ray_df_equals_pandas(ray_df.fillna(df2), df.fillna(df2))
Expand Down Expand Up @@ -2567,7 +2566,8 @@ def test_rename_sanity():
}).index)

tm.assert_index_equal(
ray_df.rename(index=str.upper).index, df.rename(index=str.upper).index)
ray_df.rename(index=str.upper).index,
df.rename(index=str.upper).index)

# have to pass something
pytest.raises(TypeError, ray_df.rename)
Expand Down Expand Up @@ -3145,8 +3145,8 @@ def test_update():
df = pd.DataFrame([[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3],
[1.5, np.nan, 3]])

other = pd.DataFrame(
[[3.6, 2., np.nan], [np.nan, np.nan, 7]], index=[1, 3])
other = pd.DataFrame([[3.6, 2., np.nan], [np.nan, np.nan, 7]],
index=[1, 3])

df.update(other)

Expand Down
20 changes: 9 additions & 11 deletions modin/pandas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,9 +303,8 @@ def _repartition_coord_df(old_coord_df, npartition):
passed in.
"""
length = len(old_coord_df)
chunksize = (len(old_coord_df) // npartition
if len(old_coord_df) % npartition == 0 else
len(old_coord_df) // npartition + 1)
chunksize = (len(old_coord_df) // npartition if len(old_coord_df) %
npartition == 0 else len(old_coord_df) // npartition + 1)

# generate array([0, 0, 0, 1, 1, 1, 2])
partitions = np.repeat(np.arange(npartition), chunksize)[:length]
Expand All @@ -314,12 +313,11 @@ def _repartition_coord_df(old_coord_df, npartition):
final_n_partition = np.max(partitions)
idx_in_part = np.tile(np.arange(chunksize), final_n_partition + 1)[:length]

final_df = pandas.DataFrame(
{
'partition': partitions,
'index_within_partition': idx_in_part
},
index=old_coord_df.index)
final_df = pandas.DataFrame({
'partition': partitions,
'index_within_partition': idx_in_part
},
index=old_coord_df.index)

return final_df

Expand Down Expand Up @@ -767,8 +765,8 @@ def apply_suffix(s):
block_df_oid = blk_partitions[row_idx, col_idx]
block_df = ray.get(block_df_oid)
chunk = block_df.iloc[row_df[apply_suffix(
'index_within_partition')], col_df[apply_suffix(
'index_within_partition')]]
'index_within_partition'
)], col_df[apply_suffix('index_within_partition')]]
this_column.append(chunk)
df_columns.append(pandas.concat(this_column, axis=1))
final_df = pandas.concat(df_columns)
Expand Down
11 changes: 1 addition & 10 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,2 @@
pandas==0.22
# Install Ray from latest wheels
https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp27-cp27mu-manylinux1_x86_64.whl ; sys_platform == "linux2" and python_version == "2.7"
https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp33-cp33m-manylinux1_x86_64.whl ; sys_platform == "linux" and python_version == "3.3"
https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp34-cp34m-manylinux1_x86_64.whl ; sys_platform == "linux" and python_version == "3.4"
https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp35-cp35m-manylinux1_x86_64.whl ; sys_platform == "linux" and python_version == "3.5"
https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp36-cp36m-manylinux1_x86_64.whl ; sys_platform == "linux" and python_version == "3.6"
https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp27-cp27m-macosx_10_6_intel.whl ; sys_platform == "darwin" and python_version == "2.7"
https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp34-cp34m-macosx_10_6_intel.whl ; sys_platform == "darwin" and python_version == "3.4"
https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp35-cp35m-macosx_10_6_intel.whl ; sys_platform == "darwin" and python_version == "3.5"
https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp36-cp36m-macosx_10_6_intel.whl ; sys_platform == "darwin" and python_version == "3.6"
ray==0.5.2