-
Notifications
You must be signed in to change notification settings - Fork 647
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
TEST-#6996: Update tests in test_io.py
#6997
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -259,28 +259,36 @@ def _make_parquet_dir( | |
@pytest.mark.filterwarnings(default_to_pandas_ignore_string) | ||
class TestCsv: | ||
# delimiter tests | ||
@pytest.mark.parametrize("sep", [None, "_", ",", ".", "\n"]) | ||
@pytest.mark.parametrize("delimiter", ["_", ",", ".", "\n"]) | ||
@pytest.mark.parametrize("sep", ["_", ",", "."]) | ||
@pytest.mark.parametrize("decimal", [".", "_"]) | ||
@pytest.mark.parametrize("thousands", [None, ",", "_", " "]) | ||
def test_read_csv_delimiters( | ||
self, make_csv_file, sep, delimiter, decimal, thousands | ||
): | ||
def test_read_csv_seps(self, make_csv_file, sep, decimal, thousands): | ||
unique_filename = make_csv_file( | ||
delimiter=delimiter, | ||
delimiter=sep, | ||
thousands_separator=thousands, | ||
decimal_separator=decimal, | ||
) | ||
eval_io( | ||
fn_name="read_csv", | ||
# read_csv kwargs | ||
filepath_or_buffer=unique_filename, | ||
delimiter=delimiter, | ||
sep=sep, | ||
decimal=decimal, | ||
thousands=thousands, | ||
) | ||
|
||
@pytest.mark.parametrize("sep", [None, "_"]) | ||
@pytest.mark.parametrize("delimiter", [".", "_"]) | ||
def test_read_csv_seps_except(self, make_csv_file, sep, delimiter): | ||
unique_filename = make_csv_file(delimiter=delimiter) | ||
eval_io( | ||
fn_name="read_csv", | ||
# read_csv kwargs | ||
filepath_or_buffer=unique_filename, | ||
delimiter=delimiter, | ||
sep=sep, | ||
) | ||
|
||
@pytest.mark.parametrize( | ||
"dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"] | ||
) | ||
|
@@ -303,7 +311,7 @@ def comparator(df1, df2): | |
@pytest.mark.parametrize("header", ["infer", None, 0]) | ||
@pytest.mark.parametrize("index_col", [None, "col1"]) | ||
@pytest.mark.parametrize( | ||
"names", [lib.no_default, ["col1"], ["c1", "c2", "c3", "c4", "c5", "c6", "c7"]] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The last column is missing; this is a test to reproduce the same error as in pandas. It is redundant to test this situation with every combination of parameters in this test. |
||
"names", [lib.no_default, ["col1"], ["c1", "c2", "c3", "c4", "c5", "c6"]] | ||
) | ||
@pytest.mark.parametrize( | ||
"usecols", [None, ["col1"], ["col1", "col2", "col6"], [0, 1, 5]] | ||
|
@@ -794,75 +802,64 @@ def test_read_csv_error_handling(self, on_bad_lines): | |
on_bad_lines=on_bad_lines, | ||
) | ||
|
||
@pytest.mark.parametrize("float_precision", [None, "high", "legacy", "round_trip"]) | ||
def test_python_engine_float_precision_except(self, float_precision): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you elaborate on why python engine only? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We already tested python engine in |
||
eval_io( | ||
fn_name="read_csv", | ||
# read_csv kwargs | ||
filepath_or_buffer=pytest.csvs_names["test_read_csv_regular"], | ||
engine="python", | ||
float_precision=float_precision, | ||
) | ||
|
||
@pytest.mark.parametrize("low_memory", [False, True]) | ||
def test_python_engine_low_memory_except(self, low_memory): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We already tested python engine in |
||
eval_io( | ||
fn_name="read_csv", | ||
# read_csv kwargs | ||
filepath_or_buffer=pytest.csvs_names["test_read_csv_regular"], | ||
engine="python", | ||
low_memory=low_memory, | ||
) | ||
|
||
@pytest.mark.parametrize("delim_whitespace", [True, False]) | ||
def test_delim_whitespace(self, delim_whitespace, tmp_path): | ||
if StorageFormat.get() == "Hdk" and delim_whitespace: | ||
pytest.xfail(reason="https://github.com/modin-project/modin/issues/6999") | ||
str_delim_whitespaces = "col1 col2 col3 col4\n5 6 7 8\n9 10 11 12\n" | ||
unique_filename = get_unique_filename(data_dir=tmp_path) | ||
eval_io_from_str( | ||
str_delim_whitespaces, | ||
unique_filename, | ||
delim_whitespace=delim_whitespace, | ||
) | ||
|
||
# Internal parameters tests | ||
@pytest.mark.parametrize("use_str_data", [True, False]) | ||
@pytest.mark.parametrize("engine", [None, "python", "c"]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Many parameters are not implemented for the python engine (exceptions are raised), so those cases were moved to the separate tests above. |
||
@pytest.mark.parametrize("engine", ["c"]) | ||
@pytest.mark.parametrize("delimiter", [",", " "]) | ||
@pytest.mark.parametrize("delim_whitespace", [True, False]) | ||
@pytest.mark.parametrize("low_memory", [True, False]) | ||
@pytest.mark.parametrize("memory_map", [True, False]) | ||
@pytest.mark.parametrize("float_precision", [None, "high", "round_trip"]) | ||
def test_read_csv_internal( | ||
self, | ||
make_csv_file, | ||
use_str_data, | ||
engine, | ||
delimiter, | ||
delim_whitespace, | ||
low_memory, | ||
memory_map, | ||
float_precision, | ||
tmp_path, | ||
): | ||
# In this case raised TypeError: cannot use a string pattern on a bytes-like object, | ||
# so TypeError should be excluded from raising_exceptions list in order to check, that | ||
# the same exceptions are raised by Pandas and Modin | ||
case_with_TypeError_exc = ( | ||
engine == "python" | ||
and delimiter == "," | ||
and delim_whitespace | ||
and low_memory | ||
and memory_map | ||
and float_precision is None | ||
unique_filename = make_csv_file(delimiter=delimiter) | ||
eval_io( | ||
filepath_or_buffer=unique_filename, | ||
fn_name="read_csv", | ||
engine=engine, | ||
delimiter=delimiter, | ||
low_memory=low_memory, | ||
memory_map=memory_map, | ||
float_precision=float_precision, | ||
) | ||
|
||
raising_exceptions = io_ops_bad_exc # default value | ||
if case_with_TypeError_exc: | ||
raising_exceptions = list(io_ops_bad_exc) | ||
raising_exceptions.remove(TypeError) | ||
|
||
kwargs = { | ||
"engine": engine, | ||
"delimiter": delimiter, | ||
"delim_whitespace": delim_whitespace, | ||
"low_memory": low_memory, | ||
"memory_map": memory_map, | ||
"float_precision": float_precision, | ||
} | ||
|
||
if use_str_data: | ||
str_delim_whitespaces = ( | ||
"col1 col2 col3 col4\n5 6 7 8\n9 10 11 12\n" | ||
) | ||
unique_filename = get_unique_filename(data_dir=tmp_path) | ||
eval_io_from_str( | ||
str_delim_whitespaces, | ||
unique_filename, | ||
raising_exceptions=raising_exceptions, | ||
**kwargs, | ||
) | ||
else: | ||
unique_filename = make_csv_file( | ||
delimiter=delimiter, | ||
) | ||
|
||
eval_io( | ||
filepath_or_buffer=unique_filename, | ||
fn_name="read_csv", | ||
raising_exceptions=raising_exceptions, | ||
**kwargs, | ||
) | ||
|
||
# Issue related, specific or corner cases | ||
@pytest.mark.parametrize("nrows", [2, None]) | ||
def test_read_csv_bad_quotes(self, nrows): | ||
|
@@ -2650,6 +2647,9 @@ def test_to_sql(self, tmp_path, make_sql_connection, index, conn_type): | |
assert df_modin_sql.sort_index().equals(df_pandas_sql.sort_index()) | ||
|
||
|
||
@pytest.mark.skipif( | ||
StorageFormat.get() == "Hdk", reason="Missing optional dependency 'lxml'." | ||
) | ||
@pytest.mark.filterwarnings(default_to_pandas_ignore_string) | ||
class TestHtml: | ||
def test_read_html(self, make_html_file): | ||
|
@@ -2756,20 +2756,19 @@ def test_fwf_file_usecols(self, make_fwf_file, usecols): | |
"dtype_backend", [lib.no_default, "numpy_nullable", "pyarrow"] | ||
) | ||
def test_read_fwf_dtype_backend(self, make_fwf_file, dtype_backend): | ||
with ensure_clean(".fwf") as unique_filename: | ||
make_fwf_file(filename=unique_filename) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
unique_filename = make_fwf_file() | ||
|
||
def comparator(df1, df2): | ||
df_equals(df1, df2) | ||
df_equals(df1.dtypes, df2.dtypes) | ||
def comparator(df1, df2): | ||
df_equals(df1, df2) | ||
df_equals(df1.dtypes, df2.dtypes) | ||
|
||
eval_io( | ||
fn_name="read_fwf", | ||
# read_csv kwargs | ||
filepath_or_buffer=unique_filename, | ||
dtype_backend=dtype_backend, | ||
comparator=comparator, | ||
) | ||
eval_io( | ||
fn_name="read_fwf", | ||
# read_csv kwargs | ||
filepath_or_buffer=unique_filename, | ||
dtype_backend=dtype_backend, | ||
comparator=comparator, | ||
) | ||
|
||
def test_fwf_file_chunksize(self, make_fwf_file): | ||
unique_filename = make_fwf_file() | ||
|
@@ -3079,6 +3078,9 @@ def test_to_pickle(self, tmp_path): | |
df_equals(modin_df, recreated_modin_df) | ||
|
||
|
||
@pytest.mark.skipif( | ||
StorageFormat.get() == "Hdk", reason="Missing optional dependency 'lxml'." | ||
) | ||
@pytest.mark.filterwarnings(default_to_pandas_ignore_string) | ||
class TestXml: | ||
def test_read_xml(self): | ||
|
@@ -3095,6 +3097,12 @@ def test_read_xml(self): | |
<degrees>360</degrees> | ||
<sides/> | ||
</row> | ||
<row> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this added? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The example is outdated; I took the updated one from https://pandas.pydata.org/docs/reference/api/pandas.read_xml.html#pandas-read-xml. |
||
<shape>triangle</shape> | ||
<degrees>180</degrees> | ||
<sides>3.0</sides> | ||
</row> | ||
</data> | ||
""" | ||
eval_io("read_xml", path_or_buffer=data) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These parameters (`sep` and `delimiter`) mean the same thing. Using them at the same time raises an exception, so I moved that case into a separate test.