Skip to content

Commit

Permalink
further trim redundant test
Browse files: browse the repository at this point in the history
  • Loading branch information
omatthew98 committed Jan 26, 2024
1 parent 06a00fd commit c4e1980
Showing 1 changed file with 0 additions and 4 deletions.
4 changes: 0 additions & 4 deletions python/ray/data/tests/test_huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,20 +47,16 @@ def test_from_huggingface(hf_dataset, ray_start_regular_shared, num_par):

ray_datasets = {
"train": ray.data.from_huggingface(hf_dataset["train"], parallelism=num_par),
"test": ray.data.from_huggingface(hf_dataset["test"], parallelism=num_par),
}

assert isinstance(ray_datasets["train"], ray.data.Dataset)
hfds_assert_equals(hf_dataset["train"], ray_datasets["train"])
hfds_assert_equals(hf_dataset["test"], ray_datasets["test"])

# Test reading in a split Hugging Face dataset yields correct individual datasets
base_hf_dataset = hf_dataset["train"]
hf_dataset_split = base_hf_dataset.train_test_split(test_size=0.2)
ray_dataset_split_train = ray.data.from_huggingface(hf_dataset_split["train"])
ray_dataset_split_test = ray.data.from_huggingface(hf_dataset_split["test"])
assert ray_dataset_split_train.count() == hf_dataset_split["train"].num_rows
assert ray_dataset_split_test.count() == hf_dataset_split["test"].num_rows


@pytest.mark.skipif(
Expand Down

0 comments on commit c4e1980

Please sign in to comment.