Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix for #277: load_results properly handles experiments dtypes #280

Merged
merged 2 commits into from
Jun 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions ema_workbench/util/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ def load_results(file_name):
except TypeError:
dtype = pd.api.types.pandas_dtype(dtype)

if experiments[name].dtype is not dtype:
experiments[name] = experiments[name].astype(dtype)
EwoutH marked this conversation as resolved.
Show resolved Hide resolved
# this check is for backward compatibility with data stored with 2.4.
if pd.api.types.is_object_dtype(dtype):
experiments[name] = experiments[name].astype("category")

Expand Down
Binary file added test/data/test.tar.gz
Binary file not shown.
26 changes: 17 additions & 9 deletions test/test_util/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,11 @@ def test_save_results(self):

# test for 1d
nr_experiments = 10000
experiments = pd.DataFrame(
index=np.arange(nr_experiments), columns={"x": float, "y": float}
cases = np.empty(
nr_experiments, dtype=[("x", float), ("y", int), ("z", bool), ("q", object)]
)
experiments = pd.DataFrame.from_records(cases)
experiments["q"] = experiments["q"].astype("category")
outcome_q = np.random.rand(nr_experiments, 1)

outcomes = {ScalarOutcome("q").name: outcome_q}
Expand Down Expand Up @@ -85,19 +87,19 @@ def test_save_results(self):

class LoadResultsTestCase(unittest.TestCase):
def test_load_results(self):
# test for 1d

# test for 3d

nr_experiments = 10000

# test for 2d
experiments = pd.DataFrame(
index=np.arange(nr_experiments), columns={"x": float, "y": float}
cases = np.empty(
nr_experiments, dtype=[("x", float), ("y", int), ("z", bool), ("q", object)]
)
experiments = pd.DataFrame.from_records(cases)

experiments["x"] = np.random.rand(nr_experiments)
experiments["y"] = np.random.rand(nr_experiments)
experiments["y"] = np.random.randint(0, 10, size=nr_experiments)
experiments["z"] = np.random.randint(0, 1, size=nr_experiments, dtype=bool)
experiments["q"] = np.random.randint(0, 10, size=nr_experiments).astype(object)
experiments["q"] = experiments["q"].astype("category")

outcome_a = np.zeros((nr_experiments, 1))

Expand All @@ -111,6 +113,12 @@ def test_load_results(self):
self.assertTrue(np.all(np.allclose(experiments["x"], loaded_experiments["x"])))
self.assertTrue(np.all(np.allclose(experiments["y"], loaded_experiments["y"])))

for name, dtype in experiments.dtypes.items():
self.assertTrue(
dtype == loaded_experiments[name].dtype,
msg=f"{name}, {dtype}, {loaded_experiments[name].dtype}",
)

os.remove("../data/test.tar.gz")

# test 3d
Expand Down