Skip to content

Commit

Permalink
Merge pull request #68 from khaeru/enh/2022-W30
Browse files Browse the repository at this point in the history
Miscellaneous enhancements for 2022-W30
  • Loading branch information
khaeru committed Aug 15, 2022
2 parents 0543103 + 362e743 commit 7fbf2b3
Show file tree
Hide file tree
Showing 13 changed files with 238 additions and 157 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,5 @@ jobs:
- name: Check typing with mypy
# Also install packages that provide type hints
run: |
pip install mypy ixmp pytest types-PyYAML xarray
pip install mypy ixmp pytest types-PyYAML "xarray!=2022.6.0"
mypy .
3 changes: 3 additions & 0 deletions .github/workflows/pytest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ jobs:
# pyam-iamc (IAMconsortium/pyam#651) forces pint < 0.19; override
pip install --upgrade pint
- name: TEMPORARY downgrade xarray pending khaeru/genno#67
run: pip install xarray!=2022.6.0

- name: Run test suite using pytest
run: pytest genno --trace-config --verbose --cov-report=xml --cov-report=term --color=yes

Expand Down
10 changes: 8 additions & 2 deletions doc/whatsnew.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,14 @@ What's new
:backlinks: none
:depth: 1

.. Next release
.. ============
Next release
============

- 1-dimensional quantities are handled in :func:`.relabel` and as weights in :func:`.sum` (:pull:`68`).
- :func:`.load_file` will read a header comment like ``# Units: kg / s`` and apply the indicated units to the resulting quantity (:pull:`68`).
- :func:`.div` and :func:`.mul` become the canonical names, matching :mod:`.operator` and other parts of the Python standard library (:pull:`68`).
:func:`.ratio` and :func:`.product` are retained as aliases, for compatibility.
- Ensure data passed to :meth:`.Plot.generate` has a "value" column; use short units format by default (:pull:`68`).

v1.12.0 (2022-07-18)
====================
Expand Down
11 changes: 5 additions & 6 deletions genno/compat/plotnine/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ def save(self, config, *args, **kwargs):
missing = tuple(filter(lambda arg: isinstance(arg, str), args))
if len(missing):
log.error(
f"Missing input(s) {repr(missing)} to plot {repr(self.basename)}; no "
"output"
f"Missing input(s) {missing!r} to plot {self.basename!r}; no output"
)
return

Expand All @@ -44,18 +43,18 @@ def save(self, config, *args, **kwargs):
lambda arg: arg
if not isinstance(arg, Quantity)
else arg.to_series()
.rename(arg.name)
.rename(arg.name or "value")
.reset_index()
.assign(unit=arg.attrs.get("_unit", "")),
.assign(unit=f"{arg.units:~}"),
args,
)

plot_or_plots = self.generate(*args, **kwargs)

if not plot_or_plots:
log.info(
f"{self.__class__.__name__}.generate() returned {repr(plot_or_plots)}; "
"no output"
f"{self.__class__.__name__}.generate() returned {plot_or_plots!r}; no "
"output"
)
return

Expand Down
215 changes: 127 additions & 88 deletions genno/computations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,18 @@
# - To avoid ambiguity, computations should not have default arguments. Define default
# values for the corresponding methods on the Computer class.
import logging
import re
from pathlib import Path
from typing import Any, Hashable, Mapping, Optional, Union, cast
from typing import Any, Collection, Hashable, Mapping, Optional, Union, cast

import pandas as pd
import pint
from xarray.core.utils import either_dict_or_kwargs

from genno.core.attrseries import AttrSeries
from genno.core.attrseries import AttrSeries, _multiindex_of
from genno.core.quantity import Quantity, assert_quantity, maybe_densify
from genno.core.sparsedataarray import SparseDataArray
from genno.util import collect_units, filter_concat_args
from genno.util import UnitLike, collect_units, filter_concat_args

__all__ = [
"add",
Expand Down Expand Up @@ -295,6 +296,40 @@ def disaggregate_shares(quantity, shares):
return result


def div(numerator, denominator):
"""Compute the ratio `numerator` / `denominator`.
Parameters
----------
numerator : .Quantity
denominator : .Quantity
"""
# Handle units
u_num, u_denom = collect_units(numerator, denominator)

if isinstance(numerator, AttrSeries):
result = numerator / denominator.align_levels(numerator)
else:
result = numerator / denominator

# This shouldn't be necessary; would instead prefer:
# result.attrs["_unit"] = u_num / u_denom
# … but is necessary to avoid an issue when the operands are different Unit classes
ureg = pint.get_application_registry()
result.attrs["_unit"] = ureg.Unit(u_num) / ureg.Unit(u_denom)

if isinstance(result, AttrSeries):
result.dropna(inplace=True)

return result


#: Alias of :func:`mul`, for backwards compatibility.
#:
#: .. note:: This may be deprecated and possibly removed in a future version.
ratio = div


def group_sum(qty, group, sum):
"""Group by dimension *group*, then sum across dimension *sum*.
Expand Down Expand Up @@ -327,7 +362,12 @@ def interpolate(
return qty.interp(coords, method, assume_sorted, kwargs, **coords_kwargs)


def load_file(path, dims={}, units=None, name=None):
def load_file(
path: Path,
dims: Union[Collection[Hashable], Mapping[Hashable, Hashable]] = {},
units: UnitLike = None,
name: Optional[str] = None,
) -> Any:
"""Read the file at *path* and return its contents as a :class:`.Quantity`.
Some file formats are automatically converted into objects for direct use in genno
Expand All @@ -354,46 +394,8 @@ def load_file(path, dims={}, units=None, name=None):
# TODO optionally cache: if the same Computer is used repeatedly, then the file will
# be read each time; instead cache the contents in memory.
# TODO strip leading/trailing whitespace from column names
# TODO read units from header
if path.suffix == ".csv":
data = pd.read_csv(path, comment="#", skipinitialspace=True)

# Index columns
index_columns = data.columns.tolist()
index_columns.remove("value")

try:
# Retrieve the unit column from the file
units_col = data.pop("unit").unique()
index_columns.remove("unit")
except KeyError:
pass # No such column; use None or argument value
else:
# Use a unique value for units of the quantity
if len(units_col) > 1:
raise ValueError(
f"Cannot load {path} with non-unique units {repr(units_col)}"
)
elif units and units not in units_col:
raise ValueError(
f"Explicit units {units} do not match {units_col[0]} in {path}"
)
units = units_col[0]

if len(dims):
# Convert a list, set, etc. to a dict
dims = dims if isinstance(dims, Mapping) else {d: d for d in dims}

# - Drop columns not mentioned in *dims*
# - Rename columns according to *dims*
data = data.drop(columns=set(index_columns) - set(dims.keys())).rename(
columns=dims
)

index_columns = list(data.columns)
index_columns.pop(index_columns.index("value"))

return Quantity(data.set_index(index_columns)["value"], units=units, name=name)
return _load_file_csv(path, dims, units, name)
elif path.suffix in (".xls", ".xlsx"):
# TODO define expected Excel data input format
raise NotImplementedError # pragma: no cover
Expand All @@ -405,6 +407,80 @@ def load_file(path, dims={}, units=None, name=None):
return open(path).read()


UNITS_RE = re.compile(r"# Units?: (.*)\s+")


def _load_file_csv(
path: Path,
dims: Union[Collection[Hashable], Mapping[Hashable, Hashable]] = {},
units: UnitLike = None,
name: Optional[str] = None,
) -> Quantity:
# Peek at the header, if any, and match a units expression
with open(path, "r", encoding="utf-8") as f:
for line, match in map(lambda l: (l, UNITS_RE.fullmatch(l)), f):
if match:
if units:
log.warning(f"Replace {match.group(1)!r} from file with {units!r}")
else:
units = match.group(1)
break
elif not line.startswith("#"):
break # Give up at first non-commented line

# Read the data
data = pd.read_csv(path, comment="#", skipinitialspace=True)

# Index columns
index_columns = data.columns.tolist()
index_columns.remove("value")

try:
# Retrieve the unit column from the file
units_col = data.pop("unit").unique()
index_columns.remove("unit")
except KeyError:
pass # No such column; use None or argument value
else:
# Use a unique value for units of the quantity
if len(units_col) > 1:
raise ValueError(
f"Cannot load {path} with non-unique units {repr(units_col)}"
)
elif units and units not in units_col:
raise ValueError(
f"Explicit units {units} do not match {units_col[0]} in {path}"
)
units = units_col[0]

if dims:
# Convert a list, set, etc. to a dict
dims = dims if isinstance(dims, Mapping) else {d: d for d in dims}

# - Drop columns not mentioned in *dims*
# - Rename columns according to *dims*
data = data.drop(columns=set(index_columns) - set(dims.keys())).rename(
columns=dims
)

index_columns = list(data.columns)
index_columns.pop(index_columns.index("value"))

# Prepare a Quantity object with the (bare) units and any conversion factor
registry = pint.get_application_registry()
units = units or "1.0 dimensionless"
if isinstance(units, str):
uq = registry(units)
elif isinstance(units, pint.Unit):
uq = registry.Quantity(1.0, units)
else:
uq = units

return Quantity(
uq.magnitude * data.set_index(index_columns)["value"], units=uq.units, name=name
)


def index_to(
qty: Quantity,
dim_or_selector: Union[str, Mapping],
Expand Down Expand Up @@ -485,7 +561,7 @@ def pow(a, b):
return result


def product(*quantities):
def mul(*quantities: Quantity) -> Quantity:
"""Compute the product of any number of *quantities*."""
# Iterator over (quantity, unit) tuples
items = zip(quantities, collect_units(*quantities))
Expand All @@ -507,12 +583,10 @@ def product(*quantities):
return result


#: Identical to :func:`product`, but using a name aligned with the Python standard
#: library, e.g. in :mod:`operator`.
#: Alias of :func:`mul`, for backwards compatibility.
#:
#: .. note:: In the future, this will be the canonical name, and :func:`product` will be
#: deprecated and possibly removed.
mul = product
#: .. note:: This may be deprecated and possibly removed in a future version.
product = mul


def relabel(
Expand Down Expand Up @@ -550,7 +624,7 @@ def map_labels(mapper, values):

if isinstance(qty, AttrSeries):
# Prepare a new index
idx = qty.index
idx = _multiindex_of(qty)
for dim, label_map in iter:
# - Look up numerical index of the dimension in `idx`
# - Retrieve the existing levels.
Expand Down Expand Up @@ -583,42 +657,6 @@ def rename_dims(
return qty.rename(new_name_or_name_dict, **names)


def ratio(numerator, denominator):
"""Compute the ratio `numerator` / `denominator`.
Parameters
----------
numerator : .Quantity
denominator : .Quantity
"""
# Handle units
u_num, u_denom = collect_units(numerator, denominator)

if isinstance(numerator, AttrSeries):
result = numerator / denominator.align_levels(numerator)
else:
result = numerator / denominator

# This shouldn't be necessary; would instead prefer:
# result.attrs["_unit"] = u_num / u_denom
# … but is necessary to avoid an issue when the operands are different Unit classes
ureg = pint.get_application_registry()
result.attrs["_unit"] = ureg.Unit(u_num) / ureg.Unit(u_denom)

if isinstance(result, AttrSeries):
result.dropna(inplace=True)

return result


#: Identical to :func:`ratio`, but using a name aligned with the Python standard
#: library, e.g. in :mod:`operator`.
#:
#: .. note:: In the future, this will be the canonical name, and :func:`ratio` will be
#: deprecated and possibly removed.
div = ratio


def select(qty, indexers, inverse=False):
"""Select from *qty* based on *indexers*.
Expand Down Expand Up @@ -659,10 +697,11 @@ def sum(quantity, weights=None, dimensions=None):
weights, w_total = 1, 1
else:
w_total = weights.sum(dim=dimensions)
if 0 == len(w_total.dims):
w_total = w_total.item()

result = (quantity * weights).sum(dim=dimensions) / w_total
result.attrs["_unit"] = collect_units(quantity)[0]

result.units = collect_units(quantity)[0]
return result


Expand Down
2 changes: 1 addition & 1 deletion genno/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ def general(c: Computer, info):
# Inputs
inputs = c.infer_keys(info.get("inputs", []))

if info["comp"] == "product":
if info["comp"] in ("mul", "product"):
key = c.add_product(info["key"], *inputs)
log.info(f"Add {repr(key)} using .add_product()")
else:
Expand Down
3 changes: 3 additions & 0 deletions genno/core/quantity.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ def units(self, value):
def __len__(self) -> int:
... # pragma: no cover

def __mul__(self, other) -> "Quantity":
... # pragma: no cover

def __radd__(self, other):
... # pragma: no cover

Expand Down
2 changes: 1 addition & 1 deletion genno/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def assert_qty_equal(
try:
a = a.sort_index().dropna()
b = b.sort_index().dropna()
except TypeError:
except TypeError: # pragma: no cover
pass
assert_series_equal(a, b, check_dtype=False, **kwargs)
else:
Expand Down

0 comments on commit 7fbf2b3

Please sign in to comment.