diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index a4da30d5c..d3b0913fb 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -100,4 +100,4 @@ jobs: - name: Install pyaerocom run: python -m pip install . --no-deps - name: Run pytest - run: python -m pytest -ra -q --cov --no-cov-on-fail --cov-report xml + run: python -m pytest -ra -q --cov --no-cov-on-fail --cov-report xml \ No newline at end of file diff --git a/pyaerocom/aeroval/experiment_processor.py b/pyaerocom/aeroval/experiment_processor.py index f0599d866..2601d1020 100644 --- a/pyaerocom/aeroval/experiment_processor.py +++ b/pyaerocom/aeroval/experiment_processor.py @@ -70,7 +70,8 @@ def _run_single_entry(self, model_name, obs_name, var_list): if self.cfg.processing_opts.only_json: files_to_convert = col.get_available_coldata_files(var_list) else: - col.run(var_list) + model_read_kwargs = self.cfg.model_cfg[model_name]["kwargs"] + col.run(var_list, model_read_kwargs=model_read_kwargs) files_to_convert = col.files_written if self.cfg.processing_opts.only_colocation: @@ -122,11 +123,7 @@ def run(self, model_name=None, obs_name=None, var_list=None, update_interface=Tr if not self.cfg.model_cfg: logger.info("No model found, will make dummy model data") self.cfg.webdisp_opts.hide_charts = ["scatterplot"] - self.cfg.webdisp_opts.hide_pages = [ - "maps.php", - "intercomp.php", - "overall.php", - ] + self.cfg.webdisp_opts.pages = ["evaluation", "infos"] model_id = make_dummy_model(obs_list, self.cfg) self.cfg.processing_opts.obs_only = True use_dummy_model = True diff --git a/pyaerocom/aeroval/modelentry.py b/pyaerocom/aeroval/modelentry.py index d1f7a08de..f5ff75019 100644 --- a/pyaerocom/aeroval/modelentry.py +++ b/pyaerocom/aeroval/modelentry.py @@ -1,3 +1,4 @@ +import inspect from copy import deepcopy from pyaerocom._lowlevel_helpers import BrowseDict, DictStrKeysListVals, DictType, StrType @@ -55,6 +56,8 @@ def __init__(self, model_id, **kwargs): self.model_rename_vars = {} self.model_read_aux = {} + self.kwargs = kwargs + self.update(**kwargs) @property @@ -64,6 +67,16 @@ def aux_funs_required(self): """ return True if bool(self.model_read_aux) else False + def json_repr(self) -> dict: + sup_rep = super().json_repr() + + for key in sup_rep["model_read_aux"]: + sup_rep["model_read_aux"][key]["fun"] = inspect.getsource( + deepcopy(sup_rep["model_read_aux"][key]["fun"]) + ) + + return sup_rep + def get_vars_to_process(self, obs_vars: list) -> tuple: """ Get lists of obs / mod variables to be processed diff --git a/pyaerocom/aeroval/setupclasses.py b/pyaerocom/aeroval/setupclasses.py index 6652aae78..5c1a8da19 100644 --- a/pyaerocom/aeroval/setupclasses.py +++ b/pyaerocom/aeroval/setupclasses.py @@ -261,7 +261,7 @@ class WebDisplaySetup(BaseModel): hide_charts: tuple[str, ...] = () hide_pages: tuple[str, ...] = () ts_annotations: dict[str, str] = Field(default_factory=dict) - add_pages: tuple[str, ...] = () + pages: tuple[str, ...] = ["maps", "evaluation", "intercomp", "overall", "infos"] class EvalRunOptions(BaseModel): diff --git a/pyaerocom/colocation_auto.py b/pyaerocom/colocation_auto.py index 790a7c640..35fb1e880 100644 --- a/pyaerocom/colocation_auto.py +++ b/pyaerocom/colocation_auto.py @@ -582,7 +582,10 @@ def _instantiate_gridded_reader(self, what): data_id = self.colocation_setup.obs_id data_dir = self.colocation_setup.obs_data_dir reader_class = self._get_gridded_reader_class(what=what) - reader = reader_class(data_id=data_id, data_dir=data_dir) + if what == "model" and reader_class in self.MODELS_WITH_KWARGS: + reader = reader_class(data_id=data_id, data_dir=data_dir, **self.model_read_kwargs) + else: + reader = reader_class(data_id=data_id, data_dir=data_dir) return reader def _get_gridded_reader_class(self, what): diff --git a/pyaerocom/config.py b/pyaerocom/config.py index 8a05355a2..a66357feb 100644 --- a/pyaerocom/config.py +++ b/pyaerocom/config.py @@ -3,6 +3,7 @@ import os from configparser import ConfigParser from pathlib import Path +from typing import Union import numpy as np @@ -18,6 +19,7 @@ from pyaerocom.grid_io import GridIO from pyaerocom.region_defs import ALL_REGION_NAME, HTAP_REGIONS, OLD_AEROCOM_REGIONS from pyaerocom.varcollection import VarCollection +from pyaerocom.variable import Variable logger = logging.getLogger(__name__) @@ -220,6 +222,9 @@ def __init__(self, config_file=None, try_infer_environment=True): self._var_param = None self._coords = None + # Custom variables + self._custom_var_dict = None + # Attributes that are used to store search directories self.OBSLOCS_UNGRIDDED = {} self.OBS_UNGRIDDED_POST = {} @@ -308,6 +313,25 @@ def infer_basedir_and_config(self): return (basedir, self._config_files[cfg_id]) raise FileNotFoundError("Could not establish access to any registered database") + def register_custom_variables( + self, vars: Union[dict[str, Variable], dict[str, dict[str, str]]] + ) -> None: + var_dict = {} + for key, item in vars.items(): + if isinstance(item, Variable): + var_dict[key] = item + elif isinstance(item, dict): + if "var_name" in item and "units" in item: + var_dict[key] = Variable(**item) + else: + raise ValueError( + f"Dict item {item} must atleast have the keys 'var_name' and 'units'" + ) + else: + raise ValueError(f"Item {item} must be either dict or Variable") + self._custom_var_dict = var_dict.copy() + self._var_param = None + @property def has_access_users_database(self): chk_dir = self._check_subdirs_cfg["users-db"] @@ -482,6 +506,10 @@ def VARS(self): """Instance of class VarCollection (for default variable information)""" if self._var_param is None: # has not been accessed before self._var_param = VarCollection(self._var_info_file) + + if self._custom_var_dict is not None: + for var in self._custom_var_dict: + self._var_param.add_var(self._custom_var_dict[var]) return self._var_param @property diff --git a/pyaerocom/data/emep_variables.ini b/pyaerocom/data/emep_variables.ini index 2fcdcc26a..c828f6bba 100644 --- a/pyaerocom/data/emep_variables.ini +++ b/pyaerocom/data/emep_variables.ini @@ -105,4 +105,7 @@ concCocCoarse = "SURF_ugC_PM_OMCOARSE" concecFine = "SURF_ug_ECFINE" concecCoarse = "SURF_ug_ECCOARSE" concoxn = "SURF_ugN_OXN" -vmrno = "SURF_ppb_NO" \ No newline at end of file +vmrno = "SURF_ppb_NO" + +#Data used for Pollen data +concspores = "SURF_ug_FUNGAL_SPORES" \ No newline at end of file diff --git a/pyaerocom/data/variables.ini b/pyaerocom/data/variables.ini index 15844ef58..8d626e036 100644 --- a/pyaerocom/data/variables.ini +++ b/pyaerocom/data/variables.ini @@ -3078,6 +3078,22 @@ unit = ug C m-3 description=Mass concentration of total carbon unit = ug m-3 +[conchoa] +description=Mass concentration hydrocarbon like OA +unit = ug m-3 + +[concbcbb] +description=Mass concentration elemental carbon, biomass burning +unit = ug m-3 + +[concspores] +description=Mass concentration fungal spores +unit = ug m-3 + +[concpolyol] +description=Mass concentration Polyol +unit = ug m-3 + [conco3] description=Mass concentration of ozone unit = ug m-3 diff --git a/pyaerocom/io/mscw_ctm/additional_variables.py b/pyaerocom/io/mscw_ctm/additional_variables.py index 07ffcc74e..64e4ed6e8 100644 --- a/pyaerocom/io/mscw_ctm/additional_variables.py +++ b/pyaerocom/io/mscw_ctm/additional_variables.py @@ -385,3 +385,12 @@ def calc_concSso2(concso2): concSso2.attrs["units"] = "ug S m-3" return concSso2 + + +def calc_concpolyol(concspores): + # polyol is 4.5% of spores. Spores is in ug/cm3 in Gunnars run, eventhough the unit is marked as ugm-3, so a factor of 1000 is needed for unit change + factor = 45.0 / 1000.0 + + concpolyol = concspores.copy(deep=True) * factor + concpolyol.attrs["units"] = "ug m-3" + return concpolyol diff --git a/pyaerocom/io/mscw_ctm/emep_variables.toml b/pyaerocom/io/mscw_ctm/emep_variables.toml index 0dda9c5dc..050c6974f 100644 --- a/pyaerocom/io/mscw_ctm/emep_variables.toml +++ b/pyaerocom/io/mscw_ctm/emep_variables.toml @@ -105,4 +105,5 @@ concCocCoarse = "SURF_ugC_PM_OMCOARSE" concecFine = "SURF_ug_ECFINE" concecCoarse = "SURF_ug_ECCOARSE" concoxn = "SURF_ugN_OXN" -vmrno = "SURF_ppb_NO" \ No newline at end of file +vmrno = "SURF_ppb_NO" +concspores = "SURF_ug_FUNGAL_SPORES" \ No newline at end of file diff --git a/pyaerocom/io/mscw_ctm/reader.py b/pyaerocom/io/mscw_ctm/reader.py index 00106ecf8..b880d81c6 100755 --- a/pyaerocom/io/mscw_ctm/reader.py +++ b/pyaerocom/io/mscw_ctm/reader.py @@ -24,6 +24,7 @@ calc_concno3pm10, calc_concno3pm25, calc_concNtnh, + calc_concpolyol, calc_concso4t, calc_concSso2, calc_concsspm25, @@ -103,6 +104,8 @@ class ReadMscwCtm: "concNno2": ["concno2"], "concSso2": ["concso2"], "vmro3": ["conco3"], + # For Pollen + # "concpolyol": ["concspores"], } # Functions that are used to compute additional variables (i.e. one @@ -145,6 +148,7 @@ class ReadMscwCtm: "concNno2": calc_concNno2, "concSso2": calc_concSso2, "vmro3": calc_vmro3, + # "concpolyol": calc_concpolyol, } #: supported filename masks, placeholder is for frequencies @@ -167,7 +171,7 @@ class ReadMscwCtm: DEFAULT_FILE_NAME = "Base_day.nc" - def __init__(self, data_id=None, data_dir=None): + def __init__(self, data_id=None, data_dir=None, **kwargs): self._data_dir = None # opened dataset (for performance boost), will be reset if data_dir is # changed @@ -180,6 +184,12 @@ def __init__(self, data_id=None, data_dir=None): self._files = None self.var_map = emep_variables() + if "emep_vars" in kwargs: + new_map = kwargs["emep_vars"] + if isinstance(new_map, dict): + self.var_map.update(new_map) + else: + logger.warn(f"New map {new_map} is not a dict. Skipping") if data_dir is not None: if not isinstance(data_dir, str) or not os.path.exists(data_dir): @@ -765,6 +775,30 @@ def preprocess_units(units, prefix): return "m-1" return units + def add_aux_compute(self, var_name, vars_required, fun): + """Register new variable to be computed + + Parameters + ---------- + var_name : str + variable name to be computed + vars_required : list + list of variables to read, that are required to compute `var_name` + fun : callable + function that takes a list of `GriddedData` objects as input and + that are read using variable names specified by `vars_required`. + """ + if isinstance(vars_required, str): + vars_required = [vars_required] + if not isinstance(vars_required, list): + raise ValueError( + f"Invalid input for vars_required. Need str or list. Got: {vars_required}" + ) + elif not callable(fun): + raise ValueError("Invalid input for fun. Input is not a callable object") + self.AUX_REQUIRES[var_name] = vars_required + self.AUX_FUNS[var_name] = fun + class ReadEMEP(ReadMscwCtm): """Old name of :class:`ReadMscwCtm`.""" diff --git a/pyaerocom/io/pyaro/read_pyaro.py b/pyaerocom/io/pyaro/read_pyaro.py index 936b1d287..8c319338e 100644 --- a/pyaerocom/io/pyaro/read_pyaro.py +++ b/pyaerocom/io/pyaro/read_pyaro.py @@ -99,6 +99,17 @@ class PyaroToUngriddedData: _STOPTIMEINDEX = 10 # can be used to store stop time of acq. _TRASHINDEX = 11 # index where invalid data can be moved to (e.g. when outliers are removed) + # List of keys needed by every station from Pyaro. Used to find extra metadata + STATION_KEYS = ( + "station", + "latitude", + "longitude", + "altitude", + "long_name", + "country", + "url", + ) + def __init__(self, config: PyaroConfig) -> None: self.data: UngriddedData = UngriddedData() self.config = config @@ -218,6 +229,9 @@ def _get_metadata_from_pyaro(self, station: Station) -> list[dict[str, str]]: return metadata + def _get_additional_metadata(self, station: Station) -> list[dict[str, str]]: + return station.metadata + def _make_single_ungridded_metadata( self, station: Station, name: str, ts_type: Optional[TsType], units: dict[str, str] ) -> MetadataEntry: @@ -233,34 +247,11 @@ def _make_single_ungridded_metadata( country=station["country"], ts_type=str(ts_type) if ts_type is not None else "undefined", ) - entry.update(self._get_metadata_from_pyaro(station)) + entry.update(self._get_metadata_from_pyaro(station=station)) + entry.update(self._get_additional_metadata(station=station)) return MetadataEntry(entry) - def _make_ungridded_metadata( - self, stations: dict[str, Station], var_idx: dict[str, int], units: dict[str, str] - ) -> Metadata: - idx = 0 - metadata = {} - for name, station in stations.items(): - metadata[idx] = dict( - data_id=self.config.name, - variables=list(self.get_variables()), - var_info=units, - latitude=station["latitude"], - longitude=station["longitude"], - altitude=station["altitude"], - station_name=station["long_name"], - station_id=name, - country=station["country"], - ts_type="undefined", # TEMP: Changes dynamically below - ) - - metadata[idx].update(self._get_metadata_from_pyaro(station)) - idx += 1 - - return Metadata(metadata) - def _pyaro_dataline_to_ungriddeddata_dataline( self, data: np.void, idx: int, var_idx: int ) -> np.ndarray: diff --git a/pyaerocom/scripts/cams2_83/evaluation.py b/pyaerocom/scripts/cams2_83/evaluation.py index f34c56189..a8037cefb 100644 --- a/pyaerocom/scripts/cams2_83/evaluation.py +++ b/pyaerocom/scripts/cams2_83/evaluation.py @@ -1,7 +1,6 @@ from __future__ import annotations import logging -import time from concurrent.futures import ProcessPoolExecutor, as_completed from datetime import date, timedelta from enum import Enum @@ -179,7 +178,7 @@ def runnermos( logger.info("Running Statistics (MOS)") ExperimentProcessor(stp).run() - print("Done Running Statistics (MOS)") + logger.info("Done Running Statistics (MOS)") def runnermedianscores( @@ -198,8 +197,6 @@ def runnermedianscores( stp = EvalSetup(**cfg) - start = time.time() - logger.info( "Running CAMS2_83 Specific Statistics, cache is not cleared, colocated data is assumed in place, regular statistics are assumed to have been run" ) @@ -216,4 +213,4 @@ def runnermedianscores( logger.info(f"Making median scores plot with pool {pool} and analysis {analysis}") CAMS2_83_Processer(stp).run(analysis=analysis) - print(f"Long run: {time.time() - start} sec") + logger.info("Median scores run finished") diff --git a/pyaerocom_env.yml b/pyaerocom_env.yml index 86599b61e..e7a060184 100644 --- a/pyaerocom_env.yml +++ b/pyaerocom_env.yml @@ -28,7 +28,7 @@ dependencies: - pip: - geojsoncontour - geocoder_reverse_natural_earth >= 0.0.2 - - pyaro + - pyaro >= 0.0.8 ## testing - pytest >=7.4 - pytest-dependency diff --git a/pyproject.toml b/pyproject.toml index d48f8ecad..f90f913b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,7 @@ dependencies = [ 'typing-extensions>=4.0.1; python_version < "3.11"', # https://github.com/SciTools/cf-units/issues/218 'cf-units>=3.1', - "pyaro", + "pyaro>=0.0.8", "pydantic>2", ] diff --git a/tests/cams2_83/test_cams2_83_cli.py b/tests/cams2_83/test_cams2_83_cli.py index 31f413954..5c8761ecf 100644 --- a/tests/cams2_83/test_cams2_83_cli.py +++ b/tests/cams2_83/test_cams2_83_cli.py @@ -10,14 +10,7 @@ runner = CliRunner() -@pytest.fixture() -def fake_config(monkeypatch, patched_config): - def fake_make_config(*args, **kwargs): - return patched_config - - monkeypatch.setattr("pyaerocom.scripts.cams2_83.cli.make_config", fake_make_config) - - +@pytest.mark.usefixtures("fake_ExperimentProcessor", "reset_cachedir") def test_clearcache( monkeypatch, fake_cache_path: Path, @@ -26,16 +19,7 @@ def test_clearcache( ): assert list(fake_cache_path.glob("*.pkl")) - def do_not_run(self, model_name=None, obs_name=None, var_list=None, update_interface=True): - assert model_name is None - assert obs_name is None - assert var_list is None - assert update_interface is True - - monkeypatch.setattr( - "pyaerocom.scripts.cams2_83.evaluation.ExperimentProcessor.run", do_not_run - ) - options = f"forecast week 2024-03-16 2024-03-23 --model-path {tmp_path} --obs-path {tmp_path} --data-path {tmp_path} --coldata-path {tmp_path} --name 'Test'" + options = f"forecast week 2024-03-16 2024-03-23 --model-path {tmp_path} --obs-path {tmp_path} --data-path {tmp_path} --coldata-path {tmp_path} --cache {fake_cache_path} --name 'Test'" result = runner.invoke(app, options.split()) assert "Running Statistics" in caplog.text assert result.exit_code == 0 @@ -43,6 +27,7 @@ def do_not_run(self, model_name=None, obs_name=None, var_list=None, update_inter assert not list(fake_cache_path.glob("*.pkl")) +@pytest.mark.usefixtures("fake_CAMS2_83_Processer", "reset_cachedir") def test_not_cleared_cache( monkeypatch, fake_cache_path: Path, @@ -51,21 +36,6 @@ def test_not_cleared_cache( ): assert list(fake_cache_path.glob("*.pkl")) - def do_not_run( - self, - model_name=None, - obs_name=None, - var_list=None, - update_interface=True, - analysis=False, - ): - assert model_name is None - assert obs_name is None - assert var_list is None - assert analysis is False - assert update_interface is True - - monkeypatch.setattr("pyaerocom.scripts.cams2_83.evaluation.CAMS2_83_Processer.run", do_not_run) options = f"forecast long 2024-03-16 2024-03-23 --model-path {tmp_path} --obs-path {tmp_path} --data-path {tmp_path} --coldata-path {tmp_path} --name 'Test' --medianscores" result = runner.invoke(app, options.split()) assert "Running CAMS2_83 Specific Statistics, cache is not cleared" in caplog.text @@ -75,12 +45,9 @@ def do_not_run( def test_eval_dummy( - fake_cache_path: Path, tmp_path: Path, caplog, ): - assert list(fake_cache_path.glob("*.pkl")) - options = f"forecast day 2024-03-16 2024-03-16 --model-path {tmp_path} --obs-path {tmp_path} --data-path {tmp_path} --coldata-path {tmp_path} --name 'Test'" result = runner.invoke(app, options.split()) assert result.exit_code == 0 @@ -88,12 +55,9 @@ def test_eval_dummy( def test_eval_medianscores_dummy( - fake_cache_path: Path, tmp_path: Path, caplog, ): - assert list(fake_cache_path.glob("*.pkl")) - options = f"analysis long 2023-03-01 2024-02-28 --model-path {tmp_path} --obs-path {tmp_path} --data-path {tmp_path} --coldata-path {tmp_path} --name 'Test' --medianscores" result = runner.invoke(app, options.split()) assert result.exit_code == 0 diff --git a/tests/cams2_83/test_cams2_83_cli_mos.py b/tests/cams2_83/test_cams2_83_cli_mos.py index e6e45eb05..5c2f8f2d3 100644 --- a/tests/cams2_83/test_cams2_83_cli_mos.py +++ b/tests/cams2_83/test_cams2_83_cli_mos.py @@ -11,21 +11,75 @@ @pytest.fixture() -def fake_config(monkeypatch, patched_config): +def fake_config(monkeypatch, patched_config_mos): def fake_make_config(*args, **kwargs): - return patched_config + return patched_config_mos monkeypatch.setattr("pyaerocom.scripts.cams2_83.cli_mos.make_config_mos", fake_make_config) def test_eval_mos_dummy( - fake_cache_path: Path, tmp_path: Path, caplog, ): - assert list(fake_cache_path.glob("*.pkl")) - options = f"season 2024-03-01 2024-05-12 --data-path {tmp_path} --coldata-path {tmp_path} --name 'Test'" result = runner.invoke(app, options.split()) assert result.exit_code == 0 assert "no output available" in caplog.text + + +@pytest.mark.usefixtures("fake_CAMS2_83_Processer", "reset_cachedir") +def test_eval_mos_standard(tmp_path: Path, coldata_mos: Path, caplog): + options = f"day 2024-03-01 2024-03-01 --data-path {tmp_path} --coldata-path {coldata_mos} --cache {tmp_path} --id mos-colocated-data --name 'Test'" + result = runner.invoke(app, options.split()) + assert result.exit_code == 0 + + map_dir = tmp_path / "cams2-83/mos-colocated-data/map" + assert map_dir.is_dir() + + ts_st1 = tmp_path / "cams2-83/mos-colocated-data/ts/AT0ENK1_EEA-NRT-concno2_Surface.json" + assert ts_st1.is_file() + + ts_st2 = tmp_path / "cams2-83/mos-colocated-data/ts/AT0ILL1_EEA-NRT-concno2_Surface.json" + assert ts_st2.is_file() + + ts_st3 = tmp_path / "cams2-83/mos-colocated-data/ts/XK0012A_EEA-NRT-concno2_Surface.json" + assert ts_st3.is_file() + + hm_dir = tmp_path / "cams2-83/mos-colocated-data/hm" + assert hm_dir.is_dir() + + scat_dir = tmp_path / "cams2-83/mos-colocated-data/scat" + assert scat_dir.is_dir() + + contour_dir = tmp_path / "cams2-83/mos-colocated-data/contour" + assert contour_dir.is_dir() + + fc_dir = tmp_path / "cams2-83/mos-colocated-data/forecast" + assert fc_dir.is_dir() + + cfg_out = tmp_path / "cams2-83/mos-colocated-data/cfg_cams2-83_mos-colocated-data.json" + assert cfg_out.is_file() + + colfileE = f"{coldata_mos}/cams2-83/mos-colocated-data/ENS/concno2_concno2_MOD-ENS_REF-EEA-NRT_20240301_20240301_hourly_ALL-wMOUNTAINS.nc" + colfileM = f"{coldata_mos}/cams2-83/mos-colocated-data/MOS/concno2_concno2_MOD-MOS_REF-EEA-NRT_20240301_20240301_hourly_ALL-wMOUNTAINS.nc" + + assert "Running Statistics (MOS)" in caplog.text + assert f"Processing: {colfileE}" in caplog.text + assert f"Processing: {colfileM}" in caplog.text + assert "Finished processing" in caplog.text + assert "Done Running Statistics (MOS)" in caplog.text + + +@pytest.mark.usefixtures("fake_ExperimentProcessor", "reset_cachedir") +def test_eval_mos_medianscores(tmp_path: Path, coldata_mos: Path, caplog): + options = f"season 2024-03-01 2024-03-05 --data-path {tmp_path} --coldata-path {coldata_mos} --cache {tmp_path} --id mos-colocated-data --name 'Test'" + result = runner.invoke(app, options.split()) + assert result.exit_code == 0 + fc_out = tmp_path / "cams2-83/mos-colocated-data/forecast/ALL_EEA-NRT-concno2_Surface.json" + assert fc_out.is_file() + assert "Running CAMS2_83 Specific Statistics" in caplog.text + assert "Processing Component: concno2" + assert "Making subset for ALL, 2024/03/01-2024/03/05 and all" in caplog.text + assert "Finished processing" in caplog.text + assert "Median scores run finished" in caplog.text diff --git a/tests/fixtures/cams2_83/cfg_test_mos.py b/tests/fixtures/cams2_83/cfg_test_mos.py new file mode 100644 index 000000000..503ae9fcd --- /dev/null +++ b/tests/fixtures/cams2_83/cfg_test_mos.py @@ -0,0 +1,6 @@ +CFG = { + "proj_id": "cams2-83", + "exp_id": "mos-colocated-data", + "use_cams2_83": True, + "species_list": ["concno2"], +} diff --git a/tests/fixtures/cams2_83/config.py b/tests/fixtures/cams2_83/config.py index 711900244..d9c99db8f 100644 --- a/tests/fixtures/cams2_83/config.py +++ b/tests/fixtures/cams2_83/config.py @@ -1,10 +1,18 @@ from __future__ import annotations +import os +from datetime import date, timedelta +from itertools import product from pathlib import Path +import numpy as np +import pandas as pd import pytest +import xarray as xr -from . import cfg_test +from pyaerocom import const + +from . import cfg_test, cfg_test_mos @pytest.fixture() @@ -18,8 +26,130 @@ def fake_cache_path(monkeypatch, tmp_path: Path): return tmp_path +@pytest.fixture +def reset_cachedir(): + cache = const.CACHEDIR + yield + const.CACHEDIR = cache + + @pytest.fixture def patched_config(): cfg = cfg_test.CFG assert cfg["proj_id"] == "cams2-83" return cfg + + +@pytest.fixture +def patched_config_mos(): + cfg = cfg_test_mos.CFG + assert cfg["exp_id"] == "mos-colocated-data" + return cfg + + +@pytest.fixture +def fake_CAMS2_83_Processer(monkeypatch): + def do_not_run( + self, + model_name=None, + obs_name=None, + var_list=None, + update_interface=True, + analysis=False, + ): + assert model_name is None + assert obs_name is None + assert var_list is None + assert analysis is False + assert update_interface is True + + monkeypatch.setattr("pyaerocom.scripts.cams2_83.evaluation.CAMS2_83_Processer.run", do_not_run) + + +@pytest.fixture +def fake_ExperimentProcessor(monkeypatch): + def do_not_run(self, model_name=None, obs_name=None, var_list=None, update_interface=True): + assert model_name is None + assert obs_name is None + assert var_list is None + assert update_interface is True + + monkeypatch.setattr( + "pyaerocom.scripts.cams2_83.evaluation.ExperimentProcessor.run", do_not_run + ) + + +@pytest.fixture(scope="module") +def coldata_mos(tmp_path_factory) -> Path: + root: Path = tmp_path_factory.mktemp("data") + + def dataset(model: str, day: int, start: date, end: date) -> xr.Dataset: + hours = (end - start) // timedelta(hours=1) + 1 + ds = xr.Dataset( + data_vars=dict( + concno2=xr.Variable( + ("data_source", "time", "station_name"), + np.zeros((2, hours, 3)), + { + "ts_type": "hourly", + "filter_name": "ALL-wMOUNTAINS", + "ts_type_src": ["hourly", "hourly"], + "var_units": ["ug m-3", "ug m-3"], + "data_level": 3, + "revision_ref": "n/a", + "from_files": "", + "from_files_ref": "None", + "colocate_time": 0, + "obs_is_clim": 0, + "pyaerocom": "0.18.dev0", + "CONV!min_num_obs": str(dict(daily=dict(hourly=18))), + "resample_how": "None", + "obs_name": "EEA-NRT", + "vert_code": "Surface", + "diurnal_only": 0, + "zeros_to_nan": 1, + }, + ) + ), + coords=dict( + data_source=xr.Variable( + "data_source", ["CAMS2_83.NRT", f"CAMS2-83.{model}.day{day}.FC"] + ), + station_name=xr.Variable("station_name", ["AT0ENK1", "AT0ILL1", "XK0012A"]), + latitude=xr.Variable("station_name", [48.39, 47.77, 42.66]), + longitude=xr.Variable("station_name", [13.67, 16.77, 21.08]), + altitude=xr.Variable("station_name", [525, 117, 529]), + time=xr.Variable("time", pd.date_range(start, end, freq="1h")), + ), + ) + + ds["concno2"].attrs.update( + data_source=ds["data_source"].values.tolist(), + var_name=["concno2", "concno2"], + var_name_input=["concno2", "concno2"], + model_name=f"CAMS2-83-{model}-day{day}-FC", + ) + + return ds + + start, end = date(2024, 3, 1), date(2024, 3, 5) + for model, day in product(("ENS", "MOS"), range(4)): + path = ( + root + / f"cams2-83/mos-colocated-data/CAMS2-83-{model}-day{day}-FC/concno2_concno2_MOD-CAMS2-83-{model}-day{day}-FC_REF-EEA-NRT_{start:%Y%m%d}_{end:%Y%m%d}_hourly_ALL-wMOUNTAINS.nc" + ) + path.parent.mkdir(exist_ok=True, parents=True) + dataset(model, day, start, end).to_netcdf(path) + + start, end = date(2024, 3, 1), date(2024, 3, 2) + for model in ("ENS", "MOS"): + path = ( + root + / f"cams2-83/mos-colocated-data/{model}/concno2_concno2_MOD-{model}_REF-EEA-NRT_{start:%Y%m%d}_{start:%Y%m%d}_hourly_ALL-wMOUNTAINS.nc" + ) + path.parent.mkdir(exist_ok=True, parents=True) + ds = dataset(model, 0, start, end) + ds["concno2"].attrs.update(model_name=model) + ds.to_netcdf(path) + + return root diff --git a/tests/fixtures/pyaro.py b/tests/fixtures/pyaro.py index 312d672ca..94e15dd6b 100644 --- a/tests/fixtures/pyaro.py +++ b/tests/fixtures/pyaro.py @@ -34,7 +34,7 @@ def make_csv_test_file(tmp_path: Path) -> Path: j % 4 ] # Rotates over the freqs in a deterministic fashion f.write( - f"{s}, {station}, {coords[i][1]}, {coords[i][0]}, {np.random.normal(10, 5)}, Gg, {date}, {date+pd.Timedelta(delta_t)}, {countries[i]} \n" + f"{s}, {station}, {coords[i][1]}, {coords[i][0]}, {np.random.normal(10, 5)}, Gg, {date}, {date+pd.Timedelta(delta_t)},{countries[i]},{area_type[i]} \n" ) return file @@ -76,6 +76,7 @@ def testconfig_kwargs(tmp_path: Path) -> PyaroConfig: "country": 8, "standard_deviation": "NaN", "flag": "0", + "area_classification": 9, } config = PyaroConfig( @@ -105,6 +106,7 @@ def pyaro_kwargs() -> dict: "country": 8, "standard_deviation": "NaN", "flag": "0", + "area_classification": 9, } return columns diff --git a/tests/io/mscw_ctm/test_reader.py b/tests/io/mscw_ctm/test_reader.py index 8478ee923..7f9679fb7 100644 --- a/tests/io/mscw_ctm/test_reader.py +++ b/tests/io/mscw_ctm/test_reader.py @@ -83,6 +83,7 @@ "concss": "SURF_ug_SS", "concssf": "SURF_ug_SEASALT_F", "concCocpm25": "SURF_ugC_PM_OM25", + "concspores": "SURF_ug_FUNGAL_SPORES", "vmro32m": "SURF_2MO3", "vmro3max": "SURF_MAXO3", "vmro3": "SURF_ppb_O3", @@ -673,3 +674,37 @@ def test_ts_types(data_path: Path, year: str, freq: list[str]): reader.data_dir = str(data_path / year) ts_types = reader.ts_types assert len(ts_types) == len(freq) + + +def test_add_aux_compute(tmp_path: Path): + data_path = emep_data_path( + tmp_path, "day", vars_and_units={"concno3c": "ug m-3", "concno3f": "ug m-3"} + ) + reader = ReadMscwCtm(data_dir=str(data_path / "2017")) + + def calc_concno3(concno3c, concno3f): + concno3 = concno3c.copy(deep=True) + concno3f.copy(deep=True) + concno3.attrs["units"] = "ug m-3" + return concno3 + + new_var_name = "concno3" + vars_required = ["concno3c", "concno3f"] + func = calc_concno3 + + reader.add_aux_compute(new_var_name, vars_required=vars_required, fun=func) + + assert reader.has_var(new_var_name) + + data = reader.read_var(new_var_name, "daily") + + assert data.var_name == new_var_name + + +def test_emep_vars(): + new_var_name = "concno3" + new_mapping = "SURF_ug_NO3_C" + + reader = ReadMscwCtm(emep_vars={new_var_name: new_mapping}) + + assert new_var_name in reader.var_map + assert reader.var_map[new_var_name] == new_mapping diff --git a/tests/io/pyaro/test_read_pyaro.py b/tests/io/pyaro/test_read_pyaro.py index a786ebd4d..eb2e2b921 100644 --- a/tests/io/pyaro/test_read_pyaro.py +++ b/tests/io/pyaro/test_read_pyaro.py @@ -65,6 +65,18 @@ def test_pyarotoungriddeddata_reading_kwargs(pyaro_testdata_kwargs): assert all_stations["stats"][0]["country"].strip() == countries[1] +def test_pyarotoungriddeddata_reading_extra_metadata(pyaro_testdata_kwargs): + obj = pyaro_testdata_kwargs.converter + data = obj.read() + assert isinstance(data, UngriddedData) + + # Checks if stations have correct countries + all_stations = data.to_station_data_all("concso4", add_meta_keys=["area_classification"]) + area_type = ["Rural", "Urban"] + assert all_stations["stats"][1]["area_classification"].strip() == area_type[0] + assert all_stations["stats"][0]["area_classification"].strip() == area_type[1] + + def test_pyarotoungriddeddata_stations(pyaro_testdata): obj = pyaro_testdata.converter diff --git a/tests/test_config.py b/tests/test_config.py index 8b455675f..d40c1d4a2 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -11,6 +11,7 @@ from pyaerocom.data import resources from pyaerocom.grid_io import GridIO from pyaerocom.varcollection import VarCollection +from pyaerocom.variable import Variable from tests.conftest import lustre_avail USER = getpass.getuser() @@ -342,3 +343,33 @@ def test_default_config(): assert cfg.WRITE_FILEIO_ERR_LOG assert isinstance(cfg.GRID_IO, GridIO) + + +def test_register_variable_with_dict(): + test_var_name = "conctestvariabledict" + variables = { + test_var_name: { + "var_name": test_var_name, + "units": "ug m-3", + } + } + const.register_custom_variables(variables) + + vars = const.VARS + + assert test_var_name in vars.find(test_var_name) + + +def test_register_variable_with_Variable(): + test_var_name = "testvariableVariable" + variables = { + test_var_name: Variable( + var_name=test_var_name, + units="ug m-3", + ), + } + const.register_custom_variables(variables) + + vars = const.VARS + + assert test_var_name in vars.all_vars diff --git a/tests/test_varcollection.py b/tests/test_varcollection.py index 71f4b1e2f..8e091b4e9 100644 --- a/tests/test_varcollection.py +++ b/tests/test_varcollection.py @@ -83,7 +83,7 @@ def test_VarCollection_get_var_error(collection: VarCollection): ("*blaaaaaaa*", 0), ("dep*", 9), ("od*", 26), - ("conc*", 90), + ("conc*", 94), ], ) def test_VarCollection_find(collection: VarCollection, search_pattern: str, num: int):