Climate data interface: better PDF file use; fixes

* Add auto-downloaded built-in PDF file
* Clean up some warnings
* Minor refactor
* Minor README fixes
renewables-ninja · Sep 25, 2018 · cb63a06 · cb63a06
1 parent c3e6380
commit cb63a06
Show file tree

Hide file tree

Showing 8 changed files with 61 additions and 41 deletions.
diff --git a/README.md b/README.md
@@ -1,17 +1,18 @@
 [![Build Status](https://img.shields.io/travis/com/renewables-ninja/gsee/master.svg?style=flat-square)](https://travis-ci.com/renewables-ninja/gsee) [![Coverage](https://img.shields.io/coveralls/renewables-ninja/gsee.svg?style=flat-square)](https://coveralls.io/r/renewables-ninja/gsee) [![PyPI version](https://img.shields.io/pypi/v/gsee.svg?style=flat-square)](https://pypi.python.org/pypi/gsee)
 
-# GSEE: global solar energy estimator
+# GSEE: Global Solar Energy Estimator
 
 `GSEE` is a solar energy simulation library designed for rapid calculations and ease of use. [Renewables.ninja](https://www.renewables.ninja/) uses `GSEE`.
 
 ## Requirements
 
 Works only with Python 3. Required libraries:
 
-* [pyephem](https://pypi.org/project/ephem/)
+* [joblib](https://joblib.readthedocs.io/en/latest/)
 * [numpy](https://numpy.org/)
-* [scipy](https://scipy.org/)
 * [pandas](https://pandas.pydata.org/)
+* [pyephem](https://pypi.org/project/ephem/)
+* [scipy](https://scipy.org/)
 * [xarray](https://xarray.pydata.org/)
 
 ## Installation
@@ -67,24 +68,37 @@ plane_irradiance = gsee.trigon.aperture_irradiance(
 Example use directly reading NetCDF files with GHI, diffuse irradiance fraction, and temperature data:
 
 ```python
+from gsee.climatedata_interface.interface import run_interface
+
 run_interface(
     ghi_tuple=('ghi_input.nc', 'ghi'),  # Tuple of (input file path, variable name)
     diffuse_tuple=('diffuse_fraction_input.nc', 'diff_frac'),
     temp_tuple=('temperature_input.nc', 't2m'),
     outfile='output_file.nc',
-    params=dict(tilt=35, azim=180, tracking=0, capacity=1000)
+    params=dict(tilt=35, azim=180, tracking=0, capacity=1000),
+    frequency='detect'
 )
 ```
 
+Tilt can be given as a latitude-dependent function instead of a static value:
+
+```python
+params = dict(tilt=lambda lat: 0.35396 * lat + 16.84775, ...)
+```
+
 Instead of letting the climate data interface read and prepare data from NetCDF files, an `xarray.Dataset` can also be passed directly (e.g. when using the module in combination with a larger application):
 
 ```python
+from gsee.climatedata_interface.interface import run_interface_from_dataset
+
 result = run_interface_from_dataset(
     data=my_dataset,  # my_dataset is an xarray.Dataset
     params=dict(tilt=35, azim=180, tracking=0, capacity=1000)
 )
 ```
 
+By default, a built-in file with monthly probability density functions (PDFs) is automatically downloaded and used to generate synthetic daily irradiance from annual, seasonal, or monthly input data. Pass `pdfs_file=None` to disable this.
+
 For more information, see the [climate data interface documentation](docs/climatedata-interface.md).
 
 ## Development

diff --git a/docs/climatedata_interface_example.py b/docs/climatedata_interface_example.py
diff --git a/gsee/climatedata_interface/monthly_pdfs.py → gsee/climatedata_interface/generate_pdfs.py b/gsee/climatedata_interface/monthly_pdfs.py → gsee/climatedata_interface/generate_pdfs.py
diff --git a/gsee/climatedata_interface/interface.py b/gsee/climatedata_interface/interface.py
@@ -10,13 +10,14 @@
 import xarray as xr
 
 from gsee.climatedata_interface.pre_gsee_processing import resample_for_gsee
+from gsee.climatedata_interface import util
 
 
 def run_interface_from_dataset(
         data: xr.Dataset,
         params: dict,
         frequency='detect',
-        pdfs_file=None,
+        pdfs_file='builtin',
         num_cores=multiprocessing.cpu_count()) -> xr.Dataset:
     """
     Parameters
@@ -37,6 +38,8 @@ def run_interface_from_dataset(
     pdfs_file: str, optional
         Path to a NetCDF file with probability density functions to use
         for each month. Only for annual, seasonal and monthly data.
+        Default is 'builtin', which automatically downloads and uses a
+        built-in global PDF based on MERRA-2 data. Set to None to disable.
     num_cores: int, optional
         Number of cores that should be used for the computation.
         Default is all available cores.
@@ -68,12 +71,18 @@ def run_interface_from_dataset(
 
     if pdfs_file is not None:
         if frequency in ['A', 'S', 'M']:
-            pdfs = xr.open_dataset(pdfs_file, autoclose=True)
+            if pdfs_file == 'builtin':
+                pdfs = xr.open_dataset(util.return_pdf_path(), autoclose=True)
+            else:
+                pdfs = xr.open_dataset(pdfs_file, autoclose=True)
             pdf_coords = list(product(pdfs['lat'].values, pdfs['lon'].values))
             tree = spatial.KDTree(pdf_coords)
             coord_list_nn = [pdf_coords[int(tree.query([x])[1])] for x in coord_list]
         else:
-            raise ValueError('If `pdfs_file` given, frequency must be "A", "M", or "D"')
+            raise ValueError(
+                'For frequencies other than "A", "M", or "D", '
+                '`pdfs_file` must be explicitly set to None.'
+            )
 
     if num_cores > 1:
         print('Parallel mode: {} cores'.format(num_cores))
@@ -118,7 +127,7 @@ def run_interface(
         diffuse_tuple=('', ''),
         temp_tuple=('', ''),
         timeformat=None,
-        pdfs_file=None,
+        pdfs_file='builtin',
         num_cores=multiprocessing.cpu_count()):
     """
     Input file must include 'time', 'lat' and 'lon' dimensions.
@@ -154,6 +163,8 @@ def run_interface(
     pdfs_file: str, optional
         Path to a NetCDF file with probability density functions to use
         for each month. Only for annual, seasonal and monthly data.
+        Default is 'builtin', which automatically downloads and uses a
+        built-in global PDF based on MERRA-2 data. Set to None to disable.
     num_cores: int, optional
         Number of cores that should be used for the computation.
         Default is all available cores.
@@ -319,7 +330,7 @@ def _parse_cmip_time_data(ds: xr.Dataset):
     """
     # Translates date-string used in CMIP5 data to datetime-objects
     timestr = [str(ti) for ti in ds['time'].values]
-    vfunc = np.vectorize(lambda x: np.datetime64('{}-{}-{}T{:02d}-{}'.format(
+    vfunc = np.vectorize(lambda x: np.datetime64('{}-{}-{}T{:02d}:{}'.format(
         x[:4], x[4:6], x[6:8], int(24 * float('0.' + x[9:])), '00'))
     )
     return vfunc(timestr)

diff --git a/gsee/climatedata_interface/pre_gsee_processing.py b/gsee/climatedata_interface/pre_gsee_processing.py
@@ -125,7 +125,7 @@ def resample_for_gsee(
 def _resample_without_pdfs(df, frequency, params, i, coords, shr_mem, prog_mem):
     if frequency == 'A':
         # Create 2 days, one in spring and one in autumn, which are then calculated by GSEE
-        df.ix[df.index[-1] + pd.DateOffset(years=1)] = np.full(len(df.columns), 0)
+        df.loc[df.index[-1] + pd.DateOffset(years=1)] = np.full(len(df.columns), 0)
         df_yearly12 = df.resample(rule='Q').pad()
         df_yearly12 = df_yearly12[0:-1:2]
         pv = add_kd_run_gsee(df_yearly12, coords, frequency, params)
@@ -283,7 +283,7 @@ def _upsample_df_single_day(indf):
         """
         df = indf.copy()
         # add line at the end so resample treats it like a whole day:
-        df.ix[df.index[-1] + pd.Timedelta('1D')] = np.full(len(df.columns), 0)
+        df.loc[df.index[-1] + pd.Timedelta('1D')] = np.full(len(df.columns), 0)
         df = df.resample(rule='1H').pad(limit=23)
         # removing last line again:
         df = df.drop(df.index[-1])

diff --git a/gsee/climatedata_interface/test_interface.py b/gsee/climatedata_interface/test_interface.py
@@ -22,7 +22,7 @@ def test_run_interface_from_dataset():
             }
         )
         params = {'tilt': 35, 'azim': 180, 'tracking': 0, 'capacity': 1}
-        result = interface.run_interface_from_dataset(ds, params, freq)
+        result = interface.run_interface_from_dataset(ds, params, freq, pdfs_file=None)
 
         assert type(result) == xr.Dataset
         assert ds.dims == result.dims

diff --git a/gsee/climatedata_interface/util.py b/gsee/climatedata_interface/util.py
@@ -0,0 +1,22 @@
+import os
+import urllib.request
+
+
+PDFS_URL = 'https://agitated-khorana-6ac3a1.netlify.com/'
+PDFS_PATH = os.path.join(os.path.dirname(__file__), 'pdf_files')
+PDF_FILENAME = 'MERRA2_rad3x3_2011-2015-PDFs_land_prox.nc'
+
+
+def return_pdf_path():
+    pdf_path = os.path.join(PDFS_PATH, PDF_FILENAME)
+    if not os.path.exists(pdf_path):
+        pdf_url = PDFS_URL + PDF_FILENAME
+        retrieve_resource(pdf_url, pdf_path)
+    return pdf_path
+
+
+def retrieve_resource(url, out_path):
+    pathname, filename = os.path.split(out_path)
+    print('File not yet downloaded, retrieving: {}'.format(filename))
+    os.makedirs(pathname, exist_ok=True)
+    urllib.request.urlretrieve(url, out_path)
diff --git a/setup.py b/setup.py
@@ -46,6 +46,7 @@
     ext_modules=ext_modules,
     zip_safe=False,
     install_requires=[
+        "joblib >= 0.12",
         "numpy >= 1.15.0",
         "pandas >= 0.23.4",
         "pyephem >= 3.7.6",
@@ -57,8 +58,7 @@
         "numpy >= 1.15",
     ],
     extras_require={
-        'multicore': ["joblib >= 0.12"],
-        'monthly_pdfs': ["basemap >= 1.1.0", "seaborn >= 0.9.0"],
+        'generate_pdfs': ["basemap >= 1.1.0", "seaborn >= 0.9.0"],
     },
     classifiers=[
         'Development Status :: 4 - Beta',