Add command line interface module "cli.py"

This adds an entrypoint program "dwd" which can be used to explore the features of this library on the command line.
panodata · Jun 16, 2020 · 4809e0a · 4809e0a
1 parent 94e9a2e
commit 4809e0a
Show file tree

Hide file tree

Showing 4 changed files with 227 additions and 2 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,6 @@
 *.code-workspace
 __pycache__/
 dwd_data/
-.idea/
+.idea/
+.venv*
+*.egg-info
diff --git a/python_dwd/additionals/util.py b/python_dwd/additionals/util.py
@@ -0,0 +1,49 @@
+""" A set of utility functions """
+import sys
+import logging
+
+from docopt import docopt
+from munch import Munch, munchify
+
+
+def setup_logging(level=logging.INFO):
+    """
+    Configure the root logger to write formatted records to stderr.
+
+    :param level: Threshold passed to ``logging.basicConfig``,
+                  ``logging.INFO`` by default.
+    """
+    logging.basicConfig(
+        format='%(asctime)-15s [%(name)-30s] %(levelname)-7s: %(message)s',
+        stream=sys.stderr,
+        level=level,
+    )
+
+    # Suppress INFO messages from numexpr by raising its threshold to WARNING.
+    logging.getLogger('numexpr').setLevel(logging.WARNING)
+
+
+def normalize_options(options):
+    """
+    Convert a raw option mapping into an ``OptionMunch``.
+
+    Every key is stored twice: once with leading/trailing ``-``, ``<`` and
+    ``>`` characters stripped, and once more with the remaining dashes
+    replaced by underscores.
+
+    :param options: Mapping of raw option names to their values.
+    :return: Result of ``munchify`` over the normalized mapping, using
+             ``OptionMunch`` as factory.
+    """
+    result = {}
+    for raw_key, value in options.items():
+        # Primary variant, e.g. "--foo-bar" or "<foo-bar>" becomes "foo-bar".
+        primary = raw_key.strip('--<>')
+        # Secondary variant, e.g. "foo-bar" becomes "foo_bar".
+        secondary = primary.replace('-', '_')
+        result[primary] = value
+        result[secondary] = value
+
+    return munchify(result, factory=OptionMunch)
+
+
+def read_list(data, separator=','):
+    """
+    Split a delimited string into a list of whitespace-stripped items.
+
+    Empty items are dropped, so an empty string, doubled separators or a
+    trailing separator never yield empty entries.
+
+    :param data: String to split, or ``None``.
+    :param separator: Delimiter between items, a comma by default.
+    :return: List of non-empty, stripped items; empty list for ``None``.
+    """
+    if data is None:
+        return []
+    stripped = (item.strip() for item in data.split(separator))
+    return [item for item in stripped if item]
+
+
+class OptionMunch(Munch):
+    """
+    ``Munch`` subclass which mirrors each attribute assignment to both the
+    underscored and the dashed spelling of the attribute name.
+    """
+
+    def __setattr__(self, k, v):
+        # Assign the underscore spelling first, then the dash spelling.
+        for name in (k.replace('-', '_'), k.replace('_', '-')):
+            super().__setattr__(name, v)
diff --git a/python_dwd/cli.py b/python_dwd/cli.py
@@ -0,0 +1,169 @@
+# -*- coding: utf-8 -*-
+import sys
+import logging
+from docopt import docopt
+from dateparser import parse as parsedate
+import pandas as pd
+
+from python_dwd import __version__, metadata_for_dwd_data
+from python_dwd.additionals.util import normalize_options, setup_logging, read_list
+from python_dwd.dwd_station_request import DWDStationRequest
+from python_dwd.enumerations.parameter_enumeration import Parameter
+from python_dwd.enumerations.period_type_enumeration import PeriodType
+from python_dwd.enumerations.time_resolution_enumeration import TimeResolution
+
+# Module-level logger, named after this module.
+log = logging.getLogger(__name__)
+
+
+def run():
+    """
+    Usage:
+      dwd stations --parameter=<parameter> --resolution=<resolution> --period=<period> [--persist] [--format=<format>] [--debug]
+      dwd readings --station=<station> --parameter=<parameter> --resolution=<resolution> --period=<period> [--persist] [--date=<date>] [--format=<format>] [--debug]
+      dwd about [parameters] [resolutions] [periods] [--debug]
+      dwd --version
+      dwd (-h | --help)
+
+    Options:
+      --station=<station>           Comma-separated list of station identifiers
+      --parameter=<parameter>       Parameter/variable, e.g. "kl", "air_temperature", "precipitation", etc.
+      --resolution=<resolution>     Dataset resolution: "annual", "monthly", "daily", "hourly", "minute_10", "minute_1"
+      --period=<period>             Dataset period: "historical", "recent", "now"
+      --persist                     Save and restore data to filesystem w/o going to the network
+      --date=<date>                 Date for filtering data. Can be either a single date(time) or
+                                    an ISO-8601 time interval, see https://en.wikipedia.org/wiki/ISO_8601#Time_intervals.
+      --format=<format>             Output format. [Default: json]
+      --version                     Show version information
+      --debug                       Enable debug messages
+      -h --help                     Show this screen
+
+
+    Examples:
+
+      # Get list of stations for daily climate summary data in JSON format
+      dwd stations --parameter=kl --resolution=daily --period=recent
+
+      # Get list of stations for daily climate summary data in CSV format
+      dwd stations --parameter=kl --resolution=daily --period=recent --format=csv
+
+      # Get daily climate summary data for stations 44 and 1048
+      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent
+
+      # Optionally save/restore to/from disk in order to avoid asking upstream servers each time
+      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent --persist
+
+      # Limit output to specific date
+      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent --date=2020-05-01
+
+      # Limit output to specified date range in ISO-8601 time interval format
+      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent --date=2020-05-01/2020-05-05
+
+      # The real power horse: Acquire data across historical+recent data sets
+      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=historical,recent --date=1969-01-01/2020-06-11
+
+    """
+    # Entrypoint of the "dwd" command line program.
+    #
+    # The docstring above is not only documentation: it is handed to docopt,
+    # which derives the command line grammar from it. Options which should be
+    # accepted must therefore appear in a "Usage" pattern, not only in the
+    # "Options" section.
+    #
+    # The function prints the result to stdout (JSON, CSV) or writes
+    # "output.xlsx" (excel). It terminates the process with exit code 1 when
+    # no data is available, the date can not be parsed or the output format
+    # is unknown.
+
+    # Read command line options.
+    options = normalize_options(docopt(run.__doc__, version=f'dwd {__version__}'))
+
+    # Setup logging.
+    log_level = logging.DEBUG if options.get('debug') else logging.INFO
+    setup_logging(log_level)
+
+    if options.about:
+        about(options)
+        return
+
+    if options.stations:
+        df = metadata_for_dwd_data(
+            parameter=options.parameter,
+            time_resolution=options.resolution,
+            period_type=options.period,
+            write_file=options.persist,
+        )
+
+    elif options.readings:
+        request = DWDStationRequest(
+            station_ids=read_list(options.station),
+            # TODO: Would like to say "climate_summary" instead of "kl" here.
+            parameter=options.parameter,
+            time_resolution=options.resolution,
+            period_type=read_list(options.period),
+            humanized_column_names=True,
+        )
+        data = list(request.collect_data(
+            write_file=options.persist,
+            prefer_local=options.persist,
+        ))
+        if not data:
+            log.error('No data available for given constraints')
+            sys.exit(1)
+        df = pd.concat(data)
+
+        if options.date:
+
+            # A "/" designates an ISO-8601 time interval "<start>/<end>",
+            # otherwise the value is a single date(time).
+            is_interval = '/' in options.date
+            parts = options.date.split('/', 1) if is_interval else [options.date]
+
+            # The date parser signals failure by returning None, which must
+            # not reach the comparisons against the "DATE" column.
+            dates = [parsedate(part) for part in parts]
+            if any(date is None for date in dates):
+                log.error('Unable to parse date "%s"', options.date)
+                sys.exit(1)
+
+            # Filter by time interval, both boundaries inclusive.
+            if is_interval:
+                date_from, date_to = dates
+                df = df[(date_from <= df['DATE']) & (df['DATE'] <= date_to)]
+
+            # Filter by date.
+            else:
+                df = df[dates[0] == df['DATE']]
+
+    # Make column names lowercase.
+    df = df.rename(columns=str.lower)
+
+    # Output as JSON.
+    if options.format == 'json':
+        output = df.to_json(orient='records', date_format='iso', indent=4)
+
+    # Output as CSV, using ISO-8601 timestamps like the JSON output does.
+    elif options.format == 'csv':
+        output = df.to_csv(index=False, date_format='%Y-%m-%dT%H:%M:%S')
+
+    # Output as XLSX.
+    elif options.format == 'excel':
+        # TODO: Obtain output file name from command line.
+        log.info('Writing "output.xlsx"')
+        df.to_excel('output.xlsx', index=False)
+        return
+
+    else:
+        log.error('Output format must be one of "json", "csv", "excel".')
+        sys.exit(1)
+
+    print(output)
+
+
+def about(options):
+    """
+    Print the values of the enumeration selected on the command line.
+
+    Handles ``dwd about parameters``, ``dwd about resolutions`` and
+    ``dwd about periods``. When several are given, only the first one in
+    that order is printed. When none is given, an error is logged and the
+    process exits with status 1.
+
+    :param options: Normalized command line options, providing the
+                    ``parameters``, ``resolutions`` and ``periods`` flags.
+    """
+
+    def output(thing):
+        # Print one "- <value>" line per truthy member.
+        for item in thing:
+            if item:
+                print('-', item.value)
+
+    if options.parameters:
+        output(Parameter)
+
+    elif options.resolutions:
+        output(TimeResolution)
+
+    elif options.periods:
+        output(PeriodType)
+
+    else:
+        # Name the subcommand words exactly as the usage pattern spells them.
+        log.error('Invoke "dwd about" with one of "parameters", "resolutions" or "periods"')
+        sys.exit(1)
diff --git a/setup.py b/setup.py
@@ -15,5 +15,10 @@
     author_email='gutzemann@gmail.com',
     packages=['python_dwd'],  # , 'python_dwd.additionals'
     install_requires=['pandas', 'pathlib',
-                      'zipfile', 'scipy', 'numpy']
+                      'scipy', 'numpy'],
+    entry_points={
+        'console_scripts': [
+            'dwd = python_dwd.cli:run',
+        ]
+    },
 )