diff --git a/.gitignore b/.gitignore
index f16dd4df9..e59c8d3e8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
 *.code-workspace
 __pycache__/
 dwd_data/
-.idea/
\ No newline at end of file
+.idea/
+.venv*
+*.egg-info
diff --git a/python_dwd/additionals/util.py b/python_dwd/additionals/util.py
new file mode 100644
index 000000000..250ea0207
--- /dev/null
+++ b/python_dwd/additionals/util.py
@@ -0,0 +1,58 @@
+""" A set of utility functions """
+import sys
+import logging
+
+from docopt import docopt
+from munch import Munch, munchify
+
+
+def setup_logging(level=logging.INFO):
+    """Configure root logging to stderr at ``level`` and quieten numexpr."""
+    log_format = '%(asctime)-15s [%(name)-30s] %(levelname)-7s: %(message)s'
+    logging.basicConfig(
+        format=log_format,
+        stream=sys.stderr,
+        level=level)
+
+    # Silence INFO messages from numexpr.
+    numexpr_logger = logging.getLogger('numexpr')
+    numexpr_logger.setLevel(logging.WARN)
+
+
+def normalize_options(options):
+    """Return ``options`` as an ``OptionMunch`` with decoration-free keys.
+
+    Each key is stored twice: stripped of leading/trailing ``-``, ``<`` and
+    ``>`` characters, and additionally with inner dashes turned into
+    underscores so it can be used as an attribute name.
+    """
+    normalized = {}
+    for key, value in options.items():
+
+        # Add primary variant.
+        key = key.strip('--<>')
+        normalized[key] = value
+
+        # Add secondary variant.
+        key = key.replace('-', '_')
+        normalized[key] = value
+
+    return munchify(normalized, factory=OptionMunch)
+
+
+def read_list(data, separator=u','):
+    """Split ``data`` on ``separator`` into stripped items (``[]`` if empty)."""
+    if data is None:
+        return []
+    result = list(map(lambda x: x.strip(), data.split(separator)))
+    if len(result) == 1 and not result[0]:
+        result = []
+    return result
+
+
+class OptionMunch(Munch):
+    """Munch that stores attributes under dashed and underscored names."""
+
+    def __setattr__(self, k, v):
+        super().__setattr__(k.replace('-', '_'), v)
+        super().__setattr__(k.replace('_', '-'), v)
diff --git a/python_dwd/cli.py b/python_dwd/cli.py
new file mode 100644
index 000000000..f97fda15a
--- /dev/null
+++ b/python_dwd/cli.py
@@ -0,0 +1,178 @@
+# -*- coding: utf-8 -*-
+import sys
+import logging
+from docopt import docopt
+from dateparser import parse as parsedate
+import pandas as pd
+
+from python_dwd import __version__, metadata_for_dwd_data
+from python_dwd.additionals.util import normalize_options, setup_logging, read_list
+from python_dwd.dwd_station_request import DWDStationRequest
+from python_dwd.enumerations.parameter_enumeration import Parameter
+from python_dwd.enumerations.period_type_enumeration import PeriodType
+from python_dwd.enumerations.time_resolution_enumeration import TimeResolution
+
+log = logging.getLogger(__name__)
+
+
+# NOTE: The docstring of ``run`` is the docopt grammar and help text; docopt
+# parses it at runtime, so editing it changes the accepted command line.
+def run():
+    """
+    Usage:
+      dwd stations --parameter=<parameter> --resolution=<resolution> --period=<period> [--persist] [--format=<format>] [--debug]
+      dwd readings --station=<station> --parameter=<parameter> --resolution=<resolution> --period=<period> [--persist] [--date=<date>] [--format=<format>] [--debug]
+      dwd about [parameters] [resolutions] [periods] [--debug]
+      dwd --version
+      dwd (-h | --help)
+
+    Options:
+      --station=<station>           Comma-separated list of station identifiers
+      --parameter=<parameter>       Parameter/variable, e.g. "kl", "air_temperature", "precipitation", etc.
+      --resolution=<resolution>     Dataset resolution: "annual", "monthly", "daily", "hourly", "minute_10", "minute_1"
+      --period=<period>             Dataset period: "historical", "recent", "now"
+      --persist                     Save and restore data to filesystem w/o going to the network
+      --date=<date>                 Date for filtering data. Can be either a single date(time) or
+                                    an ISO-8601 time interval, see https://en.wikipedia.org/wiki/ISO_8601#Time_intervals.
+      --format=<format>             Output format. [Default: json]
+      --version                     Show version information
+      --debug                       Enable debug messages
+      -h --help                     Show this screen
+
+
+    Examples:
+
+      # Get list of stations for daily climate summary data in JSON format
+      dwd stations --parameter=kl --resolution=daily --period=recent
+
+      # Get list of stations for daily climate summary data in CSV format
+      dwd stations --parameter=kl --resolution=daily --period=recent --format=csv
+
+      # Get daily climate summary data for stations 44 and 1048
+      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent
+
+      # Optionally save/restore to/from disk in order to avoid asking upstream servers each time
+      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent --persist
+
+      # Limit output to specific date
+      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent --date=2020-05-01
+
+      # Limit output to specified date range in ISO-8601 time interval format
+      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent --date=2020-05-01/2020-05-05
+
+      # The real power horse: Acquire data across historical+recent data sets
+      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=historical,recent --date=1969-01-01/2020-06-11
+
+    """
+
+    # Read command line options.
+    options = normalize_options(docopt(run.__doc__, version=f'dwd {__version__}'))
+
+    # Setup logging.
+    debug = options.get('debug')
+    log_level = logging.INFO
+    if debug:
+        log_level = logging.DEBUG
+    setup_logging(log_level)
+
+    if options.about:
+        about(options)
+        return
+
+    if options.stations:
+        df = metadata_for_dwd_data(
+            parameter=options.parameter,
+            time_resolution=options.resolution,
+            period_type=options.period,
+            write_file=options.persist,
+        )
+
+    elif options.readings:
+        request = DWDStationRequest(
+            station_ids=read_list(options.station),
+            # TODO: Would like to say "climate_summary" instead of "kl" here.
+            parameter=options.parameter,
+            time_resolution=options.resolution,
+            period_type=read_list(options.period),
+            humanized_column_names=True,
+        )
+        data = request.collect_data(
+            write_file=options.persist,
+            prefer_local=options.persist,
+        )
+        data = list(data)
+        if not data:
+            log.error('No data available for given constraints')
+            sys.exit(1)
+        df = pd.concat(data)
+
+    if options.readings:
+
+        # Filter by station.
+        #print(df[df['STATION_ID'] == 1048])
+
+        if options.date:
+
+            # Filter by time interval.
+            if '/' in options.date:
+                date_from, date_to = options.date.split('/', 1)
+                date_from = parsedate(date_from)
+                date_to = parsedate(date_to)
+                if date_from is None or date_to is None:
+                    log.error('Unable to parse date interval "%s"', options.date)
+                    sys.exit(1)
+                df = df[(date_from <= df['DATE']) & (df['DATE'] <= date_to)]
+
+            # Filter by date.
+            else:
+                date = parsedate(options.date)
+                if date is None:
+                    log.error('Unable to parse date "%s"', options.date)
+                    sys.exit(1)
+                df = df[date == df['DATE']]
+
+    # Make column names lowercase.
+    df = df.rename(columns=str.lower)
+
+    # Output as JSON.
+    if options.format == 'json':
+        output = df.to_json(orient='records', date_format='iso', indent=4)
+
+    # Output as CSV, with ISO-8601 timestamps.
+    elif options.format == 'csv':
+        output = df.to_csv(index=False, date_format='%Y-%m-%dT%H:%M:%S')
+
+    # Output as XLSX.
+    elif options.format == 'excel':
+        # TODO: Obtain output file name from command line.
+        log.info('Writing "output.xlsx"')
+        df.to_excel('output.xlsx', index=False)
+        return
+
+    else:
+        log.error('Output format must be one of "json", "csv", "excel".')
+        sys.exit(1)
+
+    print(output)
+
+
+def about(options):
+    """Print the ``.value`` of each member of the selected enumeration."""
+
+    def output(thing):
+        for item in thing:
+            if item:
+                print('-', item.value)
+
+    if options.parameters:
+        output(Parameter)
+
+    elif options.resolutions:
+        output(TimeResolution)
+
+    elif options.periods:
+        output(PeriodType)
+
+    else:
+        log.error('Invoke "dwd about" with one of "parameters", "resolutions" or "periods"')
+        sys.exit(1)
diff --git a/setup.py b/setup.py
index 4aa67c56c..ad1561b7d 100644
--- a/setup.py
+++ b/setup.py
@@ -15,5 +15,10 @@
     author_email='gutzemann@gmail.com',
     packages=['python_dwd'],  # , 'python_dwd.additionals'
     install_requires=['pandas', 'pathlib',
-                      'zipfile', 'scipy', 'numpy']
+                      'scipy', 'numpy', 'docopt', 'munch', 'dateparser'],
+    entry_points={
+        'console_scripts': [
+            'dwd = python_dwd.cli:run',
+        ]
+    },
 )