Skip to content

Commit

Permalink
Add command line interface module "cli.py"
Browse files Browse the repository at this point in the history
This adds an entrypoint program "dwd" which can be used
to explore the features of this library on the command line.
  • Loading branch information
amotl committed Jun 16, 2020
1 parent 94e9a2e commit 4809e0a
Show file tree
Hide file tree
Showing 4 changed files with 227 additions and 2 deletions.
4 changes: 3 additions & 1 deletion .gitignore
@@ -1,4 +1,6 @@
*.code-workspace
__pycache__/
dwd_data/
.idea/
.idea/
.venv*
*.egg-info
49 changes: 49 additions & 0 deletions python_dwd/additionals/util.py
@@ -0,0 +1,49 @@
""" A set of utility functions """
import sys
import logging

from docopt import docopt
from munch import Munch, munchify


def setup_logging(level=logging.INFO):
    """
    Configure the root logger to write formatted records to stderr.

    :param level: Threshold passed to ``logging.basicConfig``,
                  defaults to ``logging.INFO``.
    """
    logging.basicConfig(
        level=level,
        stream=sys.stderr,
        format='%(asctime)-15s [%(name)-30s] %(levelname)-7s: %(message)s',
    )

    # Keep numexpr quiet: only let its warnings and above through.
    logging.getLogger('numexpr').setLevel(logging.WARNING)


def normalize_options(options):
    """
    Turn a docopt-style options mapping into an ``OptionMunch``.

    Each key is stored twice: once with any leading/trailing ``-``, ``<``
    and ``>`` characters removed, and once more with the remaining hyphens
    replaced by underscores, so values are reachable as attributes.

    :param options: Mapping of option names to values.
    :return: ``OptionMunch`` holding both spellings of every key.
    """
    result = {}
    for raw_name, setting in options.items():
        # Primary variant, e.g. "--station" -> "station", "<name>" -> "name".
        plain = raw_name.strip('--<>')
        # Secondary variant swaps hyphens for underscores.
        for variant in (plain, plain.replace('-', '_')):
            result[variant] = setting

    return munchify(result, factory=OptionMunch)


def read_list(data, separator=u','):
    """
    Split a delimited string into a list of stripped, non-empty items.

    :param data: String to split, or ``None``.
    :param separator: Delimiter between items, defaults to a comma.
    :return: List of items. Empty for ``None``, for blank input, and for
             input consisting only of separators and whitespace.
    """
    if data is None:
        return []

    # Drop blank items so that stray separators ("44,", "44,,1048") do not
    # produce empty entries, consistent with "" yielding an empty list.
    stripped = (part.strip() for part in data.split(separator))
    return [item for item in stripped if item]


class OptionMunch(Munch):
    """
    ``Munch`` whose attribute assignment stores the value under both the
    underscore spelling and the hyphen spelling of the attribute name.
    """

    def __setattr__(self, k, v):
        # Underscore spelling first, then hyphen spelling.
        for name in (k.replace('-', '_'), k.replace('_', '-')):
            super().__setattr__(name, v)
169 changes: 169 additions & 0 deletions python_dwd/cli.py
@@ -0,0 +1,169 @@
# -*- coding: utf-8 -*-
import sys
import logging
from docopt import docopt
from dateparser import parse as parsedate
import pandas as pd

from python_dwd import __version__, metadata_for_dwd_data
from python_dwd.additionals.util import normalize_options, setup_logging, read_list
from python_dwd.dwd_station_request import DWDStationRequest
from python_dwd.enumerations.parameter_enumeration import Parameter
from python_dwd.enumerations.period_type_enumeration import PeriodType
from python_dwd.enumerations.time_resolution_enumeration import TimeResolution

# Module-level logger, named after this module.
log = logging.getLogger(__name__)


def run():
    """
    Usage:
      dwd stations --parameter=<parameter> --resolution=<resolution> --period=<period> [--persist] [--format=<format>] [--debug]
      dwd readings --station=<station> --parameter=<parameter> --resolution=<resolution> --period=<period> [--persist] [--date=<date>] [--format=<format>] [--debug]
      dwd about [parameters] [resolutions] [periods] [--debug]
      dwd --version
      dwd (-h | --help)

    Options:
      --station=<station>           Comma-separated list of station identifiers
      --parameter=<parameter>       Parameter/variable, e.g. "kl", "air_temperature", "precipitation", etc.
      --resolution=<resolution>     Dataset resolution: "annual", "monthly", "daily", "hourly", "minute_10", "minute_1"
      --period=<period>             Dataset period: "historical", "recent", "now"
      --persist                     Save and restore data to filesystem w/o going to the network
      --date=<date>                 Date for filtering data. Can be either a single date(time) or
                                    an ISO-8601 time interval, see https://en.wikipedia.org/wiki/ISO_8601#Time_intervals.
      --format=<format>             Output format. [Default: json]
      --version                     Show version information
      --debug                       Enable debug messages
      -h --help                     Show this screen

    Examples:

      # Get list of stations for daily climate summary data in JSON format
      dwd stations --parameter=kl --resolution=daily --period=recent

      # Get list of stations for daily climate summary data in CSV format
      dwd stations --parameter=kl --resolution=daily --period=recent --format=csv

      # Get daily climate summary data for stations 44 and 1048
      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent

      # Optionally save/restore to/from disk in order to avoid asking upstream servers each time
      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent --persist

      # Limit output to specific date
      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent --date=2020-05-01

      # Limit output to specified date range in ISO-8601 time interval format
      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent --date=2020-05-01/2020-05-05

      # The real power horse: Acquire data across historical+recent data sets
      dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=historical,recent --date=1969-01-01/2020-06-11

    """
    # Entry point of the "dwd" command line program.
    #
    # The docstring above is not just documentation: it is handed to docopt,
    # which derives the command line grammar from it. Keep the indentation,
    # the blank lines between sections and the two-or-more spaces between an
    # option and its description intact.
    #
    # Depending on the subcommand, this either prints enumeration values
    # ("about"), or builds a DataFrame of stations or readings and emits it
    # as JSON or CSV on stdout, or writes it to "output.xlsx".
    # Exits with status 1 when no data is found, the date cannot be parsed,
    # or the output format is unknown.

    # Read command line options.
    options = normalize_options(docopt(run.__doc__, version=f'dwd {__version__}'))

    # Setup logging. "--debug" has to be part of the usage patterns above,
    # otherwise docopt rejects it and this flag is never set.
    setup_logging(logging.DEBUG if options.get('debug') else logging.INFO)

    if options.about:
        about(options)
        return

    if options.stations:
        df = metadata_for_dwd_data(
            parameter=options.parameter,
            time_resolution=options.resolution,
            period_type=options.period,
            write_file=options.persist,
        )

    elif options.readings:
        request = DWDStationRequest(
            station_ids=read_list(options.station),
            # TODO: Would like to say "climate_summary" instead of "kl" here.
            parameter=options.parameter,
            time_resolution=options.resolution,
            period_type=read_list(options.period),
            humanized_column_names=True,
        )
        data = list(request.collect_data(
            write_file=options.persist,
            prefer_local=options.persist,
        ))
        if not data:
            log.error('No data available for given constraints')
            sys.exit(1)
        df = pd.concat(data)

        if options.date:
            # "<start>/<end>" is a time interval, anything else a single date(time).
            bounds = [parsedate(part) for part in options.date.split('/')]

            # The date parser yields None for input it cannot understand;
            # comparing None against the DATE column would raise a TypeError.
            if len(bounds) > 2 or any(bound is None for bound in bounds):
                log.error('Unable to parse date or time interval "%s"', options.date)
                sys.exit(1)

            if len(bounds) == 2:
                # Filter by time interval, both ends inclusive.
                date_from, date_to = bounds
                df = df[(date_from <= df['DATE']) & (df['DATE'] <= date_to)]
            else:
                # Filter by date.
                df = df[bounds[0] == df['DATE']]

    # Make column names lowercase.
    df = df.rename(columns=str.lower)

    # Output as JSON.
    if options.format == 'json':
        output = df.to_json(orient='records', date_format='iso', indent=4)

    # Output as CSV.
    elif options.format == 'csv':
        # NOTE(review): the time part uses "-" separators, unlike ISO 8601
        # ("%H:%M:%S") and unlike the JSON output -- confirm this is intended.
        output = df.to_csv(index=False, date_format='%Y-%m-%dT%H-%M-%S')

    # Output as XLSX.
    elif options.format == 'excel':
        # TODO: Obtain output file name from command line.
        log.info('Writing "output.xlsx"')
        df.to_excel('output.xlsx', index=False)
        return

    else:
        log.error('Output format must be one of "json", "csv", "excel".')
        sys.exit(1)

    print(output)


def about(options):
    """
    Print the values of the enumerations requested via "dwd about".

    The usage pattern allows any combination of "parameters", "resolutions"
    and "periods"; every requested topic is listed, in that order.
    Logs an error and exits with status 1 when no topic was requested.

    :param options: Normalized command line options.
    """
    # Subcommand flag -> enumeration whose values it lists.
    topics = (
        ('parameters', Parameter),
        ('resolutions', TimeResolution),
        ('periods', PeriodType),
    )

    requested = [enumeration for flag, enumeration in topics if options.get(flag)]
    if not requested:
        log.error('Invoke "dwd about" with one of "parameters", "resolutions" or "periods"')
        sys.exit(1)

    for enumeration in requested:
        for item in enumeration:
            if item:
                print('-', item.value)
7 changes: 6 additions & 1 deletion setup.py
Expand Up @@ -15,5 +15,10 @@
author_email='gutzemann@gmail.com',
packages=['python_dwd'], # , 'python_dwd.additionals'
install_requires=['pandas', 'pathlib',
'zipfile', 'scipy', 'numpy']
'scipy', 'numpy'],
entry_points={
'console_scripts': [
'dwd = python_dwd.cli:run',
]
},
)

0 comments on commit 4809e0a

Please sign in to comment.