Merge pull request earthobservations#55 from panodata/cli-ng
Add command line interface
gutzbenj committed Jun 16, 2020
2 parents 4c5f85d + 4809e0a commit d6a8ab5
Showing 19 changed files with 484 additions and 94 deletions.
4 changes: 3 additions & 1 deletion .gitignore
@@ -1,4 +1,6 @@
*.code-workspace
__pycache__/
dwd_data/
-.idea/
+.idea/
+.venv*
+*.egg-info
4 changes: 4 additions & 0 deletions pytest.ini
@@ -0,0 +1,4 @@
[pytest]

markers =
    remote: Tests accessing the internet.
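For orientation, a marker declared this way would typically be applied to a test and deselected on offline runs; the test below is a hypothetical sketch, not part of this changeset:

import pytest

@pytest.mark.remote
def test_metadata_listing_reaches_dwd_server():
    """Hypothetical remote test; skip it offline with: pytest -m "not remote"."""
    ...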
26 changes: 13 additions & 13 deletions python_dwd/additionals/helpers.py
@@ -14,7 +14,7 @@
from python_dwd.constants.metadata import METADATA_COLUMNS, METADATA_MATCHSTRINGS, FILELIST_NAME, FTP_METADATA_NAME, \
    ARCHIVE_FORMAT, DATA_FORMAT, METADATA_FIXED_COLUMN_WIDTH, STATIONDATA_SEP, NA_STRING, TRIES_TO_DOWNLOAD_FILE, \
    STATID_REGEX, METADATA_1MIN_GEO_PREFIX, METADATA_1MIN_PAR_PREFIX
-from python_dwd.enumerations.column_names_enumeration import DWDColumns
+from python_dwd.enumerations.column_names_enumeration import DWDMetaColumns
from python_dwd.download.download_services import create_remote_file_name
from python_dwd.download.ftp_handling import FTP
from python_dwd.enumerations.parameter_enumeration import Parameter
@@ -120,7 +120,7 @@ def metaindex_for_1minute_data(parameter: Parameter,

    metaindex_df = metaindex_df.astype(METADATA_DTYPE_MAPPING)

-    return metaindex_df.sort_values(DWDColumns.STATION_ID.value).reset_index(drop=True)
+    return metaindex_df.sort_values(DWDMetaColumns.STATION_ID.value).reset_index(drop=True)


def download_metadata_file_for_1minute_data(metadatafile: str) -> BytesIO:
@@ -174,10 +174,10 @@ def combine_geo_and_par_file_to_metadata_df(metadata_file_and_statid: Tuple[Byte
    metadata_par_df = metadata_par_df.rename(columns=str.upper).rename(columns=GERMAN_TO_ENGLISH_COLUMNS_MAPPING)

    metadata_geo_df = metadata_geo_df.iloc[[-1], :]
-    metadata_par_df = metadata_par_df.loc[:, [DWDColumns.FROM_DATE.value, DWDColumns.TO_DATE.value]].dropna()
+    metadata_par_df = metadata_par_df.loc[:, [DWDMetaColumns.FROM_DATE.value, DWDMetaColumns.TO_DATE.value]].dropna()

-    metadata_geo_df[DWDColumns.FROM_DATE.value] = metadata_par_df[DWDColumns.FROM_DATE.value].min()
-    metadata_geo_df[DWDColumns.TO_DATE.value] = metadata_par_df[DWDColumns.TO_DATE.value].max()
+    metadata_geo_df[DWDMetaColumns.FROM_DATE.value] = metadata_par_df[DWDMetaColumns.FROM_DATE.value].min()
+    metadata_geo_df[DWDMetaColumns.TO_DATE.value] = metadata_par_df[DWDMetaColumns.TO_DATE.value].max()

    return metadata_geo_df.reindex(columns=METADATA_COLUMNS)

@@ -236,27 +236,27 @@ def create_fileindex(parameter: Parameter,
f"{str(e)}")

files_server = pd.DataFrame(files_server,
columns=[DWDColumns.FILENAME.value],
columns=[DWDMetaColumns.FILENAME.value],
dtype='str')

files_server.loc[:, DWDColumns.FILENAME.value] = files_server.loc[:, DWDColumns.FILENAME.value].apply(
files_server.loc[:, DWDMetaColumns.FILENAME.value] = files_server.loc[:, DWDMetaColumns.FILENAME.value].apply(
lambda filename: filename.replace(DWD_PATH + '/', ''))

files_server = files_server[files_server.FILENAME.str.contains(
ARCHIVE_FORMAT)]

files_server.loc[:, DWDColumns.FILEID.value] = files_server.index
files_server.loc[:, DWDMetaColumns.FILEID.value] = files_server.index

file_names = files_server.iloc[:, 0].str.split("/").apply(
lambda string: string[-1])

files_server.loc[:, DWDColumns.STATION_ID.value] = file_names.apply(lambda x: re.findall(STATID_REGEX, x).pop(0))
files_server.loc[:, DWDMetaColumns.STATION_ID.value] = file_names.apply(lambda x: re.findall(STATID_REGEX, x).pop(0))

files_server = files_server.iloc[:, [1, 2, 0]]

files_server.iloc[:, 1] = files_server.iloc[:, 1].astype(int)

files_server = files_server.sort_values(by=[DWDColumns.STATION_ID.value])
files_server = files_server.sort_values(by=[DWDMetaColumns.STATION_ID.value])

remove_old_file(file_type=FILELIST_NAME,
parameter=parameter,
@@ -293,11 +293,11 @@ def create_stationdata_dtype_mapping(columns: List[str]) -> dict:
""" Possible columns: STATION_ID, DATETIME, EOR, QN_ and other, measured values like rainfall """

for column in columns:
if column == DWDColumns.STATION_ID.value:
if column == DWDMetaColumns.STATION_ID.value:
stationdata_dtype_mapping[column] = int
elif column == DWDColumns.DATE.value:
elif column == DWDMetaColumns.DATE.value:
stationdata_dtype_mapping[column] = "datetime64"
elif column == DWDColumns.EOR.value:
elif column == DWDMetaColumns.EOR.value:
stationdata_dtype_mapping[column] = str
else:
stationdata_dtype_mapping[column] = float
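As a quick illustration of the renamed enum in use, here is a sketch of calling create_stationdata_dtype_mapping on a typical daily climate summary header; the literal column strings are assumptions based on the enum member names, not part of this changeset:

from python_dwd.additionals.helpers import create_stationdata_dtype_mapping

# Assuming DWDMetaColumns.STATION_ID, .DATE and .EOR resolve to these strings:
mapping = create_stationdata_dtype_mapping(['STATION_ID', 'DATE', 'TMK', 'RSK', 'EOR'])
# -> {'STATION_ID': int, 'DATE': 'datetime64', 'TMK': float, 'RSK': float, 'EOR': str}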
49 changes: 49 additions & 0 deletions python_dwd/additionals/util.py
@@ -0,0 +1,49 @@
""" A set of utility functions """
import sys
import logging

from docopt import docopt
from munch import Munch, munchify


def setup_logging(level=logging.INFO):
    log_format = '%(asctime)-15s [%(name)-30s] %(levelname)-7s: %(message)s'
    logging.basicConfig(
        format=log_format,
        stream=sys.stderr,
        level=level)

    # Silence INFO messages from numexpr.
    numexpr_logger = logging.getLogger('numexpr')
    numexpr_logger.setLevel(logging.WARN)


def normalize_options(options):
    normalized = {}
    for key, value in options.items():

        # Add primary variant.
        key = key.strip('--<>')
        normalized[key] = value

        # Add secondary variant.
        key = key.replace('-', '_')
        normalized[key] = value

    return munchify(normalized, factory=OptionMunch)


def read_list(data, separator=u','):
    if data is None:
        return []
    result = list(map(lambda x: x.strip(), data.split(separator)))
    if len(result) == 1 and not result[0]:
        result = []
    return result


class OptionMunch(Munch):

    def __setattr__(self, k, v):
        super().__setattr__(k.replace('-', '_'), v)
        super().__setattr__(k.replace('_', '-'), v)
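A minimal usage sketch of these helpers, assuming docopt-style option keys; the values are illustrative:

from python_dwd.additionals.util import normalize_options, read_list

options = normalize_options({'--station': '44,1048', '--period': 'historical,recent'})
print(options.station)            # '44,1048' — leading dashes stripped, attribute access via OptionMunch
print(read_list(options.period))  # ['historical', 'recent']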
169 changes: 169 additions & 0 deletions python_dwd/cli.py
@@ -0,0 +1,169 @@
# -*- coding: utf-8 -*-
import sys
import logging
from docopt import docopt
from dateparser import parse as parsedate
import pandas as pd

from python_dwd import __version__, metadata_for_dwd_data
from python_dwd.additionals.util import normalize_options, setup_logging, read_list
from python_dwd.dwd_station_request import DWDStationRequest
from python_dwd.enumerations.parameter_enumeration import Parameter
from python_dwd.enumerations.period_type_enumeration import PeriodType
from python_dwd.enumerations.time_resolution_enumeration import TimeResolution

log = logging.getLogger(__name__)


def run():
"""
Usage:
dwd stations --parameter=<parameter> --resolution=<resolution> --period=<period> [--persist] [--format=<format>]
dwd readings --station=<station> --parameter=<parameter> --resolution=<resolution> --period=<period> [--persist] [--date=<date>] [--format=<format>]
dwd about [parameters] [resolutions] [periods]
dwd --version
dwd (-h | --help)
Options:
--station=<station> Comma-separated list of station identifiers
--parameter=<parameter> Parameter/variable, e.g. "kl", "air_temperature", "precipitation", etc.
--resolution=<resolution> Dataset resolution: "annual", "monthly", "daily", "hourly", "minute_10", "minute_1"
--period=<period> Dataset period: "historical", "recent", "now"
--persist Save and restore data to filesystem w/o going to the network
--date=<date> Date for filtering data. Can be either a single date(time) or
an ISO-8601 time interval, see https://en.wikipedia.org/wiki/ISO_8601#Time_intervals.
--format=<format> Output format. [Default: json]
--version Show version information
--debug Enable debug messages
-h --help Show this screen
Examples:
# Get list of stations for daily climate summary data in JSON format
dwd stations --parameter=kl --resolution=daily --period=recent
# Get list of stations for daily climate summary data in CSV format
dwd stations --parameter=kl --resolution=daily --period=recent --format=csv
# Get daily climate summary data for stations 44 and 1048
dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent
# Optionally save/restore to/from disk in order to avoid asking upstream servers each time
dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent --persist
# Limit output to specific date
dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent --date=2020-05-01
# Limit output to specified date range in ISO-8601 time interval format
dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=recent --date=2020-05-01/2020-05-05
# The real power horse: Acquire data across historical+recent data sets
dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=historical,recent --date=1969-01-01/2020-06-11
"""

    # Read command line options.
    options = normalize_options(docopt(run.__doc__, version=f'dwd {__version__}'))

    # Setup logging.
    debug = options.get('debug')
    log_level = logging.INFO
    if debug:
        log_level = logging.DEBUG
    setup_logging(log_level)

    if options.about:
        about(options)
        return

    if options.stations:
        df = metadata_for_dwd_data(
            parameter=options.parameter,
            time_resolution=options.resolution,
            period_type=options.period,
            write_file=options.persist,
        )

    elif options.readings:
        request = DWDStationRequest(
            station_ids=read_list(options.station),
            # TODO: Would like to say "climate_summary" instead of "kl" here.
            parameter=options.parameter,
            time_resolution=options.resolution,
            period_type=read_list(options.period),
            humanized_column_names=True,
        )
        data = request.collect_data(
            write_file=options.persist,
            prefer_local=options.persist,
        )
        data = list(data)
        if not data:
            log.error('No data available for given constraints')
            sys.exit(1)
        df = pd.concat(data)

    if options.readings:

        # Filter by station.
        #print(df[df['STATION_ID'] == 1048])

        if options.date:

            # Filter by time interval.
            if '/' in options.date:
                date_from, date_to = options.date.split('/')
                date_from = parsedate(date_from)
                date_to = parsedate(date_to)
                df = df[(date_from <= df['DATE']) & (df['DATE'] <= date_to)]

            # Filter by date.
            else:
                date = parsedate(options.date)
                df = df[date == df['DATE']]

    # Make column names lowercase.
    df = df.rename(columns=str.lower)

    # Output as JSON.
    if options.format == 'json':
        output = df.to_json(orient='records', date_format='iso', indent=4)

    # Output as CSV.
    elif options.format == 'csv':
        output = df.to_csv(index=False, date_format='%Y-%m-%dT%H-%M-%S')

    # Output as XLSX.
    elif options.format == 'excel':
        # TODO: Obtain output file name from command line.
        log.info('Writing "output.xlsx"')
        df.to_excel('output.xlsx', index=False)
        return

    else:
        log.error('Output format must be one of "json", "csv", "excel".')
        sys.exit(1)

    print(output)


def about(options):

    def output(thing):
        for item in thing:
            if item:
                print('-', item.value)

    if options.parameters:
        output(Parameter)

    elif options.resolutions:
        output(TimeResolution)

    elif options.periods:
        output(PeriodType)

    else:
        log.error('Invoke "dwd about" with one of "parameters", "resolutions" or "periods"')
        sys.exit(1)
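The readings branch of run() can also be exercised without the CLI; below is a sketch mirroring the code above, where the station ids, parameter and period are chosen purely for illustration:

import pandas as pd

from python_dwd.dwd_station_request import DWDStationRequest

request = DWDStationRequest(
    station_ids=['44', '1048'],
    parameter='kl',
    time_resolution='daily',
    period_type=['recent'],
    humanized_column_names=True,
)
df = pd.concat(list(request.collect_data(write_file=False, prefer_local=False)))
print(df.rename(columns=str.lower).head())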
58 changes: 38 additions & 20 deletions python_dwd/constants/column_name_mapping.py
@@ -1,28 +1,46 @@
""" mapping from german column names to english column names"""
from numpy import datetime64
-from python_dwd.enumerations.column_names_enumeration import DWDOrigColumns, DWDColumns
+from python_dwd.enumerations.column_names_enumeration import DWDOrigColumns, DWDMetaColumns, DWDDataColumns

GERMAN_TO_ENGLISH_COLUMNS_MAPPING = {
-    DWDOrigColumns.STATION_ID.value: DWDColumns.STATION_ID.value,
-    DWDOrigColumns.DATE.value: DWDColumns.DATE.value,
-    DWDOrigColumns.FROM_DATE.value: DWDColumns.FROM_DATE.value,
-    DWDOrigColumns.TO_DATE.value: DWDColumns.TO_DATE.value,
-    DWDOrigColumns.STATIONHEIGHT.value: DWDColumns.STATIONHEIGHT.value,
-    DWDOrigColumns.LATITUDE.value: DWDColumns.LATITUDE.value,
-    DWDOrigColumns.LATITUDE_ALTERNATIVE.value: DWDColumns.LATITUDE.value,
-    DWDOrigColumns.LONGITUDE.value: DWDColumns.LONGITUDE.value,
-    DWDOrigColumns.LONGITUDE_ALTERNATIVE.value: DWDColumns.LONGITUDE.value,
-    DWDOrigColumns.STATIONNAME.value: DWDColumns.STATIONNAME.value,
-    DWDOrigColumns.STATE.value: DWDColumns.STATE.value
+    DWDOrigColumns.STATION_ID.value: DWDMetaColumns.STATION_ID.value,
+    DWDOrigColumns.DATE.value: DWDMetaColumns.DATE.value,
+    DWDOrigColumns.FROM_DATE.value: DWDMetaColumns.FROM_DATE.value,
+    DWDOrigColumns.TO_DATE.value: DWDMetaColumns.TO_DATE.value,
+    DWDOrigColumns.STATIONHEIGHT.value: DWDMetaColumns.STATIONHEIGHT.value,
+    DWDOrigColumns.LATITUDE.value: DWDMetaColumns.LATITUDE.value,
+    DWDOrigColumns.LATITUDE_ALTERNATIVE.value: DWDMetaColumns.LATITUDE.value,
+    DWDOrigColumns.LONGITUDE.value: DWDMetaColumns.LONGITUDE.value,
+    DWDOrigColumns.LONGITUDE_ALTERNATIVE.value: DWDMetaColumns.LONGITUDE.value,
+    DWDOrigColumns.STATIONNAME.value: DWDMetaColumns.STATIONNAME.value,
+    DWDOrigColumns.STATE.value: DWDMetaColumns.STATE.value,
}

+GERMAN_TO_ENGLISH_COLUMNS_MAPPING_HUMANIZED = {
+    # Daily climate summary
+    DWDOrigColumns.FX.value: DWDDataColumns.FX.value,
+    DWDOrigColumns.FM.value: DWDDataColumns.FM.value,
+    DWDOrigColumns.RSK.value: DWDDataColumns.RSK.value,
+    DWDOrigColumns.RSKF.value: DWDDataColumns.RSKF.value,
+    DWDOrigColumns.SDK.value: DWDDataColumns.SDK.value,
+    DWDOrigColumns.SHK_TAG.value: DWDDataColumns.SHK_TAG.value,
+    DWDOrigColumns.NM.value: DWDDataColumns.NM.value,
+    DWDOrigColumns.VPM.value: DWDDataColumns.VPM.value,
+    DWDOrigColumns.PM.value: DWDDataColumns.PM.value,
+    DWDOrigColumns.TMK.value: DWDDataColumns.TMK.value,
+    DWDOrigColumns.UPM.value: DWDDataColumns.UPM.value,
+    DWDOrigColumns.TXK.value: DWDDataColumns.TXK.value,
+    DWDOrigColumns.TNK.value: DWDDataColumns.TNK.value,
+    DWDOrigColumns.TGK.value: DWDDataColumns.TGK.value,
+}
+
METADATA_DTYPE_MAPPING = {
-    DWDColumns.STATION_ID.value: int,
-    DWDColumns.FROM_DATE.value: datetime64,
-    DWDColumns.TO_DATE.value: datetime64,
-    DWDColumns.STATIONHEIGHT.value: float,
-    DWDColumns.LATITUDE.value: float,
-    DWDColumns.LONGITUDE.value: float,
-    DWDColumns.STATIONNAME.value: str,
-    DWDColumns.STATE.value: str
+    DWDMetaColumns.STATION_ID.value: int,
+    DWDMetaColumns.FROM_DATE.value: datetime64,
+    DWDMetaColumns.TO_DATE.value: datetime64,
+    DWDMetaColumns.STATIONHEIGHT.value: float,
+    DWDMetaColumns.LATITUDE.value: float,
+    DWDMetaColumns.LONGITUDE.value: float,
+    DWDMetaColumns.STATIONNAME.value: str,
+    DWDMetaColumns.STATE.value: str
}
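A sketch of how these mappings are consumed, mirroring the rename/astype chain used in helpers.py; the wrapper function itself is hypothetical and not part of this changeset:

import pandas as pd

from python_dwd.constants.column_name_mapping import (
    GERMAN_TO_ENGLISH_COLUMNS_MAPPING, METADATA_DTYPE_MAPPING)


def normalize_metadata(metadata_df: pd.DataFrame) -> pd.DataFrame:
    """Hypothetical helper: uppercase the original DWD column names,
    rename them to English, and cast to the metadata dtypes."""
    return (metadata_df
            .rename(columns=str.upper)
            .rename(columns=GERMAN_TO_ENGLISH_COLUMNS_MAPPING)
            .astype(METADATA_DTYPE_MAPPING))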
