Skip to content

Commit

Permalink
Improve command line interface for monthly and annual data
Browse files Browse the repository at this point in the history
This adds some DWIM-magic to improve behaviour when acquiring
data from the monthly and annual time resolutions.

It is mostly about appropriate floor/ceil computations for time ranges,
but it also adds the respective names for the FROM_DATE and TO_DATE columns
within monthly and annual data.
  • Loading branch information
amotl committed Jun 16, 2020
1 parent 7637615 commit 233bce8
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 5 deletions.
2 changes: 1 addition & 1 deletion python_dwd/additionals/helpers.py
Expand Up @@ -300,7 +300,7 @@ def create_stationdata_dtype_mapping(columns: List[str]) -> dict:
for column in columns:
if column == DWDMetaColumns.STATION_ID.value:
stationdata_dtype_mapping[column] = int
elif column == DWDMetaColumns.DATE.value:
elif column in (DWDMetaColumns.DATE.value, DWDMetaColumns.FROM_DATE.value, DWDMetaColumns.TO_DATE.value):
stationdata_dtype_mapping[column] = "datetime64"
elif column == DWDMetaColumns.EOR.value:
stationdata_dtype_mapping[column] = str
Expand Down
40 changes: 39 additions & 1 deletion python_dwd/additionals/time_handling.py
@@ -1,7 +1,11 @@
""" date time handling functions """
from typing import Optional
from datetime import datetime
from typing import Optional, Tuple, Union
import pandas as pd
from pandas import Timestamp
from pandas.tseries.offsets import YearBegin, YearEnd, MonthBegin, MonthEnd

from python_dwd.enumerations.time_resolution_enumeration import TimeResolution


def parse_date(date_string: str) -> Optional[Timestamp]:
Expand All @@ -19,3 +23,37 @@ def parse_date(date_string: str) -> Optional[Timestamp]:
return None

return date


def mktimerange(time_resolution: TimeResolution,
                date_from: Union[datetime, str],
                date_to: Optional[Union[datetime, str]] = None) -> Tuple[Timestamp, Timestamp]:
    """
    Compute appropriate time ranges for monthly and annual time resolutions.

    This floors "date_from" to the begin of its month/year and ceils
    "date_to" to the end of its month/year. Dates which already sit on the
    respective boundary are returned unchanged instead of being shifted
    by a whole period.

    Args:
        time_resolution: time resolution as enumeration
        date_from: datetime string or object
        date_to: datetime string or object, defaults to "date_from"

    Returns:
        Tuple of two Timestamps: "date_from" and "date_to"

    Raises:
        NotImplementedError: for any resolution other than annual or monthly
    """

    if date_to is None:
        date_to = date_from

    if time_resolution == TimeResolution.ANNUAL:
        floor_offset, ceil_offset = YearBegin(), YearEnd()

    elif time_resolution == TimeResolution.MONTHLY:
        floor_offset, ceil_offset = MonthBegin(), MonthEnd()

    else:
        raise NotImplementedError("mktimerange only implemented for annual and monthly time ranges")

    # Plain "ts - Begin(1)" / "ts + End(1)" arithmetic jumps a full period
    # when the timestamp is already on the anchor (e.g. 2019-01-01 would
    # become 2018-01-01). rollback/rollforward only move off-anchor dates.
    date_from = floor_offset.rollback(pd.to_datetime(date_from))
    date_to = ceil_offset.rollforward(pd.to_datetime(date_to))

    return date_from, date_to
29 changes: 27 additions & 2 deletions python_dwd/cli.py
@@ -1,13 +1,16 @@
# -*- coding: utf-8 -*-
import sys
import logging

from docopt import docopt
from dateparser import parse as parsedate
import pandas as pd

from python_dwd import __version__, metadata_for_dwd_data
from python_dwd.additionals.time_handling import mktimerange
from python_dwd.additionals.util import normalize_options, setup_logging, read_list
from python_dwd.dwd_station_request import DWDStationRequest
from python_dwd.enumerations.column_names_enumeration import DWDMetaColumns
from python_dwd.enumerations.parameter_enumeration import Parameter
from python_dwd.enumerations.period_type_enumeration import PeriodType
from python_dwd.enumerations.time_resolution_enumeration import TimeResolution
Expand Down Expand Up @@ -61,6 +64,18 @@ def run():
# The real power horse: Acquire data across historical+recent data sets
dwd readings --station=44,1048 --parameter=kl --resolution=daily --period=historical,recent --date=1969-01-01/2020-06-11
# Acquire monthly data for 2020-05
dwd readings --station=44,1048 --parameter=kl --resolution=monthly --period=recent,historical --date=2020-05
# Acquire monthly data from 2017-01 to 2019-12
dwd readings --station=44,1048 --parameter=kl --resolution=monthly --period=recent,historical --date=2017-01/2019-12
# Acquire annual data for 2019
dwd readings --station=44,1048 --parameter=kl --resolution=annual --period=recent,historical --date=2019
# Acquire annual data from 2010 to 2020
dwd readings --station=44,1048 --parameter=kl --resolution=annual --period=recent,historical --date=2010/2020
"""

# Read command line options.
Expand Down Expand Up @@ -116,12 +131,22 @@ def run():
date_from, date_to = options.date.split('/')
date_from = parsedate(date_from)
date_to = parsedate(date_to)
df = df[(date_from <= df['DATE']) & (df['DATE'] <= date_to)]
if request.time_resolution in (TimeResolution.ANNUAL, TimeResolution.MONTHLY):
date_from, date_to = mktimerange(request.time_resolution, date_from, date_to)
expression = (date_from <= df[DWDMetaColumns.FROM_DATE.value]) & (df[DWDMetaColumns.TO_DATE.value] <= date_to)
else:
expression = (date_from <= df[DWDMetaColumns.DATE.value]) & (df[DWDMetaColumns.DATE.value] <= date_to)
df = df[expression]

# Filter by date.
else:
date = parsedate(options.date)
df = df[date == df['DATE']]
if request.time_resolution in (TimeResolution.ANNUAL, TimeResolution.MONTHLY):
date_from, date_to = mktimerange(request.time_resolution, date)
expression = (date_from <= df[DWDMetaColumns.FROM_DATE.value]) & (df[DWDMetaColumns.TO_DATE.value] <= date_to)
else:
expression = (date == df[DWDMetaColumns.DATE.value])
df = df[expression]

# Make column names lowercase.
df = df.rename(columns=str.lower)
Expand Down
2 changes: 2 additions & 0 deletions python_dwd/constants/column_name_mapping.py
Expand Up @@ -7,6 +7,8 @@
DWDOrigColumns.DATE.value: DWDMetaColumns.DATE.value,
DWDOrigColumns.FROM_DATE.value: DWDMetaColumns.FROM_DATE.value,
DWDOrigColumns.TO_DATE.value: DWDMetaColumns.TO_DATE.value,
DWDOrigColumns.FROM_DATE_ALTERNATIVE.value: DWDMetaColumns.FROM_DATE.value,
DWDOrigColumns.TO_DATE_ALTERNATIVE.value: DWDMetaColumns.TO_DATE.value,
DWDOrigColumns.STATIONHEIGHT.value: DWDMetaColumns.STATIONHEIGHT.value,
DWDOrigColumns.LATITUDE.value: DWDMetaColumns.LATITUDE.value,
DWDOrigColumns.LATITUDE_ALTERNATIVE.value: DWDMetaColumns.LATITUDE.value,
Expand Down
2 changes: 2 additions & 0 deletions python_dwd/enumerations/column_names_enumeration.py
Expand Up @@ -16,6 +16,8 @@ class DWDOrigColumns(Enum):
DATE = "MESS_DATUM"
FROM_DATE = "VON_DATUM"
TO_DATE = "BIS_DATUM"
FROM_DATE_ALTERNATIVE = "MESS_DATUM_BEGINN"
TO_DATE_ALTERNATIVE = "MESS_DATUM_ENDE"
STATIONHEIGHT = "STATIONSHOEHE"
LATITUDE = "GEOBREITE"
LATITUDE_ALTERNATIVE = "GEOGR.BREITE"
Expand Down
2 changes: 1 addition & 1 deletion python_dwd/exceptions/start_date_end_date_exception.py
Expand Up @@ -2,4 +2,4 @@


class StartDateEndDateError(Exception):
    """
    Raised when 'start_date' is later than 'end_date'.

    The explanatory message is attached to the exception instance instead of
    being printed while the class body is evaluated, which would emit it on
    every import of this module regardless of whether an error occurred.
    """

    DEFAULT_MESSAGE = "Error: 'start_date' must be smaller or equal to 'end_date'."

    def __init__(self, *args):
        # Fall back to the default message only when the caller gave none,
        # so existing `raise StartDateEndDateError(...)` call sites keep
        # their own arguments untouched.
        if not args:
            args = (self.DEFAULT_MESSAGE,)
        super().__init__(*args)
30 changes: 30 additions & 0 deletions tests/additionals/test_time_handling.py
@@ -0,0 +1,30 @@
import pytest
from dateparser import parse as parsedate
from pandas import Timestamp

from python_dwd.additionals.time_handling import mktimerange
from python_dwd.enumerations.time_resolution_enumeration import TimeResolution


def test_mktimerange_annual():
    """Annual ranges run from January 1st of the first year to December 31st of the last."""

    # Single year: "date_to" is omitted and falls back to "date_from".
    single_year = mktimerange(TimeResolution.ANNUAL, parsedate('2019'))
    assert single_year == (
        Timestamp('2019-01-01 00:00:00'),
        Timestamp('2019-12-31 00:00:00'),
    )

    # Explicit range across several years.
    year_from = parsedate('2010')
    year_to = parsedate('2020')
    multiple_years = mktimerange(TimeResolution.ANNUAL, year_from, year_to)
    assert multiple_years == (
        Timestamp('2010-01-01 00:00:00'),
        Timestamp('2020-12-31 00:00:00'),
    )


def test_mktimerange_monthly():
    """Monthly ranges run from the first day of the first month to the last day of the last."""

    # Single month: "date_to" is omitted and falls back to "date_from".
    single_month = mktimerange(TimeResolution.MONTHLY, parsedate('2020-05'))
    assert single_month == (
        Timestamp('2020-05-01 00:00:00'),
        Timestamp('2020-05-31 00:00:00'),
    )

    # Explicit range across several months and years.
    month_from = parsedate('2017-01')
    month_to = parsedate('2019-12')
    multiple_months = mktimerange(TimeResolution.MONTHLY, month_from, month_to)
    assert multiple_months == (
        Timestamp('2017-01-01 00:00:00'),
        Timestamp('2019-12-31 00:00:00'),
    )


def test_mktimerange_invalid():
    """A resolution other than annual or monthly is rejected with NotImplementedError."""

    unsupported_resolution = TimeResolution.DAILY
    some_day = parsedate('2020-05-01')

    with pytest.raises(NotImplementedError):
        mktimerange(unsupported_resolution, some_day)

0 comments on commit 233bce8

Please sign in to comment.