Skip to content

Commit

Permalink
Properly handle timestamps from the "hourly" resolution data set.
Browse files Browse the repository at this point in the history
Hourly data has a timestamp format of e.g. "2018121300".
So, let's parse it using a pattern like "%Y%m%d%H".
  • Loading branch information
amotl committed Jun 16, 2020
1 parent d2ebabb commit 67b88ce
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
4 changes: 4 additions & 0 deletions python_dwd/additionals/helpers.py
Expand Up @@ -303,3 +303,7 @@ def create_stationdata_dtype_mapping(columns: List[str]) -> dict:
stationdata_dtype_mapping[column] = float

return stationdata_dtype_mapping


def convert_datetime_hourly(value):
    """Parse an hourly-resolution timestamp into a pandas datetime.

    Hourly data encodes its timestamps as year, month, day and hour
    packed together without separators, e.g. ``"2018121300"`` for
    2018-12-13 00:00. The value is parsed with the explicit pattern
    ``%Y%m%d%H`` rather than relying on pandas' format inference.

    :param value: Timestamp in ``YYYYMMDDHH`` layout.
    :return: The result of ``pd.to_datetime`` for ``value``.
    """
    return pd.to_datetime(value, format="%Y%m%d%H")
6 changes: 5 additions & 1 deletion python_dwd/parsing_data/parse_data_from_files.py
Expand Up @@ -4,9 +4,10 @@
from io import BytesIO
import pandas as pd

from python_dwd.additionals.helpers import create_stationdata_dtype_mapping
from python_dwd.additionals.helpers import create_stationdata_dtype_mapping, convert_datetime_hourly
from python_dwd.constants.column_name_mapping import GERMAN_TO_ENGLISH_COLUMNS_MAPPING
from python_dwd.constants.metadata import NA_STRING, STATIONDATA_SEP
from python_dwd.enumerations.column_names_enumeration import DWDMetaColumns

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -78,6 +79,9 @@ def _parse_dwd_data(filename_and_file: Tuple[str, BytesIO]) -> pd.DataFrame:
# Assign meaningful column names (baseline).
data = data.rename(columns=GERMAN_TO_ENGLISH_COLUMNS_MAPPING)

# Properly handle timestamps from "hourly" resolution.
data[DWDMetaColumns.DATE.value] = data[DWDMetaColumns.DATE.value].apply(convert_datetime_hourly)

# Coerce the data types appropriately.
data = data.astype(create_stationdata_dtype_mapping(data.columns))

Expand Down

0 comments on commit 67b88ce

Please sign in to comment.