Skip to content

Commit

Permalink
Coerce some columns designated as integer types
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Jul 6, 2020
1 parent e548436 commit 6e6a4f0
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 1 deletion.
9 changes: 9 additions & 0 deletions tests/test_data_collection.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" Tests for data_collection """
import mock
import numpy
import pytest
from mock import patch
from pathlib import Path
Expand Down Expand Up @@ -150,6 +151,10 @@ def test_collect_daily_vanilla():
'TGK',
]

assert isinstance(data.iloc[0]['QN_3'], numpy.int64)
assert isinstance(data.iloc[0]['QN_4'], numpy.int64)
assert isinstance(data.iloc[0]['RSKF'], numpy.int64)


@pytest.mark.remote
def test_collect_daily_humanized():
Expand Down Expand Up @@ -184,6 +189,8 @@ def test_collect_daily_humanized():
'TEMPERATURE_MIN_005',
]

assert isinstance(data.iloc[0]['PRECIPITATION_FORM'], numpy.int64)


@pytest.mark.remote
def test_collect_hourly_vanilla():
Expand All @@ -203,3 +210,5 @@ def test_collect_hourly_vanilla():
'TT_TU',
'RF_TU',
]

assert isinstance(data.iloc[0]['QN_9'], numpy.int64)
32 changes: 32 additions & 0 deletions wetterdienst/additionals/functions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
A set of more general functions used for the organization
"""
import pandas as pd
from typing import Tuple, List, Optional, Union, Callable

from wetterdienst.constants.parameter_mapping import TIME_RESOLUTION_PARAMETER_MAPPING
Expand Down Expand Up @@ -219,6 +220,37 @@ def create_station_data_dtype_mapping(columns: List[str]) -> dict:
return station_data_dtype_mapping


def coerce_integer_columns(df: pd.DataFrame, integer_fields: Optional[List[str]] = None) -> None:
    """
    Coerce specific columns designated as Integer types.

    The data frame is modified in place. Each listed column that is present
    in ``df`` is converted to the pandas nullable ``Int64`` dtype, columns
    which are not present are skipped silently.

    :param df: Data frame whose columns are coerced in place.
    :param integer_fields: Names of the columns to coerce. When omitted, the
        quality level fields ("QN", "QN_2", ...) and "RSKF" are used.
    :return: None
    """
    if integer_fields is None:
        integer_fields = [

            # Different quality level fields.
            'QN',
            'QN_2',    # Soil temperature (daily)
            'QN_3',    # Daily observations (daily), Wind (hourly)
            'QN_4',    # Daily observations (daily)
            'QN_7',    # Sun (hourly)
            'QN_8',    # Precipitation, pressure, cloudiness, visibility (hourly)
            'QN_9',    # Air temperature, pressure (hourly)
            'QN_592',  # Solar (hourly)

            # Column "precipitation_form" from "daily observations".
            'RSKF',
        ]

    # Since version 0.24, pandas is able to hold integer dtypes with missing values
    # through the nullable "Int64" extension dtype (note the capital "I").
    # https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html
    # https://stackoverflow.com/questions/21287624/convert-pandas-column-containing-nans-to-dtype-int/21290084
    for integer_field in integer_fields:
        if integer_field in df.columns:
            df[integer_field] = df[integer_field].astype('Int64')


def cast_to_list(iterable_) -> list:
"""
A function that either converts an existing iterable to a list or simply puts the item into a list to make an
Expand Down
5 changes: 4 additions & 1 deletion wetterdienst/parsing_data/parse_data_from_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from functools import partial
from multiprocessing import Pool

from wetterdienst.additionals.functions import create_station_data_dtype_mapping
from wetterdienst.additionals.functions import create_station_data_dtype_mapping, coerce_integer_columns
from wetterdienst.constants.column_name_mapping import GERMAN_TO_ENGLISH_COLUMNS_MAPPING
from wetterdienst.constants.metadata import NA_STRING, STATION_DATA_SEP
from wetterdienst.enumerations.column_names_enumeration import DWDMetaColumns, DWDOrigColumns
Expand Down Expand Up @@ -136,4 +136,7 @@ def _parse_dwd_data(filename_and_file: Tuple[str, BytesIO],
# Coerce the data types appropriately.
data = data.astype(create_station_data_dtype_mapping(data.columns))

# Coerce Integer data types.
coerce_integer_columns(data)

return data

0 comments on commit 6e6a4f0

Please sign in to comment.