Skip to content

Commit

Permalink
Coerce some columns designated as integer types
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Jul 13, 2020
1 parent 3eea1b3 commit ff2c36d
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 0 deletions.
9 changes: 9 additions & 0 deletions tests/test_data_collection.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" Tests for data_collection """
import mock
import numpy
import pytest
from mock import patch
from pathlib import Path
Expand Down Expand Up @@ -146,6 +147,10 @@ def test_collect_daily_vanilla():
'TGK',
]

assert isinstance(data.iloc[0]['QN_3'], numpy.int64)
assert isinstance(data.iloc[0]['QN_4'], numpy.int64)
assert isinstance(data.iloc[0]['RSKF'], numpy.int64)


@pytest.mark.remote
def test_collect_daily_humanized():
Expand Down Expand Up @@ -180,6 +185,8 @@ def test_collect_daily_humanized():
'TEMPERATURE_MIN_005',
]

assert isinstance(data.iloc[0]['PRECIPITATION_FORM'], numpy.int64)


@pytest.mark.remote
def test_collect_hourly_vanilla():
Expand All @@ -199,3 +206,5 @@ def test_collect_hourly_vanilla():
'TT_TU',
'RF_TU',
]

assert isinstance(data.iloc[0]['QN_9'], numpy.int64)
36 changes: 36 additions & 0 deletions wetterdienst/additionals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,9 +231,45 @@ def coerce_column_types(df: pd.DataFrame,
else:
df[column] = df[column].astype(float)

# FIXME: Should be based on the upcoming knowledgebase subsystem.
coerce_integer_columns(df)

return df


def coerce_integer_columns(df: pd.DataFrame):
    """
    Cast the columns known to carry integer values to the nullable
    pandas ``Int64`` dtype.

    The frame is modified in place; nothing is returned. Columns from the
    designated list which are not present in the frame are skipped.

    FIXME: Should be based on the upcoming knowledgebase subsystem.

    :param df: Data frame whose designated columns will be converted.
    :return: None
    """
    # Quality level fields of the different observation kinds.
    quality_level_columns = (
        'QN',
        'QN_2',    # Soil temperature (daily)
        'QN_3',    # Daily observations (daily), Wind (hourly)
        'QN_4',    # Daily observations (daily)
        'QN_7',    # Sun (hourly)
        'QN_8',    # Precipitation, pressure, cloudiness, visibility (hourly)
        'QN_9',    # Air temperature, pressure (hourly)
        'QN_592',  # Solar (hourly)
    )

    # "RSKF" is the "precipitation_form" column of the "daily observations".
    designated_columns = quality_level_columns + ('RSKF',)

    # Only touch the columns which actually exist in this frame.
    present_columns = [
        name for name in designated_columns if name in df.columns
    ]

    # Since version 0.24, pandas offers integer dtypes which are able to
    # hold missing values, so the cast works for columns containing NaN.
    # https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html
    # https://stackoverflow.com/questions/21287624/convert-pandas-column-containing-nans-to-dtype-int/21290084
    for name in present_columns:
        df[name] = df[name].astype('Int64')


def cast_to_list(iterable_) -> list:
"""
A function that either converts an existing iterable to a list or simply puts the item into a list to make an
Expand Down

0 comments on commit ff2c36d

Please sign in to comment.