Skip to content

Commit

Permalink
Coerce some columns designated as integer types
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Jul 6, 2020
1 parent e548436 commit 2652feb
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 1 deletion.
27 changes: 27 additions & 0 deletions wetterdienst/additionals/functions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
A set of more general functions used for the organization
"""
import pandas as pd
from typing import Tuple, List, Optional, Union, Callable

from wetterdienst.constants.parameter_mapping import TIME_RESOLUTION_PARAMETER_MAPPING
Expand Down Expand Up @@ -219,6 +220,32 @@ def create_station_data_dtype_mapping(columns: List[str]) -> dict:
return station_data_dtype_mapping


def coerce_integer_columns(df: pd.DataFrame):
    """
    Cast the known integer-valued columns of a data frame to the nullable
    "Int64" dtype.

    The data frame is modified in place: every candidate column that is
    present gets reassigned with its "Int64" representation. Candidate
    columns missing from the data frame are silently skipped, and all
    other columns are left untouched.

    :param df: The data frame whose integer-designated columns get coerced.
    :return: Nothing, the given data frame is mutated.
    """
    candidate_columns = (
        # Columns carrying the different kinds of quality levels.
        'QN',
        'QN_3',
        'QN_4',
        # "precipitation_form" from "daily observations"
        'RSKF',
    )

    # A plain integer dtype cannot represent missing values. Since version
    # 0.24, pandas offers the nullable "Int64" extension dtype for this.
    # https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html
    # https://stackoverflow.com/questions/21287624/convert-pandas-column-containing-nans-to-dtype-int/21290084
    available_columns = [
        column for column in candidate_columns if column in df.columns
    ]
    for column in available_columns:
        df[column] = df[column].astype('Int64')


def cast_to_list(iterable_) -> list:
"""
A function that either converts an existing iterable to a list or simply puts the item into a list to make an
Expand Down
5 changes: 4 additions & 1 deletion wetterdienst/parsing_data/parse_data_from_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from functools import partial
from multiprocessing import Pool

from wetterdienst.additionals.functions import create_station_data_dtype_mapping
from wetterdienst.additionals.functions import create_station_data_dtype_mapping, coerce_integer_columns
from wetterdienst.constants.column_name_mapping import GERMAN_TO_ENGLISH_COLUMNS_MAPPING
from wetterdienst.constants.metadata import NA_STRING, STATION_DATA_SEP
from wetterdienst.enumerations.column_names_enumeration import DWDMetaColumns, DWDOrigColumns
Expand Down Expand Up @@ -136,4 +136,7 @@ def _parse_dwd_data(filename_and_file: Tuple[str, BytesIO],
# Coerce the data types appropriately.
data = data.astype(create_station_data_dtype_mapping(data.columns))

# Coerce Integer data types.
coerce_integer_columns(data)

return data

0 comments on commit 2652feb

Please sign in to comment.