Finish implementation of new methods plus testing
gutzbenj committed Jun 10, 2020
1 parent 527ba8f commit 0c7dded
Showing 6 changed files with 200 additions and 81 deletions.
2 changes: 2 additions & 0 deletions python_dwd/data_collection.py
@@ -58,6 +58,8 @@ def collect_dwd_data(station_ids: List[int],
     remote_files = create_file_list_for_dwd_server(
         station_ids, parameter, time_resolution, period_type, folder, create_new_filelist)
 
+    print(remote_files)
+
     filenames_and_files = download_dwd_data(remote_files, parallel_download)
 
     station_data = parse_dwd_data(filenames_and_files)
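For orientation, here is a minimal usage sketch of the pipeline this hunk belongs to (file list, then download, then parse). The argument values are the ones exercised by the tests further down; the folder path is illustrative:

from python_dwd.data_collection import collect_dwd_data
from python_dwd.enumerations.parameter_enumeration import Parameter
from python_dwd.enumerations.time_resolution_enumeration import TimeResolution
from python_dwd.enumerations.period_type_enumeration import PeriodType

# Builds the remote file list, downloads the archives, parses them into one frame.
station_data = collect_dwd_data(
    station_ids=[1],
    parameter=Parameter.CLIMATE_SUMMARY,
    time_resolution=TimeResolution.DAILY,
    period_type=PeriodType.HISTORICAL,
    folder="./dwd_data",        # illustrative working directory
    prefer_local=False,         # skip the local store, download fresh data
    parallel_download=False,
    write_file=False,
    create_new_filelist=False,
)
print(station_data.head())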
15 changes: 7 additions & 8 deletions python_dwd/data_storing.py
@@ -1,6 +1,6 @@
 """ Data storing/restoring methods"""
 from pathlib import Path
-from typing import Tuple
+from typing import Tuple, Union
 import pandas as pd
 
 from python_dwd.additionals.helpers import create_stationdata_dtype_mapping
@@ -80,11 +80,10 @@ def restore_dwd_data(station_id: int,
                      f"{STATIONDATA_NAME}{H5_FORMAT}"
 
     try:
-        station_data = pd.DataFrame(
-            pd.read_hdf(
-                path_or_buf=local_filepath,
-                key=request_string
-            )
+        # typing required, as pandas.read_hdf is annotated to return object
+        station_data: Union[object, pd.DataFrame] = pd.read_hdf(
+            path_or_buf=local_filepath,
+            key=request_string
         )
     except FileNotFoundError:
         print(f"Error: There seems to be no file at {str(local_filepath)}. "
@@ -99,7 +98,7 @@ def _build_local_store_key(station_id: int,
     return True, station_data.astype(create_stationdata_dtype_mapping(station_data.columns))
 
 
-def _build_local_store_key(station_id: int,
+def _build_local_store_key(station_id: Union[str, int],
                            parameter: Parameter,
                            time_resolution: TimeResolution,
                            period_type: PeriodType) -> str:
@@ -116,6 +115,6 @@ def _build_local_store_key(station_id: Union[str, int],
         a string building a key that is used to identify the request
     """
     request_string = f"{parameter.value}/{time_resolution.value}/" \
-                     f"{period_type.value}/{station_id}"
+                     f"{period_type.value}/station_id_{int(station_id)}"
 
     return request_string
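To make the new key format concrete, a small standalone sketch that mirrors _build_local_store_key; the concrete enum values named in the final comment are assumptions for illustration, not taken from this diff:

from python_dwd.enumerations.parameter_enumeration import Parameter
from python_dwd.enumerations.time_resolution_enumeration import TimeResolution
from python_dwd.enumerations.period_type_enumeration import PeriodType

def build_key(station_id, parameter, time_resolution, period_type) -> str:
    # Same construction as _build_local_store_key: int() folds "00001",
    # "1" and 1 into one canonical id, so each station maps to one HDF node.
    return (f"{parameter.value}/{time_resolution.value}/"
            f"{period_type.value}/station_id_{int(station_id)}")

key = build_key("00001", Parameter.CLIMATE_SUMMARY,
                TimeResolution.DAILY, PeriodType.HISTORICAL)
# e.g. "kl/daily/historical/station_id_1", assuming enum values
# "kl", "daily" and "historical"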
47 changes: 26 additions & 21 deletions python_dwd/file_path_handling/file_list_creation.py
@@ -1,9 +1,9 @@
 """ file list creation for requested files """
 from pathlib import Path
-from typing import List, Union
+from typing import List
 import pandas as pd
 
-from python_dwd.additionals.functions import check_parameters, cast_to_list
+from python_dwd.additionals.functions import check_parameters
 from python_dwd.file_path_handling.path_handling import correct_folder_path
 from python_dwd.additionals.helpers import create_fileindex
 from python_dwd.constants.access_credentials import DWD_FOLDER_MAIN, DWD_FOLDER_METADATA
@@ -14,7 +14,7 @@
 from python_dwd.enumerations.time_resolution_enumeration import TimeResolution
 
 
-def create_file_list_for_dwd_server(station_id: Union[str, int, List[int]],
+def create_file_list_for_dwd_server(station_ids: List[int],
                                     parameter: Parameter,
                                     time_resolution: TimeResolution,
                                     period_type: PeriodType,
@@ -27,7 +27,7 @@ def create_file_list_for_dwd_server(station_id: Union[str, int, List[int]],
     available online.
     Args:
-        station_id: id(s) for the weather station to ask for data
+        station_ids: id(s) for the weather station to ask for data
         parameter: observation measure
         time_resolution: frequency/granularity of measurement interval
         period_type: recent or historical files
@@ -38,30 +38,35 @@ def create_file_list_for_dwd_server(station_ids: List[int],
         List of paths to files
     """
     # Ensure integers
-    station_id = [int(s) for s in cast_to_list(station_id)]
-    # Check type of function parameters
-    check_parameters(parameter=parameter, time_resolution=time_resolution, period_type=period_type)
+    station_ids = [int(statid) for statid in station_ids]
+
+    # Check for the combination of requested parameters
+    check_parameters(parameter=parameter,
+                     time_resolution=time_resolution,
+                     period_type=period_type)
 
-    # Create name of fileslist file
+    # Create name of fileslistfile
     filelist_local = f'{FILELIST_NAME}_{parameter.value}_' \
-                     f'{time_resolution.value}_{period_type.value}{DATA_FORMAT}'
+                     f'{time_resolution.value}_{period_type.value}'
 
     # Create filepath to filelist in folder
-    filelist_local_path = Path(folder, DWD_FOLDER_METADATA, filelist_local)
+    filelist_local_path = Path(folder,
+                               DWD_FOLDER_METADATA,
+                               filelist_local)
+
+    filelist_local_path = f"{filelist_local_path}{DATA_FORMAT}"
 
     if create_new_filelist or not Path(filelist_local_path).is_file():
-        create_fileindex(parameter, time_resolution, period_type, folder)
+        create_fileindex(parameter=parameter,
+                         time_resolution=time_resolution,
+                         period_type=period_type,
+                         folder=folder)
 
-    filelist = pd.read_csv(
-        filepath_or_buffer=filelist_local_path,
-        sep=",",
-        dtype={
-            DWDColumns.FILEID.value: int,
-            DWDColumns.STATION_ID.value: int,
-            DWDColumns.FILENAME.value: str
-        }
-    )
+    filelist = pd.read_csv(filepath_or_buffer=filelist_local_path,
+                           sep=",",
+                           dtype={DWDColumns.FILEID.value: int,
+                                  DWDColumns.STATION_ID.value: int,
+                                  DWDColumns.FILENAME.value: str})
 
-    return filelist.loc[filelist[DWDColumns.STATION_ID.value].isin(station_id), :]
+    return filelist.loc[filelist[DWDColumns.STATION_ID.value].isin(station_ids), :]
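The returned frame is simply the file index filtered with pandas isin. A toy sketch of that final step, with literal column names standing in for the DWDColumns enumeration values and made-up filenames:

import pandas as pd

# Toy stand-in for the downloaded file index (column names and filenames
# are placeholders for illustration).
filelist = pd.DataFrame({
    "FILEID": [0, 1, 2],
    "STATION_ID": [1, 2, 44],
    "FILENAME": ["kl_00001_hist.zip", "kl_00002_hist.zip", "kl_00044_hist.zip"],
})

station_ids = [int(statid) for statid in ["00001", 44]]  # same cast as above
print(filelist.loc[filelist["STATION_ID"].isin(station_ids), :])
# keeps the rows for stations 1 and 44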
45 changes: 8 additions & 37 deletions tests/parsing_data/test_parse_data_from_files.py
@@ -1,52 +1,23 @@
-from io import StringIO
-from shutil import rmtree
+""" Tests for parse_dwd_data function """
+from typing import Union
+from io import StringIO, BytesIO
 from pathlib import Path
 import pandas as pd
 
-from python_dwd import parse_dwd_data
+from python_dwd.parsing_data.parse_data_from_files import parse_dwd_data
 
 fixtures_dir = Path(__file__, "../..").resolve().absolute() / "fixtures"
 
 
 def test_parse_dwd_data():
     filename = "tageswerte_KL_00001_19370101_19860630_hist.zip"
 
-    file = pd.read_json(fixtures_dir / "FIXED_STATIONDATA.JSON")
-    file_in_bytes = StringIO()
-    file.to_csv(file_in_bytes, sep=";")
+    station_data_original = pd.read_json(fixtures_dir / "FIXED_STATIONDATA.JSON")
+    file_in_bytes: Union[StringIO, BytesIO] = StringIO()
+    station_data_original.to_csv(file_in_bytes, sep=";")
     file_in_bytes.seek(0)
 
     station_data = parse_dwd_data(
         filenames_and_files=[(filename, file_in_bytes)])
 
-    stationdata_local = parse_dwd_data(
-        filenames_and_files=[(filename, file_in_bytes)],
-        prefer_local=True,
-        folder=Path(__file__).parent.absolute() / "dwd_data"
-    )
-
-    # 1. Compare freshly loaded data with data read from hdf and assure it's identical
-    assert stationdata_online.equals(stationdata_local)
-
-    # 2. Check functioning for filename only, that is used for parameter definition to parse data from local hdf file
-    assert stationdata_online.equals(
-        parse_dwd_data(
-            filenames_and_files=[filename],
-            prefer_local=True,
-            folder=Path(__file__).parent.absolute() / "dwd_data",
-            write_file=False
-        )
-    )
-
-    # 3. Check for only giving filename but no valid filepath
-    assert parse_dwd_data(
-        filenames_and_files=[filename],
-        prefer_local=True,
-        folder="wrong/folder/name",
-        write_file=False
-    ).empty
-
-    rmtree(Path(Path(__file__).parent.absolute() / "dwd_data"))
-
-    # To ensure files are deleted with the above execution
-    assert True
+    assert station_data.equals(station_data_original)
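The rewritten test feeds parse_dwd_data an in-memory CSV instead of a real download. The same round-trip trick in isolation, with toy columns and assuming default pandas round-trip behaviour:

from io import StringIO
import pandas as pd

df = pd.DataFrame({"STATION_ID": [1], "TMK": [7.5]})  # toy station data

buf = StringIO()           # in-memory stand-in for a downloaded file
df.to_csv(buf, sep=";")
buf.seek(0)                # rewind so the parser reads from the start

round_tripped = pd.read_csv(buf, sep=";", index_col=0)
assert round_tripped.equals(df)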
89 changes: 75 additions & 14 deletions tests/test_data_collection.py
@@ -4,55 +4,116 @@
 from pathlib import Path
 import pandas as pd
 from io import StringIO
+from shutil import rmtree
 
 from python_dwd.enumerations.column_names_enumeration import DWDColumns
 from python_dwd.enumerations.parameter_enumeration import Parameter
 from python_dwd.enumerations.time_resolution_enumeration import TimeResolution
 from python_dwd.enumerations.period_type_enumeration import PeriodType
 from python_dwd.data_collection import collect_dwd_data
 
 
 fixtures_dir = Path(__file__, "../").resolve().absolute() / "fixtures"
 
-# Setting parameters
+# Setting parameters for tests
 station_ids = [1]
 parameter = Parameter.CLIMATE_SUMMARY
 time_resolution = TimeResolution.DAILY
 period_type = PeriodType.HISTORICAL
 folder = ""
 parallel_download = False
 write_file = False
 create_new_filelist = False
 
 # Set filename for mock
 filename = "tageswerte_KL_00001_19370101_19860630_hist.zip"
 
 # Loading test data
 file = pd.read_json(fixtures_dir / "FIXED_STATIONDATA.JSON")
 file_in_bytes = StringIO()
 file.to_csv(file_in_bytes, sep=";")
 file_in_bytes.seek(0)
 
+# Prepare csv for regular "downloading" test
+csv_file = StringIO()
+file.to_csv(csv_file, sep=";")
+csv_file.seek(0)
 
 
 @patch(
-    "python_dwd.file_path_handling.file_list_creation.create_file_list_for_dwd_server",
+    "python_dwd.data_collection.create_file_list_for_dwd_server",
     mock.MagicMock(return_value=pd.DataFrame({DWDColumns.FILENAME.value: [filename]}))
 )
 @patch(
-    "python_dwd.download.download.download_dwd_data",
-    mock.MagicMock(return_value=(filename, file_in_bytes))
+    "python_dwd.data_collection.download_dwd_data",
+    mock.MagicMock(return_value=[(filename, csv_file)])
 )
-def test_collect_dwd_data_online():
-    # test for no local interaction
-    collect_dwd_data(
+def test_collect_dwd_data():
+    """ Test for data collection """
+
+    # Create folder for storage test
+    test_folder = Path(Path(__file__).parent.absolute() / "dwd_data")
+    test_folder.mkdir(parents=True, exist_ok=True)
+
+    # 1. Scenario: collect fresh data and write it to the given folder, so the
+    # second scenario can restore exactly this data instead of stale files.
+    assert collect_dwd_data(
         station_ids=station_ids,
         parameter=parameter,
         time_resolution=time_resolution,
         period_type=period_type,
-        folder=folder,
+        folder=test_folder,
         prefer_local=False,
         parallel_download=parallel_download,
-        write_file=write_file,
+        write_file=True,
         create_new_filelist=create_new_filelist
     ).equals(file)
 
+    # 2. Scenario: restore the data from the folder where the first scenario
+    # stored it.
+    assert collect_dwd_data(
+        station_ids=station_ids,
+        parameter=parameter,
+        time_resolution=time_resolution,
+        period_type=period_type,
+        folder=test_folder,
+        prefer_local=True,
+        parallel_download=parallel_download,
+        write_file=True,
+        create_new_filelist=create_new_filelist
+    ).equals(file)
 
-    # @todo implement further tests
+    # Remove storage folder
+    rmtree(test_folder)
 
 
+@patch(
+    "python_dwd.data_collection.restore_dwd_data",
+    mock.MagicMock(return_value=(False, pd.DataFrame()))
+)
+@patch(
+    "python_dwd.data_collection.create_file_list_for_dwd_server",
+    mock.MagicMock(return_value=pd.DataFrame(None, columns=[DWDColumns.FILENAME.value]))
+)
+def test_collect_dwd_data_empty():
+    """ Test for a request where no data is available """
+
+    assert collect_dwd_data(
+        station_ids=station_ids,
+        parameter=parameter,
+        time_resolution=time_resolution,
+        period_type=period_type,
+        folder="",
+        prefer_local=True,
+        parallel_download=parallel_download,
+        write_file=False,
+        create_new_filelist=create_new_filelist
+    ).empty
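One detail worth noting in the decorators above: the patch targets moved from the defining modules (e.g. python_dwd.download.download) to python_dwd.data_collection, i.e. the names are now patched where they are looked up. A self-contained sketch of why that matters; the producer/consumer modules here are fabricated purely for illustration:

import sys
import types
from unittest import mock

# Fabricate two tiny modules standing in for download.py / data_collection.py.
producer = types.ModuleType("producer")
producer.fetch = lambda: "real"
sys.modules["producer"] = producer

consumer = types.ModuleType("consumer")
consumer.fetch = producer.fetch           # mimics `from producer import fetch`
consumer.run = lambda: consumer.fetch()   # looks the name up at call time
sys.modules["consumer"] = consumer

# Patching the defining module does NOT affect the already-imported reference...
with mock.patch("producer.fetch", return_value="mocked"):
    assert consumer.run() == "real"

# ...while patching where the name is looked up does.
with mock.patch("consumer.fetch", return_value="mocked"):
    assert consumer.run() == "mocked"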
