diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ae8d44cc..45c240f9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,10 +12,11 @@ on: jobs: build: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: + os: ['windows-latest', 'macOS-latest', 'ubuntu-latest'] python-version: [3.7, 3.8, 3.9] steps: diff --git a/tests/utils.py b/tests/utils.py index dda7eb63..43079a31 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -4,8 +4,8 @@ import os import pytest +import logging import pandas as pd -import numpy as np from typing import Union, Dict @@ -29,6 +29,7 @@ def logging_file_path() -> str: yield logging_path # Cleanup after test if os.path.exists(logging_path): + logging.shutdown() os.remove(logging_path) diff --git a/tsflex/features/feature_collection.py b/tsflex/features/feature_collection.py index 39504173..0dd76023 100644 --- a/tsflex/features/feature_collection.py +++ b/tsflex/features/feature_collection.py @@ -319,6 +319,13 @@ def calculate( If n_jobs is either 0 or 1, the code will be executed sequentially without creating a process pool. This is very useful when debugging, as the stack trace will be more comprehensible. + .. note:: + Multiprocessed execution is not supported on Windows. Even when + `n_jobs` is set > 1, the feature extraction will still be executed + sequentially. + Why do we not support multiprocessing on Windows? See this issue: + https://github.com/predict-idlab/tsflex/issues/51 + .. tip:: It takes on avg. 
_300ms_ to schedule everything with @@ -341,7 +348,7 @@ def calculate( delete_logging_handlers(logger) # Add logging handler (if path provided) if logging_file_path: - add_logging_handler(logger, logging_file_path) + f_handler = add_logging_handler(logger, logging_file_path) # Convert the data to a series_dict series_dict: Dict[str, pd.Series] = {} @@ -371,7 +378,9 @@ def calculate( ) nb_stroll_funcs = self._get_stroll_feat_length() - if n_jobs is None: + if os.name == "nt": # On Windows no multiprocessing is supported, see https://github.com/predict-idlab/tsflex/issues/51 + n_jobs = 1 + elif n_jobs is None: n_jobs = os.cpu_count() n_jobs = min(n_jobs, nb_stroll_funcs) @@ -388,7 +397,7 @@ def calculate( with Pool(processes=n_jobs) as pool: results = pool.imap_unordered(self._executor, range(nb_stroll_funcs)) if show_progress: - results = tqdm(results, total=self._get_stroll_feat_length()) + results = tqdm(results, total=nb_stroll_funcs) try: calculated_feature_list = [f for f in results] except: @@ -399,6 +408,11 @@ def calculate( pool.close() pool.join() + # Close the file handler (this avoids PermissionError: [WinError 32]) + if logging_file_path: + f_handler.close() + logger.removeHandler(f_handler) + if calculated_feature_list is None: raise RuntimeError( "Feature Extraction halted due to error while extracting one (or multiple) feature(s)! 
" diff --git a/tsflex/processing/series_pipeline.py b/tsflex/processing/series_pipeline.py index 26eb8e82..b6c15417 100644 --- a/tsflex/processing/series_pipeline.py +++ b/tsflex/processing/series_pipeline.py @@ -187,7 +187,7 @@ def process( delete_logging_handlers(logger) # Add logging handler (if path provided) if logging_file_path: - add_logging_handler(logger, logging_file_path) + f_handler = add_logging_handler(logger, logging_file_path) # Convert the data to a series_dict series_dict: Dict[str, pd.Series] = {} @@ -210,12 +210,21 @@ def process( output_keys.update(processed_dict.keys()) series_dict.update(processed_dict) except Exception as e: + # Close the file handler (this avoids PermissionError: [WinError 32]) + if logging_file_path: + f_handler.close() + logger.removeHandler(f_handler) raise _ProcessingError( "Error while processing function {}:\n {}".format( processor.name, str(e) ) ) from e + # Close the file handler (this avoids PermissionError: [WinError 32]) + if logging_file_path: + f_handler.close() + logger.removeHandler(f_handler) + if not return_all_series: # Return just the output series output_dict = {key: series_dict[str(key)] for key in output_keys} diff --git a/tsflex/utils/logging.py b/tsflex/utils/logging.py index 45456065..d3fa0326 100644 --- a/tsflex/utils/logging.py +++ b/tsflex/utils/logging.py @@ -1,6 +1,6 @@ """Utility functions for logging operations.""" -__author__ = 'Jeroen Van Der Donckt' +__author__ = "Jeroen Van Der Donckt" import logging import warnings @@ -22,7 +22,7 @@ def remove_inner_brackets(message: str) -> str: ------- str: A new message without any inner brackets. - + """ level = 0 new_message = "" @@ -48,7 +48,7 @@ def delete_logging_handlers(logger: logging.Logger): ---------- logger : logging.Logger The logger. - + """ if len(logger.handlers) > 1: logger.handlers = [ @@ -57,7 +57,9 @@ def delete_logging_handlers(logger: logging.Logger): assert len(logger.handlers) == 1, "Multiple logging StreamHandlers present!!" 
-def add_logging_handler(logger: logging.Logger, logging_file_path: Union[str, Path]): +def add_logging_handler( + logger: logging.Logger, logging_file_path: Union[str, Path] +) -> logging.FileHandler: """Add a logging file-handler to the logger. Parameters @@ -66,7 +68,12 @@ def add_logging_handler(logger: logging.Logger, logging_file_path: Union[str, Pa The logger. logging_file_path : Union[str, Path] The file path for the file handler. - + + Returns + ------- + logging.FileHandler + The file-handler that is added to the given logger. + """ if not isinstance(logging_file_path, Path): logging_file_path = Path(logging_file_path) @@ -81,12 +88,11 @@ def add_logging_handler(logger: logging.Logger, logging_file_path: Union[str, Pa open(logging_file_path, "w").close() f_handler = logging.FileHandler(logging_file_path, mode="w") f_handler.setFormatter( - logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - ) + logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") ) f_handler.setLevel(logging.INFO) logger.addHandler(f_handler) + return f_handler def logging_file_to_df(logging_file_path: str) -> pd.DataFrame: