In [42]:
import numpy as np
import pandas as pd
import matlab.engine
import yaml
import time
import importlib
import pathlib
import csv
from tqdm import tqdm
from loguru import logger
from functools import partial, wraps
from itertools import product
import os

In [18]:
def BF_zscore(input_data):
    """
    Standardise a vector to zero mean and unit sample standard deviation.

    Parameters:
    input_data (array-like): The input time series (or any vector).

    Returns:
    numpy.ndarray: The z-scored transformation of the input.

    Raises:
    ValueError: If input_data contains NaN values.
    """
    values = np.array(input_data)

    # NaNs would silently propagate through the mean/std, so reject them early.
    if np.any(np.isnan(values)):
        raise ValueError('input_data contains NaNs')

    # Two standardisation passes: the second pass reduces the residual
    # floating-point error left by the first.
    for _ in range(2):
        centred = values - np.mean(values)
        values = centred / np.std(values, ddof=1)

    return values


In [40]:
# Start a MATLAB engine session; later cells call MATLAB functions through `eng`.
eng = matlab.engine.start_matlab()

In [43]:
# Location of the local hctsa checkout. Set the HCTSA_ROOT environment variable
# to override the default, which is specific to the original author's machine.
proj_root = pathlib.Path(
    os.environ.get("HCTSA_ROOT", "/Users/jmoo2880/Documents/hctsa")
)
# Put the checkout (as expanded by MATLAB's genpath) on the MATLAB search path
# so its functions can be called through `eng`.
eng.addpath(eng.genpath(str(proj_root)), nargout=0)

In [59]:
def zscore_decorator(func):
    """
    Wrap `func` so that its first positional argument is z-scored first.

    The returned callable passes `y` through BF_zscore and forwards the
    result, together with any remaining positional and keyword arguments,
    to `func`. Metadata of `func` is copied onto the wrapper via
    functools.wraps.
    """
    def wrapper(y, *args, **kwargs):
        return func(BF_zscore(y), *args, **kwargs)

    return wraps(func)(wrapper)

def range_constructor(loader, node):
    """
    Build the list for a `!range` YAML node.

    The node must be a two-item sequence [start, end]; the result is every
    integer from start to end, with end included.
    """
    first, last = loader.construct_sequence(node)
    return [*range(first, last + 1)]
# Register range_constructor as the handler for the `!range` YAML tag.
yaml.add_constructor("!range", range_constructor)

In [82]:
def _expand_param_grid(params):
    """Expand a {name: value-or-list} mapping into one dict per combination."""
    names = list(params)
    # Scalars are treated as single-option lists so that product() can
    # combine them with genuinely multi-valued parameters.
    options = [v if isinstance(v, list) else [v] for v in params.values()]
    combos = [dict(zip(names, choice)) for choice in product(*options)]
    # An empty-list option yields no combinations; fall back to "no parameters".
    return combos or [{}]


def load_yaml(file):
    """
    Build a registry of feature operations from a YAML configuration file.

    The file maps module names to function names to a configuration with an
    optional 'hctsa_name' and an optional 'configs' list. Each config is a
    mapping of keyword arguments; list-valued arguments are expanded into
    every combination, and the special key 'zscore' (default False) selects
    whether the input is z-scored before the Python function is called.

    Parameters:
    file (str or os.PathLike): Path to the YAML configuration file.

    Returns:
    dict: Maps each generated feature name to a dict with the keys
        'callable'        - the Python function with the parameters bound
                            (wrapped by zscore_decorator when z-scoring is on),
        'params'          - the bound keyword arguments,
        'hctsa_name'      - the configured MATLAB function name (or None),
        'matlab_callable' - the attribute of the notebook-level MATLAB engine
                            `eng` with that name, or None when no name is
                            configured,
        'isZscore'        - whether the Python callable z-scores its input.

    Notes:
    The MATLAB callable is not a Python function and is therefore never
    wrapped: callers must check 'isZscore' and z-score the data themselves
    before calling it.
    """
    print(f"Loading configuration file: {os.path.basename(os.fspath(file))}")
    funcs = {}
    with open(file) as f:
        # NOTE(review): FullLoader can construct more than plain data types;
        # only load configuration files you trust.
        yf = yaml.load(f, Loader=yaml.FullLoader) or {}

    for module_name, functions in yf.items():
        print(f"\n*** Importing module {module_name} *** \n")
        module = importlib.import_module(module_name)
        for function_name, function_config in (functions or {}).items():
            # A function listed without any settings parses as None.
            function_config = function_config or {}

            # A missing or empty 'configs' section means "call with defaults".
            configs = function_config.get('configs')
            if configs is None or configs == []:
                configs = [{}]

            python_func = getattr(module, function_name)
            hctsa_name = function_config.get('hctsa_name')
            # Plain attribute lookup on the engine; no need to eval a string.
            matlab_callable = getattr(eng, hctsa_name) if hctsa_name else None

            for params in configs:
                # Copy so that removing 'zscore' does not alter the parsed YAML.
                params = dict(params) if params else {}
                zscore_first = params.pop("zscore", False)

                # Create one operation per parameter combination.
                for param_set in _expand_param_grid(params):
                    feature_name = f"{module_name}_{function_name}"
                    if param_set:
                        feature_name += "_" + "_".join(
                            f"{v}" for v in param_set.values())
                    if not zscore_first:
                        feature_name += "_raw"

                    print(f"Adding operation {feature_name} with params {param_set} "
                          f"(Z-score={zscore_first})")

                    base_func = partial(python_func, **param_set)
                    if zscore_first:
                        base_func = zscore_decorator(base_func)

                    funcs[feature_name] = {
                        'callable': base_func,
                        'params': param_set,
                        'hctsa_name': hctsa_name,
                        'matlab_callable': matlab_callable,
                        'isZscore': zscore_first,
                    }

    return funcs

In [83]:
# Build the operation registry from the Medical configuration file.
funcs = load_yaml("/Users/jmoo2880/Documents/py-hctsa-project/pyhctsa/Configurations/medical.yaml")

Loading configuration file: medical.yaml

*** Importing module Medical *** 

Adding operation Medical_HRV_Classic with params {} (Z-score=True)
Adding operation Medical_PNN_raw with params {} (Z-score=False)
Adding operation Medical_PolVar_1.0_3 with params {'d': 1.0, 'D': 3} (Z-score=True)
Adding operation Medical_PolVar_1.0_5 with params {'d': 1.0, 'D': 5} (Z-score=True)
Adding operation Medical_PolVar_1.0_4 with params {'d': 1.0, 'D': 4} (Z-score=True)
Adding operation Medical_PolVar_1.0_6 with params {'d': 1.0, 'D': 6} (Z-score=True)
Adding operation Medical_PolVar_0.5_3 with params {'d': 0.5, 'D': 3} (Z-score=True)
Adding operation Medical_PolVar_0.5_5 with params {'d': 0.5, 'D': 5} (Z-score=True)
Adding operation Medical_PolVar_0.5_4 with params {'d': 0.5, 'D': 4} (Z-score=True)
Adding operation Medical_PolVar_0.5_6 with params {'d': 0.5, 'D': 6} (Z-score=True)
Adding operation Medical_PolVar_0.1_3 with params {'d': 0.1, 'D': 3} (Z-score=True)
Adding operation Medical_PolVar_0.1_

In [84]:
# Inspect the registry entry for the Medical_HRV_Classic operation.
funcs['Medical_HRV_Classic']

{'callable': <function functools.zscore_decorator.<locals>.wrapper(y: Union[list, numpy.ndarray]) -> dict>,
 'params': {},
 'hctsa_name': 'MD_hrv_classic',
 'matlab_callable': <matlab.engine.matlabengine.MatlabFunc at 0x1268c5c70>,
 'isZscore': True}

In [73]:
# Evaluate the MATLAB implementation on the z-scored third time series.
# load_yaml stores the MATLAB handle under 'matlab_callable'.
funcs['Medical_HRV_Classic']['matlab_callable'](BF_zscore(matlab.double(empirical1000[2])))

{'pnn5': 0.9980998099809981,
 'pnn10': 0.9956995699569957,
 'pnn20': 0.9903990399039904,
 'pnn30': 0.9850985098509851,
 'pnn40': 0.9807980798079808,
 'lfhf': 0.44778742017875434,
 'vlf': 1.0800176439782538,
 'lf': 3.619672975964347,
 'hf': 8.083462850562869,
 'tri': 4.697040864255519,
 'SD1': 1000.0955197791459,
 'SD2': 999.9044710959541}

In [74]:
# Evaluate the Python implementation on the third time series (index 2).
# No manual z-scoring here: load_yaml wraps the callable with zscore_decorator
# when the configuration enables z-scoring.
funcs['Medical_HRV_Classic']['callable']((empirical1000[2]))

{'pnn5': np.float64(0.9980998099809981),
 'pnn10': np.float64(0.9956995699569957),
 'pnn20': np.float64(0.9903990399039904),
 'pnn30': np.float64(0.9850985098509851),
 'pnn40': np.float64(0.9807980798079808),
 'lfhf': np.float64(0.44778742017875445),
 'vlf': np.float64(1.0800176439782536),
 'lf': np.float64(3.619672975964347),
 'hf': np.float64(8.083462850562869),
 'tri': np.float64(5.099439061703213),
 'SD1': np.float64(1000.095519779147),
 'SD2': np.float64(999.9044710959529)}

In [75]:
# Display the full operation registry.
funcs

{'Medical_HRV_Classic': {'callable': <function functools.zscore_decorator.<locals>.wrapper(y: Union[list, numpy.ndarray]) -> dict>,
  'params': {},
  'hctsa_name': 'MD_hrv_classic',
  'hctsa_callable': <matlab.engine.matlabengine.MatlabFunc at 0x144283e30>},
 'Medical_PNN_raw': {'callable': functools.partial(<function PNN at 0x1265f63e0>),
  'params': {},
  'hctsa_name': 'MD_pNN',
  'hctsa_callable': <matlab.engine.matlabengine.MatlabFunc at 0x144280860>},
 'Medical_PolVar_1.0_3': {'callable': <function functools.zscore_decorator.<locals>.wrapper(x: Union[list, numpy.ndarray], *, d: float = 1.0, D: int = 3) -> float>,
  'params': {'d': 1.0, 'D': 3},
  'hctsa_name': 'MD_polvar',
  'hctsa_callable': <matlab.engine.matlabengine.MatlabFunc at 0x144283ef0>},
 'Medical_PolVar_1.0_5': {'callable': <function functools.zscore_decorator.<locals>.wrapper(x: Union[list, numpy.ndarray], *, d: float = 1.0, D: int = 5) -> float>,
  'params': {'d': 1.0, 'D': 5},
  'hctsa_name': 'MD_polvar',
  'hctsa_c

In [89]:
def validate(yaml, feature_name='Medical_HRV_Classic', data=None):
    """
    Evaluate one registered operation in both MATLAB and Python.

    Parameters:
    yaml (str): Path of the YAML configuration file handed to load_yaml.
    feature_name (str): Key of the operation to compare in the registry
        returned by load_yaml. Defaults to 'Medical_HRV_Classic'.
    data (sequence of float, optional): Time series to evaluate. Defaults to
        the notebook-level empirical1000[2].

    Returns:
    dict: {'matlab': <MATLAB result>, 'python': <Python result>}.

    Raises:
    KeyError: If feature_name is not present in the registry.
    """
    func_dict = load_yaml(yaml)
    entry = func_dict[feature_name]
    python_func = entry['callable']
    matlab_func = entry['matlab_callable']
    hctsa_name = entry['hctsa_name']
    isZscore = entry['isZscore']
    print(f"Comparing to {hctsa_name}")

    if data is None:
        data = empirical1000[2]

    # The MATLAB handle cannot be wrapped by zscore_decorator, so the
    # z-scoring the Python callable does internally is repeated here.
    if isZscore:
        matlab_input = BF_zscore(data)
    elif isinstance(data, (list, tuple)):
        # A plain Python list/tuple is converted to a MATLAB cell array by the
        # engine; wrap it so MATLAB receives a numeric array instead.
        matlab_input = matlab.double(list(data))
    else:
        matlab_input = data

    # NOTE(review): assumes the order of the configured parameters matches the
    # positional argument order of the MATLAB function - TODO confirm per
    # function. Numbers are sent as floats so MATLAB receives doubles.
    matlab_args = [
        float(v) if isinstance(v, (int, float)) and not isinstance(v, bool) else v
        for v in entry['params'].values()
    ]

    matlab_eval = matlab_func(matlab_input, *matlab_args)
    # The Python callable already has its parameters bound by load_yaml.
    python_eval = python_func(data)
    return {'matlab': matlab_eval, 'python': python_eval}

In [90]:
# Run the MATLAB-vs-Python comparison using the Medical configuration file.
out = validate("/Users/jmoo2880/Documents/py-hctsa-project/pyhctsa/Configurations/medical.yaml")

Loading configuration file: medical.yaml

*** Importing module Medical *** 

Adding operation Medical_HRV_Classic with params {} (Z-score=True)
Adding operation Medical_PNN_raw with params {} (Z-score=False)
Adding operation Medical_PolVar_1.0_3 with params {'d': 1.0, 'D': 3} (Z-score=True)
Adding operation Medical_PolVar_1.0_5 with params {'d': 1.0, 'D': 5} (Z-score=True)
Adding operation Medical_PolVar_1.0_4 with params {'d': 1.0, 'D': 4} (Z-score=True)
Adding operation Medical_PolVar_1.0_6 with params {'d': 1.0, 'D': 6} (Z-score=True)
Adding operation Medical_PolVar_0.5_3 with params {'d': 0.5, 'D': 3} (Z-score=True)
Adding operation Medical_PolVar_0.5_5 with params {'d': 0.5, 'D': 5} (Z-score=True)
Adding operation Medical_PolVar_0.5_4 with params {'d': 0.5, 'D': 4} (Z-score=True)
Adding operation Medical_PolVar_0.5_6 with params {'d': 0.5, 'D': 6} (Z-score=True)
Adding operation Medical_PolVar_0.1_3 with params {'d': 0.1, 'D': 3} (Z-score=True)
Adding operation Medical_PolVar_0.1_

In [91]:
# Show both result dictionaries.
# NOTE(review): in the recorded output, 'tri' differs between MATLAB (4.697...)
# and Python (5.099...); the other values agree up to floating-point error.
out

{'matlab': {'pnn5': 0.9980998099809981,
  'pnn10': 0.9956995699569957,
  'pnn20': 0.9903990399039904,
  'pnn30': 0.9850985098509851,
  'pnn40': 0.9807980798079808,
  'lfhf': 0.44778742017875434,
  'vlf': 1.0800176439782538,
  'lf': 3.619672975964347,
  'hf': 8.083462850562869,
  'tri': 4.697040864255519,
  'SD1': 1000.0955197791459,
  'SD2': 999.9044710959541},
 'python': {'pnn5': np.float64(0.9980998099809981),
  'pnn10': np.float64(0.9956995699569957),
  'pnn20': np.float64(0.9903990399039904),
  'pnn30': np.float64(0.9850985098509851),
  'pnn40': np.float64(0.9807980798079808),
  'lfhf': np.float64(0.44778742017875445),
  'vlf': np.float64(1.0800176439782536),
  'lf': np.float64(3.619672975964347),
  'hf': np.float64(8.083462850562869),
  'tri': np.float64(5.099439061703213),
  'SD1': np.float64(1000.095519779147),
  'SD2': np.float64(999.9044710959529)}}

In [44]:
# Look up the hctsa (MATLAB) function name registered for this operation.
funcs['Medical_PolVar_0.5_4']['hctsa_name']

'MD_polvar'

In [46]:
# Fetch the MATLAB function handle by name with a plain attribute lookup
# rather than evaluating a constructed string.
mlab_func = getattr(eng, funcs['Medical_PolVar_0.5_4']['hctsa_name'])

In [50]:
# Call the MATLAB function on the second time series (index 1) with two
# additional positional arguments.
mlab_func(matlab.double(empirical1000[1]), matlab.double(1), matlab.double(3))

0.33323332333233324

In [None]:
# Call MD_polvar directly on the engine with the second time series (index 1)
# and two additional positional arguments.
eng.MD_polvar(matlab.double(empirical1000[1]), matlab.double(1), matlab.double(3))

In [12]:
# Load every CSV row as one time series (a list of floats).
empirical1000 = []
# newline='' is what the csv module documentation requires for files passed
# to csv.reader.
with open("../../../empirical1000/hctsa_timeseries-data.csv", newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        try:
            # Empty cells are dropped before conversion.
            empirical1000.append([float(value) for value in row if value != ''])
        except ValueError:
            # Rows containing non-numeric cells are reported and left out.
            print(f"Skipping row due to conversion error: {row}")

In [22]:
# Evaluate every registered operation on the third time series (index 2) and
# count how many individual features were produced.
computed = {}
feature_count = 0
tstart = time.perf_counter()
for feature, entry in funcs.items():
    # Registry entries built by load_yaml are dicts holding the Python
    # implementation under 'callable'; a bare callable is accepted as well.
    operation = entry['callable'] if isinstance(entry, dict) else entry
    result = operation(empirical1000[2])
    computed[feature] = result
    # A scalar result (including NumPy scalars) is a single feature; any other
    # result contributes one feature per item it contains.
    if isinstance(result, (int, float, np.number)):
        feature_count += 1
    else:
        feature_count += len(result)
telapsed = time.perf_counter() - tstart
print(f"Computed {len(computed)} operations ({feature_count} features)")
print(f"Time taken: {telapsed} seconds")

Computed 15 operations (40 features)
Time taken: 0.19373850000556558 seconds


In [29]:
# Inspect the registry entry for the Medical_PolVar_1.0_5 operation.
funcs['Medical_PolVar_1.0_5']

<function functools.zscore_decorator.<locals>.wrapper(x: Union[list, numpy.ndarray], *, d: float = 1.0, D: int = 5) -> float>

In [23]:
# Display the computed results, keyed by operation name.
computed

{'Medical_HRV_Classic': {'pnn5': np.float64(0.9980998099809981),
  'pnn10': np.float64(0.9956995699569957),
  'pnn20': np.float64(0.9903990399039904),
  'pnn30': np.float64(0.9850985098509851),
  'pnn40': np.float64(0.9807980798079808),
  'lfhf': np.float64(0.44778742017875445),
  'vlf': np.float64(1.0800176439782536),
  'lf': np.float64(3.619672975964347),
  'hf': np.float64(8.083462850562869),
  'tri': np.float64(5.099439061703213),
  'SD1': np.float64(1000.095519779147),
  'SD2': np.float64(999.9044710959529)},
 'Medical_PNN_raw': {'pnn5': np.float64(0.9858985898589859),
  'pnn10': np.float64(0.9693969396939695),
  'pnn20': np.float64(0.9387938793879388),
  'pnn30': np.float64(0.9091909190919092),
  'pnn40': np.float64(0.8778877887788779),
  'pnn50': np.float64(0.8471847184718472),
  'pnn60': np.float64(0.815981598159816),
  'pnn70': np.float64(0.784978497849785),
  'pnn80': np.float64(0.756975697569757),
  'pnn90': np.float64(0.7267726772677268),
  'pnn100': np.float64(0.6972697269