In [1]:
import numpy as np
import pandas as pd
import matlab.engine
import yaml
import time
import importlib
import pathlib
import matplotlib.pyplot as plt
import csv
from tqdm import tqdm
from scipy.stats import pearsonr
from loguru import logger
from functools import partial, wraps
from itertools import product
import os
from numbers import Number

In [2]:
def BF_zscore(input_data):
    """
    Standardise a vector to zero mean and unit sample standard deviation.

    Parameters:
    input_data (array-like): The input time series (or any vector).

    Returns:
    numpy.ndarray: The standardised values, as a new array.

    Raises:
    ValueError: If input_data contains NaN values.
    """
    def _standardise(values):
        # Sample standard deviation (ddof=1) is used as the scale.
        return (values - np.mean(values)) / np.std(values, ddof=1)

    values = np.array(input_data)

    # NaNs would silently propagate through the mean/std, so reject them up front.
    if np.any(np.isnan(values)):
        raise ValueError('input_data contains NaNs')

    # The transformation is applied twice; the second pass reduces the
    # numerical error left over from the first.
    return _standardise(_standardise(values))


In [3]:
eng = matlab.engine.start_matlab()

In [4]:
proj_root = pathlib.Path("/Users/jmoo2880/Documents/hctsa")
eng.addpath(eng.genpath(str(proj_root)), nargout=0)

In [5]:
def zscore_decorator(func):
    """
    Wrap ``func`` so that its first positional argument is z-scored before the call.

    Parameters:
    func (callable): Function taking the data vector as its first argument.

    Returns:
    callable: A wrapper that passes ``BF_zscore(y)`` plus any remaining
    positional and keyword arguments through to ``func``.
    """
    @wraps(func)
    def wrapper(y, *args, **kwargs):
        # Only the data vector is transformed; everything else is forwarded untouched.
        return func(BF_zscore(y), *args, **kwargs)
    return wrapper

def range_constructor(loader, node):
    """
    Expand a two-element YAML sequence into the list of integers it spans.

    Parameters:
    loader: YAML loader providing ``construct_sequence``.
    node: The sequence node holding ``[start, end]``.

    Returns:
    list: ``start, start + 1, ..., end`` (the upper bound is included).
    """
    first, last = loader.construct_sequence(node)
    # The upper bound is inclusive, hence the +1 on Python's half-open range.
    return [*range(first, last + 1)]
yaml.add_constructor("!range", range_constructor)

In [6]:
def load_yaml(file):
    """
    Build the table of feature operations described by a YAML configuration file.

    The file maps module names to function names. Each function entry may carry
    a ``configs`` list of keyword-argument dicts; list-valued arguments are
    expanded into their Cartesian product, giving one operation per combination.
    The special ``zscore`` key is not forwarded to the function: it selects
    whether the input is z-scored before the Python function is called.

    Parameters:
    file (str or os.PathLike): Path to the YAML configuration file.

    Returns:
    dict: Maps each generated feature name to a dict with the keys
    'callable' (Python implementation with parameters bound),
    'params' (the bound keyword arguments),
    'hctsa_name' (name of the MATLAB counterpart),
    'matlab_callable' (attribute of the global MATLAB engine ``eng``),
    'isZscore' (whether the input must be z-scored) and
    'ordered_args' (taken verbatim from the configuration, may be None).

    Raises:
    ValueError: If a function entry does not define ``hctsa_name``.
    """
    # pathlib handles both str and Path inputs (and platform separators).
    print(f"Loading configuration file: {pathlib.Path(file).name}")
    funcs = {}
    with open(file) as f:
        # NOTE(review): FullLoader can construct more than plain data types;
        # only load configuration files from trusted sources.
        yf = yaml.load(f, Loader=yaml.FullLoader)

    # An empty file parses to None; treat it as "no modules".
    for module_name, module_config in (yf or {}).items():
        print(f"\n*** Importing module {module_name} *** \n")
        module = importlib.import_module(module_name)
        for function_name, function_config in (module_config or {}).items():
            # A function listed without a body parses to None.
            function_config = function_config or {}

            # Resolve the MATLAB counterpart by attribute lookup on the engine
            # rather than eval(), so configuration text is never executed as code.
            # The MATLAB function cannot be wrapped like the Python one, so any
            # z-scoring has to be done manually by the caller (see 'isZscore').
            hctsa_name = function_config.get('hctsa_name')
            if not hctsa_name:
                raise ValueError(
                    f"{module_name}.{function_name}: 'hctsa_name' is missing from the configuration"
                )
            hctsa_callable = eng
            for attr in str(hctsa_name).split('.'):
                hctsa_callable = getattr(hctsa_callable, attr)

            # If no configs section exists or if it's empty, use a single empty config.
            configs = function_config.get('configs')
            if configs is None or configs == []:
                configs = [{}]

            for params in configs:
                # Work on a copy: popping "zscore" must not mutate the loaded
                # mapping, which may be shared between entries via YAML aliases.
                params = dict(params or {})
                zscore_first = params.pop("zscore", False)

                # Scalars become one-element axes so product() can treat all values alike.
                grid_axes = [v if isinstance(v, list) else [v] for v in params.values()]
                param_combinations = [dict(zip(params, values)) for values in product(*grid_axes)]

                # An empty list-valued parameter yields no combinations at all;
                # fall back to a single call without parameters.
                if not param_combinations:
                    param_combinations = [{}]

                # Create one operation for each parameter combination.
                for param_set in param_combinations:
                    feature_name = f"{module_name}_{function_name}"
                    if param_set:
                        feature_name += "_" + "_".join(f"{v}" for v in param_set.values())
                    if not zscore_first:
                        feature_name += "_raw"

                    print(f"Adding operation {feature_name} with params {param_set} "
                          f"(Z-score={zscore_first})")

                    base_func = partial(getattr(module, function_name), **param_set)
                    if zscore_first:
                        base_func = zscore_decorator(base_func)

                    # 'ordered_args' is only kept for the testing YAML.
                    funcs[feature_name] = {
                        'callable': base_func,
                        'params': param_set,
                        'hctsa_name': hctsa_name,
                        'matlab_callable': hctsa_callable,
                        'isZscore': zscore_first,
                        'ordered_args': function_config.get('ordered_args'),
                    }

    return funcs

In [7]:
funcs = load_yaml("/Users/jmoo2880/Documents/py-hctsa-project/pyhctsa/Configurations/medical.yaml")

Loading configuration file: medical.yaml

*** Importing module Medical *** 

Adding operation Medical_HRV_Classic with params {} (Z-score=True)
Adding operation Medical_PNN_raw with params {} (Z-score=False)
Adding operation Medical_PolVar_1.0_3 with params {'d': 1.0, 'D': 3} (Z-score=True)
Adding operation Medical_PolVar_1.0_5 with params {'d': 1.0, 'D': 5} (Z-score=True)
Adding operation Medical_PolVar_1.0_4 with params {'d': 1.0, 'D': 4} (Z-score=True)
Adding operation Medical_PolVar_1.0_6 with params {'d': 1.0, 'D': 6} (Z-score=True)
Adding operation Medical_PolVar_0.5_3 with params {'d': 0.5, 'D': 3} (Z-score=True)
Adding operation Medical_PolVar_0.5_5 with params {'d': 0.5, 'D': 5} (Z-score=True)
Adding operation Medical_PolVar_0.5_4 with params {'d': 0.5, 'D': 4} (Z-score=True)
Adding operation Medical_PolVar_0.5_6 with params {'d': 0.5, 'D': 6} (Z-score=True)
Adding operation Medical_PolVar_0.1_3 with params {'d': 0.1, 'D': 3} (Z-score=True)
Adding operation Medical_PolVar_0.1_

In [8]:
# Load the time-series collection: every CSV row becomes one list of floats.
empirical1000 = []
with open("../../../empirical1000/hctsa_timeseries-data.csv") as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    for row in reader:
        # Empty fields are dropped before conversion, so rows may end up with
        # different lengths.
        try:
            time_series = [float(value) for value in row if value != '']
            empirical1000.append(time_series)
        except ValueError as e:
            # A single non-numeric field causes the whole row to be reported and skipped.
            print(f"Skipping row due to conversion error: {row}")
            continue

In [9]:
funcs['Medical_PNN_raw']

{'callable': functools.partial(<function PNN at 0x169302020>),
 'params': {},
 'hctsa_name': 'MD_pNN',
 'matlab_callable': <matlab.engine.matlabengine.MatlabFunc at 0x1692647d0>,
 'isZscore': False,
 'ordered_args': None}

In [10]:
if not funcs['Medical_PNN_raw']['params']:
    print("empty")

empty


In [11]:
param_order = funcs['Medical_PolVar_0.1_3']['ordered_args']
pms = funcs['Medical_PolVar_0.1_3']['params']
ordered_args = [pms[k] for k in param_order]

In [12]:
def eval_comparison(yaml, data):
    """
    Evaluate every configured operation in both Python and MATLAB on each time series.

    Parameters:
    yaml (str): Path to the YAML configuration, handed to ``load_yaml``.
        (Inside this function the name shadows the ``yaml`` module.)
    data (sequence): Time series, each a sequence of floats.

    Returns:
    dict: ``{feature_name: {index: {'matlab': ..., 'python': ...}}}`` where
    ``index`` is the position of the time series in ``data``.

    Raises:
    ValueError: If an operation has parameters but its configuration provides
        no ``ordered_args``, so the positional order for MATLAB is unknown.
    """
    func_dict = load_yaml(yaml)
    func_res = dict()
    for func, f in func_dict.items():
        print(f"Evalutating {func}")
        python_func = f['callable']
        matlab_func = f['matlab_callable']
        hctsa_name = f['hctsa_name']
        isZscore = f['isZscore']
        params = f['params']

        # MATLAB takes the parameters positionally, so they must be put in the
        # order given by the configuration's 'ordered_args'.
        ordered_args = []
        if params:
            order = f['ordered_args']
            if order is None:
                raise ValueError(
                    f"{func}: parameters {list(params)} are set but 'ordered_args' "
                    f"is missing, so they cannot be passed to {hctsa_name} positionally"
                )
            ordered_args = [params[k] for k in order]

        print(f"Comparing to {hctsa_name}")
        res = dict()
        for i, x in enumerate(data):
            # The Python callable z-scores internally (when configured); the
            # MATLAB function cannot be wrapped, so its input is z-scored here.
            matlab_input = matlab.double(x)
            if isZscore:
                matlab_input = BF_zscore(matlab_input)
            matlab_eval = matlab_func(matlab_input, *ordered_args)
            python_eval = python_func(x)
            res[i] = {'matlab': matlab_eval, 'python': python_eval}
        func_res[func] = res
    return func_res

In [13]:
out = eval_comparison("/Users/jmoo2880/Documents/py-hctsa-project/pyhctsa/Configurations/medical.yaml", empirical1000)

Loading configuration file: medical.yaml

*** Importing module Medical *** 

Adding operation Medical_HRV_Classic with params {} (Z-score=True)
Adding operation Medical_PNN_raw with params {} (Z-score=False)
Adding operation Medical_PolVar_1.0_3 with params {'d': 1.0, 'D': 3} (Z-score=True)
Adding operation Medical_PolVar_1.0_5 with params {'d': 1.0, 'D': 5} (Z-score=True)
Adding operation Medical_PolVar_1.0_4 with params {'d': 1.0, 'D': 4} (Z-score=True)
Adding operation Medical_PolVar_1.0_6 with params {'d': 1.0, 'D': 6} (Z-score=True)
Adding operation Medical_PolVar_0.5_3 with params {'d': 0.5, 'D': 3} (Z-score=True)
Adding operation Medical_PolVar_0.5_5 with params {'d': 0.5, 'D': 5} (Z-score=True)
Adding operation Medical_PolVar_0.5_4 with params {'d': 0.5, 'D': 4} (Z-score=True)
Adding operation Medical_PolVar_0.5_6 with params {'d': 0.5, 'D': 6} (Z-score=True)
Adding operation Medical_PolVar_0.1_3 with params {'d': 0.1, 'D': 3} (Z-score=True)
Adding operation Medical_PolVar_0.1_

In [14]:
def nan_aware_relative_error(a, b, zero_tol=1e-12):
    """
    Element-wise relative error between ``a`` and ``b`` with explicit NaN/zero rules.

    Rules, applied per element:
      * both NaN                      -> 0.0 (the outputs agree)
      * exactly one NaN               -> inf (the outputs disagree)
      * |a| >= zero_tol               -> |a - b| / |a|
      * |a| <  zero_tol < |b|         -> |a - b| / |b| (fall back to b as reference)
      * |a| <  zero_tol, |b| <= zero_tol -> 0.0 (both are zero within tolerance)

    Parameters:
    a (array-like): Reference values.
    b (array-like): Values compared against ``a``; broadcast against ``a``.
    zero_tol (float): Magnitude below which a value is treated as zero.
        Should be positive.

    Returns:
    numpy.ndarray: Relative errors as fractions (not percentages), with the
    broadcast shape of ``a`` and ``b``.
    """
    a, b = np.broadcast_arrays(np.asarray(a, dtype=float), np.asarray(b, dtype=float))

    # NaN masks
    a_nan = np.isnan(a)
    b_nan = np.isnan(b)
    both_nan = a_nan & b_nan
    one_nan = a_nan ^ b_nan
    neither_nan = ~(a_nan | b_nan)

    # Zero masks share a single threshold so that no element is left undefined.
    a_zero = np.abs(a) < zero_tol
    b_nonzero = np.abs(b) > zero_tol

    rel_error = np.full(a.shape, np.nan, dtype=float)
    rel_error[both_nan] = 0.0
    rel_error[one_nan] = np.inf

    # Regular case: a is a usable denominator.
    regular = neither_nan & ~a_zero
    rel_error[regular] = np.abs(a[regular] - b[regular]) / np.abs(a[regular])

    # Where a is zero, fall back to using b as the reference.
    fallback = neither_nan & a_zero & b_nonzero
    rel_error[fallback] = np.abs(a[fallback] - b[fallback]) / np.abs(b[fallback])

    # Zero (within tolerance) in both a and b -> no error.
    both_zero = neither_nan & a_zero & ~b_nonzero
    rel_error[both_zero] = 0.0

    return rel_error

In [15]:
# def compare_outputs(outputs):
#     # needs to handle both scalars and dicts
#     scalar_feature_correlations = dict()
#     for fout in outputs:
#         scalar_store_ml = []
#         scalar_store_py = []
#         for ts in outputs[fout]:
#             matlab_output = outputs[fout][ts]['matlab']
#             python_output = outputs[fout][ts]['python']
#             if isinstance(matlab_output, float):
#                     rel_err = (np.abs(matlab_output - python_output))/(matlab_output) * 100 # as percentage
#                     print(f"[{fout}] | Relative error (%): {rel_err}")
#                     scalar_store_ml.append(matlab_output)
#                     scalar_store_py.append(python_output)
#             elif isinstance(matlab_output, dict):
#                 matlab_arr = np.array(list(matlab_output.values()))
#                 python_arr = np.array(list(python_output.values()))
#                 # check that the standard deviation is non-zero before computing correlation
#                 corr, corr_pval = None, None
#                 if np.std(matlab_arr) == 0 or np.std(python_arr) == 0:
#                     corr = np.nan
#                     corr_pval = np.nan
#                 else:
#                     pear = pearsonr(matlab_arr, python_arr)
#                     corr = pear.statistic
#                     corr_pval = pear.pvalue
                    
#                 rel_err = nan_aware_relative_error(python_arr, matlab_arr) * 100 # as percentage
#                 print(f"[{fout}, ts: {ts}] Pearson Correlation: {corr}, pvalue: {corr_pval} | max rel err: {np.max(rel_err)} ({np.array(list(matlab_output.keys()))[np.argmax(rel_err)]})")

#             else:
#                 raise(ValueError("Expected either a dict or float. Got an unexpected datatype."))
#         if scalar_store_py:
#             pear = pearsonr(scalar_store_ml, scalar_store_py)
#             scalar_feature_correlations[fout] = {'r': pear.statistic, 'pval': pear.pvalue}
    
#     return scalar_feature_correlations


In [15]:
def compare_outputs(outputs):
    """
    outputs:
      { feature_name:
          { ts_id:
              { 'matlab': scalar_or_dict,
                'python': scalar_or_dict }
          }
      }
    Returns:
      { flattened_feature_name: { 'r': ..., 'pval': ... } }
    """
    # 1) First pass: flatten dict‐features into scalar slots
    flat = {}  # { "feat" or "feat.subkey" : { ts: (ml, py) } }
    for feat, ts_dict in outputs.items():
        for ts, run in ts_dict.items():
            ml = run['matlab']
            py = run['python']
            if isinstance(ml, dict) and isinstance(py, dict):
                # assume same keys
                for k, mlv in ml.items():
                    pyv = py[k]
                    slot = f"{feat}.{k}"
                    flat.setdefault(slot, {})[ts] = (mlv, pyv)
            elif isinstance(ml, Number) and isinstance(py, Number):
                flat.setdefault(feat, {})[ts] = (ml, py)
            else:
                raise ValueError(f"Feature {feat}@{ts} is neither both scalars nor both dicts.")

    # 2) Second pass: compute per‐feature correlation & print rel-errors
    results = {}
    for slot, tsmap in flat.items():
        ml_vals, py_vals = [], []
        for ts, (mlv, pyv) in tsmap.items():
            ml_vals.append(mlv)
            py_vals.append(pyv)
            # relative error for each ts
            if mlv == 0:
                rel_err = np.nan
            else:
                rel_err = abs(mlv - pyv) / abs(mlv) * 100
            print(f"[{slot} | ts={ts}]  RelErr% = {rel_err:.2f}")

        ml_arr = np.array(ml_vals, dtype=float)
        py_arr = np.array(py_vals, dtype=float)
        # only compute corr if there’s variance
        if len(ml_arr) > 1 and ml_arr.std() and py_arr.std():
            r, p = pearsonr(ml_arr, py_arr)
        else:
            r, p = np.nan, np.nan

        results[slot] = {'r': r, 'pval': p}

    return results

In [16]:
def binpicker(xmin, xmax, nbins, rawBinWidth=None):
    """
    Choose histogram bins.
    A port of the internal MATLAB function of the same name.

    Parameters:
    -----------
    xmin : float or None
        Minimum value of the data range. None denotes empty input.
    xmax : float
        Maximum value of the data range.
    nbins : int or None
        Number of bins. If None, the automatic rule is used and
        ``rawBinWidth`` must be given.
    rawBinWidth : float, optional
        Initial estimate of the bin width, which is then rounded to a "nice"
        value. Defaults to ``abs(xmax - xmin) / nbins`` when ``nbins`` is given.

    Returns:
    --------
    edges : numpy.ndarray
        Array of bin edges.

    Raises:
    -------
    ValueError
        If ``xmin`` is not a floating-point scalar, or if neither ``nbins``
        nor ``rawBinWidth`` is provided for non-empty input.
    """
    if xmin is None:
        # empty input
        if nbins is not None:
            return np.arange(nbins + 1, dtype=float)
        return np.array([0.0, 1.0])

    if not np.issubdtype(type(xmin), np.floating):
        raise ValueError("Input must be float type when number of bins is specified.")

    if rawBinWidth is None:
        # Without a bin count there is nothing to derive a width from.
        if nbins is None:
            raise ValueError("Either nbins or rawBinWidth must be specified.")
        rawBinWidth = abs(xmax - xmin) / nbins

    def _realmax(value):
        # Largest finite value representable in the float type of `value`.
        return np.finfo(np.asarray(value).dtype).max

    xscale = max(abs(xmin), abs(xmax))
    xrange = xmax - xmin

    # Make sure the bin width is not effectively zero
    rawBinWidth = max(rawBinWidth, np.spacing(xscale))

    # If the data are not constant, place the bins at "nice" locations
    if xrange > max(np.sqrt(np.spacing(xscale)), np.finfo(np.asarray(xscale).dtype).tiny):
        # Choose the bin width as a "nice" value
        pow_of_ten = 10 ** np.floor(np.log10(rawBinWidth))  # next smaller power of ten
        rel_size = rawBinWidth / pow_of_ten  # guaranteed in [1, 10)

        # Automatic rule specified
        if nbins is None:
            if rel_size < 1.5:
                bin_width = 1 * pow_of_ten
            elif rel_size < 2.5:
                bin_width = 2 * pow_of_ten
            elif rel_size < 4:
                bin_width = 3 * pow_of_ten
            elif rel_size < 7.5:
                bin_width = 5 * pow_of_ten
            else:
                bin_width = 10 * pow_of_ten

            # Put the edges at multiples of the bin width, covering the data;
            # the resulting number of bins follows from that.
            left_edge = max(min(bin_width * np.floor(xmin / bin_width), xmin), -_realmax(xmax))
            # Cast to int: the count is used as a length below.
            nbins_actual = int(max(1, np.ceil((xmax - left_edge) / bin_width)))
            right_edge = min(max(left_edge + nbins_actual * bin_width, xmax), _realmax(xmax))

        # Number of bins specified
        else:
            # Provisional width; replaced below when there is more than one bin.
            bin_width = pow_of_ten * np.floor(rel_size)
            left_edge = max(min(bin_width * np.floor(xmin / bin_width), xmin), -_realmax(xmin))
            if nbins > 1:
                # Width bounds: xmax on the right (ll) or left (ul) edge of the last bin.
                ll = (xmax - left_edge) / nbins
                ul = (xmax - left_edge) / (nbins - 1)
                p10 = 10 ** np.floor(np.log10(ul - ll))
                bin_width = p10 * np.ceil(ll / p10)

            nbins_actual = nbins
            right_edge = min(max(left_edge + nbins_actual * bin_width, xmax), _realmax(xmax))

    else:  # the data are nearly constant
        # For the automatic rule, use a single bin
        if nbins is None:
            nbins = 1

        bin_range = max(1, np.ceil(nbins * np.spacing(xscale)))
        left_edge = np.floor(2 * (xmin - bin_range / 4)) / 2
        right_edge = np.ceil(2 * (xmax + bin_range / 4)) / 2

        bin_width = (right_edge - left_edge) / nbins
        nbins_actual = nbins

    if not np.isfinite(bin_width):
        # The width overflowed: give up on "nice" edges and space them evenly.
        edges = np.linspace(left_edge, right_edge, nbins_actual + 1)
    else:
        edges = np.concatenate([
            [left_edge],
            left_edge + np.arange(1, nbins_actual) * bin_width,
            [right_edge]
        ])

    return edges

In [18]:
ma = np.max(empirical1000[427])
mi = np.min(empirical1000[427])

In [19]:
est = (ma - mi) / 10 

In [17]:
results = compare_outputs(out)

[Medical_HRV_Classic.pnn5 | ts=0]  RelErr% = 0.00
[Medical_HRV_Classic.pnn5 | ts=1]  RelErr% = 0.00
[Medical_HRV_Classic.pnn5 | ts=2]  RelErr% = 0.00
[Medical_HRV_Classic.pnn5 | ts=3]  RelErr% = 0.00
[Medical_HRV_Classic.pnn5 | ts=4]  RelErr% = 0.00
[Medical_HRV_Classic.pnn5 | ts=5]  RelErr% = 0.00
[Medical_HRV_Classic.pnn5 | ts=6]  RelErr% = 0.00
[Medical_HRV_Classic.pnn5 | ts=7]  RelErr% = 0.00
[Medical_HRV_Classic.pnn5 | ts=8]  RelErr% = 0.00
[Medical_HRV_Classic.pnn5 | ts=9]  RelErr% = 0.00
[Medical_HRV_Classic.pnn5 | ts=10]  RelErr% = 0.00
[Medical_HRV_Classic.pnn5 | ts=11]  RelErr% = 0.00
[Medical_HRV_Classic.pnn5 | ts=12]  RelErr% = 0.00
[Medical_HRV_Classic.pnn5 | ts=13]  RelErr% = 0.00
[Medical_HRV_Classic.pnn5 | ts=14]  RelErr% = 0.00
[Medical_HRV_Classic.pnn5 | ts=15]  RelErr% = 0.00
[Medical_HRV_Classic.pnn5 | ts=16]  RelErr% = 0.00
[Medical_HRV_Classic.pnn5 | ts=17]  RelErr% = 0.00
[Medical_HRV_Classic.pnn5 | ts=18]  RelErr% = 0.00
[Medical_HRV_Classic.pnn5 | ts=19]  RelEr

In [18]:
results

{'Medical_HRV_Classic.pnn5': {'r': np.float64(1.0), 'pval': np.float64(0.0)},
 'Medical_HRV_Classic.pnn10': {'r': np.float64(1.0), 'pval': np.float64(0.0)},
 'Medical_HRV_Classic.pnn20': {'r': np.float64(0.9999999999999999),
  'pval': np.float64(0.0)},
 'Medical_HRV_Classic.pnn30': {'r': np.float64(1.0), 'pval': np.float64(0.0)},
 'Medical_HRV_Classic.pnn40': {'r': np.float64(1.0), 'pval': np.float64(0.0)},
 'Medical_HRV_Classic.lfhf': {'r': np.float64(1.0), 'pval': np.float64(0.0)},
 'Medical_HRV_Classic.vlf': {'r': np.float64(1.0), 'pval': np.float64(0.0)},
 'Medical_HRV_Classic.lf': {'r': np.float64(1.0), 'pval': np.float64(0.0)},
 'Medical_HRV_Classic.hf': {'r': np.float64(1.0), 'pval': np.float64(0.0)},
 'Medical_HRV_Classic.tri': {'r': np.float64(1.0), 'pval': np.float64(0.0)},
 'Medical_HRV_Classic.SD1': {'r': np.float64(1.0), 'pval': np.float64(0.0)},
 'Medical_HRV_Classic.SD2': {'r': np.float64(1.0), 'pval': np.float64(0.0)},
 'Medical_PNN_raw.pnn5': {'r': np.float64(0.9999999

In [23]:
def histc(x, bins):
    """
    Count how many values of ``x`` fall into each bin defined by ``bins``.

    Slot ``k`` counts values with ``bins[k] <= x < bins[k + 1]``; the final
    slot counts every value ``>= bins[-1]``. Values below ``bins[0]`` are not
    counted.

    NOTE(review): MATLAB's ``histc`` only counts values equal to the last edge
    in the final slot - confirm whether values above the last edge should be
    dropped here as well.

    Parameters:
    x (array-like): Values to bin.
    bins (array-like): Monotonically increasing 1-D bin edges.

    Returns:
    numpy.ndarray: Float array of counts with one entry per edge.
    """
    bins = np.asarray(bins)
    # digitize returns 0 for values below the first edge; indexing with
    # ``0 - 1`` would wrap around and wrongly add them to the last slot.
    slots = np.ravel(np.digitize(x, bins))
    in_range = slots[slots > 0] - 1
    return np.bincount(in_range, minlength=bins.size).astype(float)

In [71]:
histc(np.array(empirical1000[425]), np.linspace(0, 1, 20))

array([   0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,
          0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,
          0., 2369.])

In [100]:
lower = np.ceil(np.min(empirical1000[425]) * 10)/10 - 0.1
upper = np.floor(np.max(empirical1000[425]) * 10)/10 + 0.1

In [110]:
histc(np.array(empirical1000[425]), np.linspace(lower, upper, 10))

array([2.124e+03, 2.190e+02, 2.300e+01, 2.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00])

In [30]:
from utilities import binpicker, histc
import math

In [43]:
nbins = int(np.ceil(np.sqrt(len(empirical1000[425]))))
N = len(empirical1000[425])

In [None]:
binpicker()

In [53]:
nbins0 = max(math.ceil(math.sqrt(N)), 1)
x = np.array(empirical1000[425])
rawW = (x.max() - x.min()) / nbins0
rawW

np.float64(34.80816326530612)

In [56]:
edges = binpicker(x.min(), x.max(), None)

TypeError: unsupported operand type(s) for /: 'float' and 'NoneType'

In [39]:
binpicker(np.array(empirical1000[425]).min(), np.array(empirical1000[425]).max(), 12)

array([   0.,  150.,  300.,  450.,  600.,  750.,  900., 1050., 1200.,
       1350., 1500., 1650., 1800.])

In [55]:
np.linspace(lower, upper, 10)

NameError: name 'lower' is not defined

In [50]:
np.min(empirical1000[119])

np.float64(-3.1083)

In [44]:
empirical1000[119]

[0.76227,
 -0.55059,
 -1.1573,
 -2.2658,
 0.47299,
 1.382,
 1.8027,
 -0.1421,
 -0.79025,
 -1.4076,
 -0.69409,
 0.81296,
 2.0334,
 0.5888,
 -0.61614,
 -0.8218,
 -1.9738,
 0.030968,
 1.1243,
 2.6362,
 -0.24128,
 -0.45993,
 -0.85231,
 -2.3977,
 -0.75927,
 0.55005,
 0.62124,
 1.6137,
 1.2145,
 -0.32395,
 -1.072,
 -2.3998,
 0.33879,
 1.0263,
 2.2207,
 0.25107,
 -0.71241,
 -1.0224,
 -1.7571,
 0.46858,
 1.6997,
 1.4209,
 -0.44947,
 -0.6381,
 -1.4585,
 -1.0078,
 0.40252,
 1.6105,
 2.1872,
 -0.40821,
 -0.59828,
 -1.6298,
 -2.7498,
 0.40843,
 0.45921,
 0.87882,
 2.462,
 0.67357,
 -0.54133,
 -0.61655,
 -1.6255,
 -1.3204,
 0.31978,
 0.95862,
 2.0772,
 -0.17611,
 -1.0956,
 -2.0197,
 -0.13859,
 0.76309,
 1.218,
 1.2388,
 -0.66825,
 -1.9431,
 -0.86242,
 0.56015,
 0.81118,
 1.951,
 -0.032876,
 -1.1705,
 -2.2373,
 0.11327,
 0.53047,
 0.89429,
 2.1449,
 0.39765,
 -0.7215,
 -1.2198,
 -1.6381,
 0.58896,
 1.795,
 1.1405,
 -0.43287,
 -0.87314,
 -1.9548,
 0.091879,
 1.2052,
 1.8813,
 0.019876,
 -0.70827,
 -1

In [36]:
np.array(empirical1000[1]).shape

(10000,)

In [30]:
np.histogram(np.array(empirical1000[0]), bins=np.linspace(0, 1, 19))

(array([1606, 1380, 1284, 1105,  889,  831,  673,  602,  448,  351,  265,
         193,  153,  109,   73,   27,   11,    0]),
 array([0.        , 0.05555556, 0.11111111, 0.16666667, 0.22222222,
        0.27777778, 0.33333333, 0.38888889, 0.44444444, 0.5       ,
        0.55555556, 0.61111111, 0.66666667, 0.72222222, 0.77777778,
        0.83333333, 0.88888889, 0.94444444, 1.        ]))

In [281]:
np.max(np.histogram(empirical1000[0], 10))

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.

In [157]:
type(out['Medical_PolVar_0.1_5'][0]['matlab'])

float

In [273]:
compare_outputs(out)

[Medical_HRV_Classic, ts: 0] Pearson Correlation: 0.9999999991747857, pvalue: 3.0135915986735436e-45 | max rel err: 1.418998817500987 (tri)
[Medical_HRV_Classic, ts: 1] Pearson Correlation: 0.9999999952409542, pvalue: 1.9224275952637545e-41 | max rel err: 3.1428571428571326 (tri)
[Medical_HRV_Classic, ts: 2] Pearson Correlation: 0.9999999560542322, pvalue: 1.2907312341591382e-36 | max rel err: 7.891028651949271 (tri)
[Medical_HRV_Classic, ts: 3] Pearson Correlation: 0.9999999997552321, pvalue: 6.918654500991682e-48 | max rel err: 0.5494505494505526 (tri)
[Medical_HRV_Classic, ts: 4] Pearson Correlation: 0.9999999959772019, pvalue: 8.296439266866039e-42 | max rel err: 2.6666666666666665 (tri)
[Medical_HRV_Classic, ts: 5] Pearson Correlation: 0.9999999715182349, pvalue: 1.475995494389196e-37 | max rel err: 6.51162790697673 (tri)
[Medical_HRV_Classic, ts: 6] Pearson Correlation: 0.9999999998413515, pvalue: 7.914663655272445e-49 | max rel err: 1.6894844159627078 (tri)
[Medical_HRV_Classic,

  pear = pearsonr(matlab_arr, python_arr)
  rel_err = (np.abs(matlab_output - python_output))/(matlab_output) * 100 # as percentage


{'Medical_PolVar_1.0_3': {'r': np.float64(0.9999670325440647),
  'pval': np.float64(0.0)},
 'Medical_PolVar_1.0_5': {'r': np.float64(0.9999749739337168),
  'pval': np.float64(0.0)},
 'Medical_PolVar_1.0_4': {'r': np.float64(0.9999664005416269),
  'pval': np.float64(0.0)},
 'Medical_PolVar_1.0_6': {'r': np.float64(0.9999764505261076),
  'pval': np.float64(0.0)},
 'Medical_PolVar_0.5_3': {'r': np.float64(0.9999652296137027),
  'pval': np.float64(0.0)},
 'Medical_PolVar_0.5_5': {'r': np.float64(0.9999669687339889),
  'pval': np.float64(0.0)},
 'Medical_PolVar_0.5_4': {'r': np.float64(0.9999665300948115),
  'pval': np.float64(0.0)},
 'Medical_PolVar_0.5_6': {'r': np.float64(0.9999801103690649),
  'pval': np.float64(0.0)},
 'Medical_PolVar_0.1_3': {'r': np.float64(0.9998251178571953),
  'pval': np.float64(0.0)},
 'Medical_PolVar_0.1_5': {'r': np.float64(0.9998856873093136),
  'pval': np.float64(0.0)},
 'Medical_PolVar_0.1_4': {'r': np.float64(0.9998752833950938),
  'pval': np.float64(0.0)},

In [145]:
out['Medical_HRV_Classic']

{0: {'matlab': {'pnn5': 0.9958995899589959,
   'pnn10': 0.9921992199219922,
   'pnn20': 0.9856985698569857,
   'pnn30': 0.9784978497849784,
   'pnn40': 0.9718971897189719,
   'lfhf': 0.4723746215565267,
   'vlf': 1.3091913853979957,
   'lf': 3.884111898130746,
   'hf': 8.222524498314849,
   'tri': 3.9416633819471816,
   'SD1': 995.0634239085406,
   'SD2': 1004.9123257276789},
  'python': {'pnn5': np.float64(0.9958995899589959),
   'pnn10': np.float64(0.9921992199219922),
   'pnn20': np.float64(0.9856985698569857),
   'pnn30': np.float64(0.9784978497849784),
   'pnn40': np.float64(0.9718971897189719),
   'lfhf': np.float64(0.4723746215565267),
   'vlf': np.float64(1.3091913853979955),
   'lf': np.float64(3.884111898130746),
   'hf': np.float64(8.222524498314849),
   'tri': np.float64(3.8865137971239796),
   'SD1': np.float64(995.0634239085407),
   'SD2': np.float64(1004.9123257276782)}},
 1: {'matlab': {'pnn5': 0.9976997699769977,
   'pnn10': 0.995099509950995,
   'pnn20': 0.98999899989

In [99]:
out[3]

{'matlab': {'pnn5': 0.997997997997998,
  'pnn10': 0.995995995995996,
  'pnn20': 0.98998998998999,
  'pnn30': 0.984984984984985,
  'pnn40': 0.978978978978979,
  'lfhf': 0.2902038457437392,
  'vlf': 1.0810047912351703,
  'lf': 3.0877935009936177,
  'hf': 10.640084706941668,
  'tri': 5.4945054945054945,
  'SD1': 976.9888846341381,
  'SD2': 1022.4933835000317},
 'python': {'pnn5': np.float64(0.997997997997998),
  'pnn10': np.float64(0.995995995995996),
  'pnn20': np.float64(0.98998998998999),
  'pnn30': np.float64(0.984984984984985),
  'pnn40': np.float64(0.978978978978979),
  'lfhf': np.float64(0.29020384574373925),
  'vlf': np.float64(1.0810047912351712),
  'lf': np.float64(3.0877935009936195),
  'hf': np.float64(10.640084706941671),
  'tri': np.float64(5.46448087431694),
  'SD1': np.float64(976.9888846341379),
  'SD2': np.float64(1022.4933835000317)}}

In [44]:
funcs['Medical_PolVar_0.5_4']['hctsa_name']

'MD_polvar'

In [46]:
mlab_func = eval(f"eng.{funcs['Medical_PolVar_0.5_4']['hctsa_name']}")

In [50]:
mlab_func(matlab.double(empirical1000[1]), matlab.double(1), matlab.double(3))

0.33323332333233324

In [None]:
eng.MD_polvar(matlab.double(empirical1000[1]), matlab.double(1), matlab.double(3))

In [12]:
# NOTE(review): this cell repeats the CSV loader from an earlier cell; one of
# the two copies can be removed.
# Load the time-series collection: every CSV row becomes one list of floats.
empirical1000 = []
with open("../../../empirical1000/hctsa_timeseries-data.csv") as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    for row in reader:
        # Empty fields are dropped before conversion, so rows may end up with
        # different lengths.
        try:
            time_series = [float(value) for value in row if value != '']
            empirical1000.append(time_series)
        except ValueError as e:
            # A single non-numeric field causes the whole row to be reported and skipped.
            print(f"Skipping row due to conversion error: {row}")
            continue

In [22]:
# Time one pass of every configured Python operation over a single time series
# and count how many scalar features that produces.
computed = {}
feature_count = 0
tstart = time.perf_counter()
for feature, func in funcs.items():
    # load_yaml stores a metadata dict per feature (the Python implementation
    # lives under 'callable'); a bare callable is still accepted.
    python_func = func['callable'] if isinstance(func, dict) else func
    computed[feature] = python_func(empirical1000[2])
    # A scalar result is one feature; a dict result contributes one per key.
    if isinstance(computed[feature], (float, int)):
        feature_count += 1
    else:
        feature_count += len(computed[feature])
telapsed = time.perf_counter() - tstart
print(f"Computed {len(computed)} operations ({feature_count} features)")
print(f"Time taken: {telapsed} seconds")

Computed 15 operations (40 features)
Time taken: 0.19373850000556558 seconds


In [29]:
funcs['Medical_PolVar_1.0_5']

<function functools.zscore_decorator.<locals>.wrapper(x: Union[list, numpy.ndarray], *, d: float = 1.0, D: int = 5) -> float>

In [23]:
computed

{'Medical_HRV_Classic': {'pnn5': np.float64(0.9980998099809981),
  'pnn10': np.float64(0.9956995699569957),
  'pnn20': np.float64(0.9903990399039904),
  'pnn30': np.float64(0.9850985098509851),
  'pnn40': np.float64(0.9807980798079808),
  'lfhf': np.float64(0.44778742017875445),
  'vlf': np.float64(1.0800176439782536),
  'lf': np.float64(3.619672975964347),
  'hf': np.float64(8.083462850562869),
  'tri': np.float64(5.099439061703213),
  'SD1': np.float64(1000.095519779147),
  'SD2': np.float64(999.9044710959529)},
 'Medical_PNN_raw': {'pnn5': np.float64(0.9858985898589859),
  'pnn10': np.float64(0.9693969396939695),
  'pnn20': np.float64(0.9387938793879388),
  'pnn30': np.float64(0.9091909190919092),
  'pnn40': np.float64(0.8778877887788779),
  'pnn50': np.float64(0.8471847184718472),
  'pnn60': np.float64(0.815981598159816),
  'pnn70': np.float64(0.784978497849785),
  'pnn80': np.float64(0.756975697569757),
  'pnn90': np.float64(0.7267726772677268),
  'pnn100': np.float64(0.6972697269