In [1]:
import numpy as np
import pandas as pd
import matlab.engine
import yaml
import time
import importlib
import pathlib
import matplotlib.pyplot as plt
import csv
from tqdm import tqdm
from scipy.stats import pearsonr
from loguru import logger
from Entropy import ApproximateEntropy
from functools import partial, wraps
from itertools import product
import os
from typing import Union
from numbers import Number
from utilities import ZScore

In [145]:
import importlib
import Distributional
importlib.reload(Distributional)

<module 'Distributional' from '/Users/jmoo2880/Documents/py-hctsa-project/pyhctsa/Operations/Distributional.py'>

In [3]:
def compare_outputs(outputs, exclude_keys=None, verbose=True):
    """
    Compare MATLAB and Python feature outputs, computing relative errors
    and Pearson correlations.

    Parameters
    ----------
    outputs : dict
        Nested dictionary of feature values with structure:
        {
            feature_name: {
                ts_id: {
                    'matlab': scalar_or_dict,
                    'python': scalar_or_dict
                }
            }
        }
    exclude_keys : set or list, optional
        Keys to exclude from the comparison. A bare feature name drops the
        whole feature (including every sub-key of a dict-valued feature);
        a 'feature.subkey' entry drops only that sub-feature.
    verbose : bool, optional
        When True (default) print one relative-error line per time series.
        Pass False to suppress the per-series log.

    Returns
    -------
    results : dict
        Dictionary mapping feature (or subfeature) names to correlation and stats:
        {
            'feature.subkey': {
                'r': float,
                'pval': float,
                'res_py': ndarray,
                'res_matlab': ndarray,
                'max_rel_err': float
            }
        }
        'max_rel_err' is a percentage taken over the series where both values
        are finite and the error is defined; it is NaN when no such series exist.

    Raises
    ------
    ValueError
        If a non-excluded feature has a MATLAB/Python pair that is neither
        two scalars nor two dicts.
    """
    excluded = set() if exclude_keys is None else set(exclude_keys)

    # Flatten to {slot: {ts: (matlab_value, python_value)}} where a slot is
    # either a scalar feature name or 'feature.subkey'.
    flat = {}
    for feat, ts_dict in outputs.items():
        # Honour exclusion by bare feature name for scalar AND dict-valued
        # features, as the docstring promises.
        if feat in excluded:
            continue
        for ts, run in ts_dict.items():
            ml = run['matlab']
            py = run['python']
            if isinstance(ml, dict) and isinstance(py, dict):
                for k, mlv in ml.items():
                    slot = f"{feat}.{k}"
                    # Sub-keys missing on the Python side are skipped silently.
                    if slot in excluded or k not in py:
                        continue
                    flat.setdefault(slot, {})[ts] = (mlv, py[k])
            elif isinstance(ml, Number) and isinstance(py, Number):
                flat.setdefault(feat, {})[ts] = (ml, py)
            else:
                raise ValueError(f"Feature {feat}@{ts} is neither both scalars nor both dicts.")

    results = {}
    for slot, tsmap in flat.items():
        ml_vals, py_vals = [], []
        rel_errors = []

        for ts, (mlv, pyv) in tsmap.items():
            ml_vals.append(mlv)
            py_vals.append(pyv)

            both_finite = np.isfinite(mlv) and np.isfinite(pyv)
            both_nan = np.isnan(mlv) and np.isnan(pyv)
            both_posinf = (mlv == np.inf) and (pyv == np.inf)
            both_neginf = (mlv == -np.inf) and (pyv == -np.inf)

            if both_finite:
                if mlv == pyv:
                    # Exact agreement, including the 0 == 0 case that would
                    # otherwise divide by zero.
                    rel_err = 0.0
                    rel_errors.append(rel_err)
                elif mlv == 0:
                    # Relative error is undefined against a zero reference;
                    # reported as NaN and left out of max_rel_err.
                    rel_err = np.nan
                else:
                    rel_err = abs(mlv - pyv) / abs(mlv) * 100
                    rel_errors.append(rel_err)
                message = f"[{slot} | ts={ts}]  RelErr% = {rel_err:.2f}"
            elif both_nan or both_posinf or both_neginf:
                message = f"[{slot} | ts={ts}]  RelErr% = MATCH (both non-finite)"
            else:
                message = f"[{slot} | ts={ts}]  RelErr% = NaN (mismatch in finiteness)"

            if verbose:
                print(message)

        ml_arr = np.array(ml_vals, dtype=float)
        py_arr = np.array(py_vals, dtype=float)
        finite_mask = np.isfinite(ml_arr) & np.isfinite(py_arr)

        # Pearson r needs at least two finite pairs and non-zero variance on
        # both sides; otherwise it is reported as NaN.
        if finite_mask.sum() > 1 and ml_arr[finite_mask].std() and py_arr[finite_mask].std():
            r, p = pearsonr(ml_arr[finite_mask], py_arr[finite_mask])
        else:
            r, p = np.nan, np.nan

        max_rel_err = np.nanmax(rel_errors) if rel_errors else np.nan

        results[slot] = {
            'r': r,
            'pval': p,
            'res_py': py_arr,
            'res_matlab': ml_arr,
            'max_rel_err': max_rel_err
        }

    return results

In [4]:
# Load the time-series collection: one series per CSV row. Rows may have
# different lengths; empty cells are dropped before conversion.
empirical1000 = []
# newline='' lets the csv module do its own line-ending handling.
with open("../../../empirical1000/hctsa_timeseries-data.csv", newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        try:
            # Every non-empty cell must parse as a float, otherwise the whole
            # row is skipped (nothing is appended for it).
            empirical1000.append([float(value) for value in row if value != ''])
        except ValueError:
            print(f"Skipping row due to conversion error: {row}")

In [5]:
def load_yaml(file):
    """
    Build the table of feature callables described by a YAML configuration.

    The YAML maps module names to function names to a config dict. Each config
    dict may carry 'configs' (a list of parameter dicts), 'hctsa_name' (name of
    the MATLAB function to compare against) and 'ordered_args'. List-valued
    parameters are expanded into the Cartesian product of their values; a
    'zscore' entry in a parameter dict is consumed as a flag and is not passed
    to the Python function.

    Relies on two module-level names being defined before the call: ``eng``
    (the MATLAB engine the MATLAB callables are looked up on) and
    ``zscore_decorator``.

    Parameters
    ----------
    file : str or path-like
        Path to the YAML configuration file.

    Returns
    -------
    funcs : dict
        Maps feature name to a dict with keys 'callable', 'params',
        'hctsa_name', 'matlab_callable', 'isZscore' and 'ordered_args'.

    Raises
    ------
    ValueError
        If a function entry has no 'hctsa_name'.
    """
    # Path(...).name works for both str and path-like input.
    print(f"Loading configuration file: {pathlib.Path(file).name}")
    funcs = {}
    with open(file) as f:
        # NOTE: FullLoader is kept so constructors registered through
        # yaml.add_constructor are honoured; only load trusted config files.
        yf = yaml.load(f, Loader=yaml.FullLoader)

    for module_name, module_config in yf.items():
        print(f"\n*** Importing module {module_name} *** \n")
        module = importlib.import_module(module_name)
        for function_name, function_config in module_config.items():
            # A missing, null or empty 'configs' section means "call with defaults".
            configs = function_config.get('configs')
            if configs is None or configs == []:
                configs = [{}]

            hctsa_name = function_config.get('hctsa_name')
            if not hctsa_name:
                raise ValueError(
                    f"{module_name}.{function_name} has no 'hctsa_name' in {file}")
            # Attribute lookup on the engine yields the MATLAB callable; this
            # replaces eval() on text taken from the config file.
            hctsa_callable = getattr(eng, hctsa_name)

            for params in configs:
                # Copy so the loaded configuration is not mutated by pop();
                # a null entry is treated as "no parameters".
                params = dict(params) if params else {}
                zscore_first = params.pop("zscore", False)

                # Scalars are wrapped so every parameter contributes a list
                # to the Cartesian product.
                param_keys = list(params)
                value_lists = [v if isinstance(v, list) else [v]
                               for v in params.values()]
                param_combinations = [dict(zip(param_keys, combo))
                                      for combo in product(*value_lists)]

                # An empty list-valued parameter yields no combinations;
                # fall back to a single call with no parameters.
                if not param_combinations:
                    param_combinations = [{}]

                # create a function for each parameter combination
                for param_set in param_combinations:
                    feature_name = f"{module_name}_{function_name}"
                    if param_set:
                        feature_name += "_" + "_".join(f"{v}" for v in param_set.values())
                    if not zscore_first:
                        feature_name += "_raw"

                    print(f"Adding operation {feature_name} with params {param_set} "
                          f"(Z-score={zscore_first})")

                    base_func = partial(getattr(module, function_name), **param_set)
                    if zscore_first:
                        base_func = zscore_decorator(base_func)

                    # The MATLAB callable cannot be wrapped like the Python one,
                    # so callers must z-score the input themselves when
                    # 'isZscore' is set.
                    funcs[feature_name] = {
                        'callable': base_func,
                        'params': param_set,
                        'hctsa_name': hctsa_name,
                        'matlab_callable': hctsa_callable,
                        'isZscore': zscore_first,
                        # kept so the MATLAB call can receive positional args in order
                        'ordered_args': function_config.get('ordered_args'),
                    }

    return funcs

In [6]:
def eval_comparison(yaml, data):
    """
    Evaluate every configured feature in both Python and MATLAB.

    Parameters
    ----------
    yaml : str
        Path to the YAML configuration handed to ``load_yaml``.
    data : sequence
        Time series; each entry is converted with ``np.array`` before use.

    Returns
    -------
    dict
        ``{feature_name: {index: {'matlab': value, 'python': value}}}`` where
        ``index`` is the position of the series in ``data``.
    """
    collected = {}
    for feature_name, spec in load_yaml(yaml).items():
        print(f"Evalutating {feature_name}")
        py_callable = spec['callable']
        ml_callable = spec['matlab_callable']
        zscore_first = spec['isZscore']
        bound_params = spec['params']
        # Positional MATLAB arguments, in the order given by 'ordered_args';
        # only consulted when the feature has parameters at all.
        matlab_args = ([bound_params[key] for key in spec['ordered_args']]
                       if bound_params else [])

        print(f"Comparing to {spec['hctsa_name']}")
        per_series = {}
        for idx, series in enumerate(data):
            x = np.array(series)
            ml_input = matlab.double(x)
            if zscore_first:
                # The Python callable z-scores internally; MATLAB needs it done here.
                ml_input = ZScore(ml_input)
            matlab_eval = ml_callable(ml_input, *matlab_args)
            python_eval = py_callable(x)
            per_series[idx] = {'matlab': matlab_eval, 'python': python_eval}
        collected[feature_name] = per_series
    return collected

In [7]:
eng = matlab.engine.start_matlab()

In [8]:
# Root of the local hctsa checkout. Set the HCTSA_ROOT environment variable to
# point elsewhere instead of editing this cell; the default is unchanged.
proj_root = pathlib.Path(os.environ.get("HCTSA_ROOT", "/Users/jmoo2880/Documents/hctsa"))
# Put the folder tree returned by genpath on the MATLAB path; nargout=0
# because addpath's return value is not needed.
eng.addpath(eng.genpath(str(proj_root)), nargout=0)
def zscore_decorator(func):
    """
    Return a version of ``func`` whose first positional argument is passed
    through ``ZScore`` before ``func`` is called; all other positional and
    keyword arguments are forwarded unchanged.
    """
    @wraps(func)
    def wrapper(y, *args, **kwargs):
        return func(ZScore(y), *args, **kwargs)

    return wrapper

def range_constructor(loader, node):
    """
    YAML constructor: read a two-element sequence ``[start, end]`` from
    ``node`` and return the list of integers from ``start`` to ``end``
    inclusive.
    """
    first, last = loader.construct_sequence(node)
    # +1 because the upper bound is inclusive.
    return [*range(first, last + 1)]
yaml.add_constructor("!range", range_constructor)

In [23]:
funcs = load_yaml("/Users/jmoo2880/Documents/py-hctsa-project/pyhctsa/Configurations/distributional.yaml")

Loading configuration file: distributional.yaml

*** Importing module Distributional *** 

Adding operation Distributional_Burstiness_raw with params {} (Z-score=False)


In [29]:
burstiness_evals = eval_comparison("/Users/jmoo2880/Documents/py-hctsa-project/pyhctsa/Configurations/distributional.yaml", empirical1000)
burstiness_compares = compare_outputs(burstiness_evals)

Loading configuration file: distributional.yaml

*** Importing module Distributional *** 

Adding operation Distributional_Burstiness_raw with params {} (Z-score=False)
Evalutating Distributional_Burstiness_raw
Comparing to DN_Burstiness
[Distributional_Burstiness_raw.B | ts=0]  RelErr% = 0.00
[Distributional_Burstiness_raw.B | ts=1]  RelErr% = 0.00
[Distributional_Burstiness_raw.B | ts=2]  RelErr% = 0.00
[Distributional_Burstiness_raw.B | ts=3]  RelErr% = 0.00
[Distributional_Burstiness_raw.B | ts=4]  RelErr% = 0.00
[Distributional_Burstiness_raw.B | ts=5]  RelErr% = 0.00
[Distributional_Burstiness_raw.B | ts=6]  RelErr% = 0.00
[Distributional_Burstiness_raw.B | ts=7]  RelErr% = 0.00
[Distributional_Burstiness_raw.B | ts=8]  RelErr% = 0.00
[Distributional_Burstiness_raw.B | ts=9]  RelErr% = 0.00
[Distributional_Burstiness_raw.B | ts=10]  RelErr% = 0.00
[Distributional_Burstiness_raw.B | ts=11]  RelErr% = 0.00
[Distributional_Burstiness_raw.B | ts=12]  RelErr% = 0.00
[Distributional_Bu

In [31]:
[burstiness_compares[p]['r'] for p in burstiness_compares]

[np.float64(0.9999999999999999), np.float64(0.9999999999999998)]

In [38]:
eval_comparison("/Users/jmoo2880/Documents/py-hctsa-project/pyhctsa/Configurations/distributional.yaml", empirical1000)

Loading configuration file: distributional.yaml

*** Importing module Distributional *** 

Adding operation Distributional_CustomSkewness_pearson_raw with params {'whatSkew': 'pearson'} (Z-score=False)
Adding operation Distributional_CustomSkewness_bowley with params {'whatSkew': 'bowley'} (Z-score=True)
Evalutating Distributional_CustomSkewness_pearson_raw
Comparing to DN_CustomSkewness
Evalutating Distributional_CustomSkewness_bowley
Comparing to DN_CustomSkewness


  


{'Distributional_CustomSkewness_pearson_raw': {0: {'matlab': 2.80227524712231,
   'python': np.float64(2.8022752471223074)},
  1: {'matlab': 4.987335820206696, 'python': np.float64(4.987335820206694)},
  2: {'matlab': 6.486031817338214, 'python': np.float64(6.486031817338227)},
  3: {'matlab': 4.65384516088154, 'python': np.float64(4.653845160881544)},
  4: {'matlab': 5.919211569991916, 'python': np.float64(5.919211569991916)},
  5: {'matlab': 8.714220888450903, 'python': np.float64(8.714220888450905)},
  6: {'matlab': 1.886521531653141, 'python': np.float64(1.8865215316531396)},
  7: {'matlab': 2.0993113649965904, 'python': np.float64(2.099311364996592)},
  8: {'matlab': 2.4760577684416916, 'python': np.float64(2.4760577684416916)},
  9: {'matlab': 38.210952920293984, 'python': np.float64(38.210952920293984)},
  10: {'matlab': 3.5622426447085838, 'python': np.float64(3.5622426447085767)},
  11: {'matlab': -1.0828983715968739,
   'python': np.float64(-1.0828983715968756)},
  12: {'matl

In [39]:
customSkew_evals = eval_comparison("/Users/jmoo2880/Documents/py-hctsa-project/pyhctsa/Configurations/distributional.yaml", empirical1000)
customSkew_compares = compare_outputs(customSkew_evals)

Loading configuration file: distributional.yaml

*** Importing module Distributional *** 

Adding operation Distributional_CustomSkewness_pearson_raw with params {'whatSkew': 'pearson'} (Z-score=False)
Adding operation Distributional_CustomSkewness_bowley with params {'whatSkew': 'bowley'} (Z-score=True)
Evalutating Distributional_CustomSkewness_pearson_raw
Comparing to DN_CustomSkewness
Evalutating Distributional_CustomSkewness_bowley
Comparing to DN_CustomSkewness


  


[Distributional_CustomSkewness_pearson_raw | ts=0]  RelErr% = 0.00
[Distributional_CustomSkewness_pearson_raw | ts=1]  RelErr% = 0.00
[Distributional_CustomSkewness_pearson_raw | ts=2]  RelErr% = 0.00
[Distributional_CustomSkewness_pearson_raw | ts=3]  RelErr% = 0.00
[Distributional_CustomSkewness_pearson_raw | ts=4]  RelErr% = 0.00
[Distributional_CustomSkewness_pearson_raw | ts=5]  RelErr% = 0.00
[Distributional_CustomSkewness_pearson_raw | ts=6]  RelErr% = 0.00
[Distributional_CustomSkewness_pearson_raw | ts=7]  RelErr% = 0.00
[Distributional_CustomSkewness_pearson_raw | ts=8]  RelErr% = 0.00
[Distributional_CustomSkewness_pearson_raw | ts=9]  RelErr% = 0.00
[Distributional_CustomSkewness_pearson_raw | ts=10]  RelErr% = 0.00
[Distributional_CustomSkewness_pearson_raw | ts=11]  RelErr% = 0.00
[Distributional_CustomSkewness_pearson_raw | ts=12]  RelErr% = 0.00
[Distributional_CustomSkewness_pearson_raw | ts=13]  RelErr% = 0.00
[Distributional_CustomSkewness_pearson_raw | ts=14]  RelEr

In [53]:
def eval_comparisoncv(yaml, data):
    """
    Variant of the MATLAB/Python comparison that forwards exactly one
    positional argument to MATLAB, wrapped in ``matlab.double``.

    Parameters
    ----------
    yaml : str
        Path to the YAML configuration handed to ``load_yaml``.
    data : sequence
        Time series; each entry is converted with ``np.array`` before use.

    Returns
    -------
    dict
        ``{feature_name: {index: {'matlab': value, 'python': value}}}``.

    Notes
    -----
    Every configured feature must have at least one ordered argument; only
    the first one is passed to MATLAB.
    """
    collected = {}
    for feature_name, spec in load_yaml(yaml).items():
        print(f"Evalutating {feature_name}")
        py_callable = spec['callable']
        ml_callable = spec['matlab_callable']
        zscore_first = spec['isZscore']
        bound_params = spec['params']
        matlab_args = ([bound_params[key] for key in spec['ordered_args']]
                       if bound_params else [])

        print(f"Comparing to {spec['hctsa_name']}")
        per_series = {}
        for idx, series in enumerate(data):
            x = np.array(series)
            ml_input = matlab.double(x)
            if zscore_first:
                ml_input = ZScore(ml_input)
            # The argument is sent as a MATLAB double rather than a Python scalar.
            matlab_eval = ml_callable(ml_input, matlab.double(matlab_args[0]))
            python_eval = py_callable(x)
            per_series[idx] = {'matlab': matlab_eval, 'python': python_eval}
        collected[feature_name] = per_series
    return collected

In [54]:
cv_evals = eval_comparisoncv("/Users/jmoo2880/Documents/py-hctsa-project/pyhctsa/Configurations/distributional.yaml", empirical1000)
cv_compares = compare_outputs(cv_evals)

Loading configuration file: distributional.yaml

*** Importing module Distributional *** 

Adding operation Distributional_CV_1_raw with params {'k': 1} (Z-score=False)
Adding operation Distributional_CV_2_raw with params {'k': 2} (Z-score=False)
Evalutating Distributional_CV_1_raw
Comparing to DN_cv
Evalutating Distributional_CV_2_raw
Comparing to DN_cv
[Distributional_CV_1_raw | ts=0]  RelErr% = 0.00
[Distributional_CV_1_raw | ts=1]  RelErr% = 0.00
[Distributional_CV_1_raw | ts=2]  RelErr% = 0.00
[Distributional_CV_1_raw | ts=3]  RelErr% = 0.00
[Distributional_CV_1_raw | ts=4]  RelErr% = 0.00
[Distributional_CV_1_raw | ts=5]  RelErr% = 0.00
[Distributional_CV_1_raw | ts=6]  RelErr% = 0.00
[Distributional_CV_1_raw | ts=7]  RelErr% = 0.00
[Distributional_CV_1_raw | ts=8]  RelErr% = 0.00
[Distributional_CV_1_raw | ts=9]  RelErr% = 0.00
[Distributional_CV_1_raw | ts=10]  RelErr% = 0.00
[Distributional_CV_1_raw | ts=11]  RelErr% = 0.00
[Distributional_CV_1_raw | ts=12]  RelErr% = 0.00
[Di

In [57]:
cv_compares

{'Distributional_CV_1_raw': {'r': np.float64(1.0),
  'pval': np.float64(0.0),
  'res_py': array([ 7.77435431e-01,  4.09045447e-01,  3.06373071e-01,  4.36071746e-01,
          3.41035255e-01,  2.25436361e-01,  1.34062846e+00,  1.16784553e+00,
          9.14754650e-01,  5.23410134e-02,  6.17012080e-01, -2.16894836e+00,
          1.06309175e+00,  2.10034898e+00,  1.30535471e+00,  2.13736424e-01,
          2.16424711e-01,  2.44092001e+01,  1.41287725e-01, -8.69924738e+01,
          2.19278066e-02,  5.41908725e-01,  5.50902045e-01,  2.63914589e-01,
          3.43399540e+00,  3.71408579e-01,  7.12703622e-01, -2.08908337e+00,
         -2.35322997e+00,  6.60386560e-01,  3.70440055e-01,  1.64005616e-01,
          9.03684748e-01,  9.92144859e-01,  4.47421041e-01,  7.61480468e-01,
          8.99539922e-01,  1.27227806e+00,  3.02168275e-01, -8.57528629e-01,
          8.82157641e-01,  3.09386435e+00,  2.40418582e+00,  3.13458782e+02,
         -9.60717401e+02,  1.42258732e+03, -5.32577492e+00, -3.86

In [56]:
[cv_compares[p]['r'] for p in cv_compares]

[np.float64(1.0), np.float64(1.0)]

In [58]:
eng.DN_cv(matlab.double(empirical1000[181]), matlab.double(1))

inf

In [67]:
from Distributional import CV, FitMLE

In [75]:
def eval_comparison_fitmle(yaml, data):
    """
    Variant of the MATLAB/Python comparison that forwards exactly one
    positional argument to MATLAB, passed through as-is.

    Parameters
    ----------
    yaml : str
        Path to the YAML configuration handed to ``load_yaml``.
    data : sequence
        Time series; each entry is converted with ``np.array`` before use.

    Returns
    -------
    dict
        ``{feature_name: {index: {'matlab': value, 'python': value}}}``.

    Notes
    -----
    Every configured feature must have at least one ordered argument; only
    the first one is passed to MATLAB.
    """
    collected = {}
    for feature_name, spec in load_yaml(yaml).items():
        print(f"Evalutating {feature_name}")
        py_callable = spec['callable']
        ml_callable = spec['matlab_callable']
        zscore_first = spec['isZscore']
        bound_params = spec['params']
        matlab_args = ([bound_params[key] for key in spec['ordered_args']]
                       if bound_params else [])

        print(f"Comparing to {spec['hctsa_name']}")
        per_series = {}
        for idx, series in enumerate(data):
            x = np.array(series)
            ml_input = matlab.double(x)
            if zscore_first:
                ml_input = ZScore(ml_input)
            matlab_eval = ml_callable(ml_input, matlab_args[0])
            python_eval = py_callable(x)
            per_series[idx] = {'matlab': matlab_eval, 'python': python_eval}
        collected[feature_name] = per_series
    return collected

In [80]:
fitmle_evals = eval_comparison_fitmle("/Users/jmoo2880/Documents/py-hctsa-project/pyhctsa/Configurations/distributional.yaml", empirical1000)
fitmle_compares = compare_outputs(fitmle_evals)

Loading configuration file: distributional.yaml

*** Importing module Distributional *** 

Adding operation Distributional_FitMLE_geometric_raw with params {'fitWhat': 'geometric'} (Z-score=False)
Evalutating Distributional_FitMLE_geometric_raw
Comparing to DN_Fit_mle
[Distributional_FitMLE_geometric_raw | ts=0]  RelErr% = 0.00
[Distributional_FitMLE_geometric_raw | ts=1]  RelErr% = 0.00
[Distributional_FitMLE_geometric_raw | ts=2]  RelErr% = 0.00
[Distributional_FitMLE_geometric_raw | ts=3]  RelErr% = 0.00
[Distributional_FitMLE_geometric_raw | ts=4]  RelErr% = 0.00
[Distributional_FitMLE_geometric_raw | ts=5]  RelErr% = 0.00
[Distributional_FitMLE_geometric_raw | ts=6]  RelErr% = 0.00
[Distributional_FitMLE_geometric_raw | ts=7]  RelErr% = 0.00
[Distributional_FitMLE_geometric_raw | ts=8]  RelErr% = 0.00
[Distributional_FitMLE_geometric_raw | ts=9]  RelErr% = 0.00
[Distributional_FitMLE_geometric_raw | ts=10]  RelErr% = 0.00
[Distributional_FitMLE_geometric_raw | ts=11]  RelErr% = 0.

In [59]:
CV(empirical1000[181], 1)

np.float64(1.2593256722066732e+18)

In [73]:
eng.DN_Fit_mle(matlab.double(ZScore(empirical1000[0])), 'gaussian')

{'mean': 4.902744876744691e-17, 'std': 0.9999499987499377}

In [72]:
FitMLE(ZScore(empirical1000[0]), 'gaussian')

{'mean': np.float64(2.2026824808563105e-17),
 'std': np.float64(0.9999499987499374)}

In [84]:
def eval_comparison_hlm(yaml, data):
    """
    Evaluate every configured feature in both Python and MATLAB.

    This used to be a line-for-line copy of ``eval_comparison``; it now
    delegates to it so the two cannot drift apart.

    Parameters
    ----------
    yaml : str
        Path to the YAML configuration file.
    data : sequence
        Time series to evaluate.

    Returns
    -------
    dict
        ``{feature_name: {index: {'matlab': value, 'python': value}}}``.
    """
    return eval_comparison(yaml, data)

In [86]:
hlm_evals = eval_comparison_hlm("/Users/jmoo2880/Documents/py-hctsa-project/pyhctsa/Configurations/distributional.yaml", empirical1000)
hlm_compares = compare_outputs(hlm_evals)

Loading configuration file: distributional.yaml

*** Importing module Distributional *** 

Adding operation Distributional_HighLowMu_raw with params {} (Z-score=False)
Evalutating Distributional_HighLowMu_raw
Comparing to DN_HighLowMu
[Distributional_HighLowMu_raw | ts=0]  RelErr% = 0.00
[Distributional_HighLowMu_raw | ts=1]  RelErr% = 0.00
[Distributional_HighLowMu_raw | ts=2]  RelErr% = 0.00
[Distributional_HighLowMu_raw | ts=3]  RelErr% = 0.00
[Distributional_HighLowMu_raw | ts=4]  RelErr% = 0.00
[Distributional_HighLowMu_raw | ts=5]  RelErr% = 0.00
[Distributional_HighLowMu_raw | ts=6]  RelErr% = 0.00
[Distributional_HighLowMu_raw | ts=7]  RelErr% = 0.00
[Distributional_HighLowMu_raw | ts=8]  RelErr% = 0.00
[Distributional_HighLowMu_raw | ts=9]  RelErr% = 0.00
[Distributional_HighLowMu_raw | ts=10]  RelErr% = 0.00
[Distributional_HighLowMu_raw | ts=11]  RelErr% = 0.00
[Distributional_HighLowMu_raw | ts=12]  RelErr% = 0.00
[Distributional_HighLowMu_raw | ts=13]  RelErr% = 0.00
[Dist

In [25]:
def eval_comparison_ha(yaml, data):
    """
    Variant of the MATLAB/Python comparison that forwards exactly two
    positional arguments to MATLAB.

    Parameters
    ----------
    yaml : str
        Path to the YAML configuration handed to ``load_yaml``.
    data : sequence
        Time series; each entry is converted with ``np.array`` before use.

    Returns
    -------
    dict
        ``{feature_name: {index: {'matlab': value, 'python': value}}}``.

    Notes
    -----
    Every configured feature must have at least two ordered arguments; only
    the first two are passed to MATLAB.
    """
    collected = {}
    for feature_name, spec in load_yaml(yaml).items():
        print(f"Evalutating {feature_name}")
        py_callable = spec['callable']
        ml_callable = spec['matlab_callable']
        zscore_first = spec['isZscore']
        bound_params = spec['params']
        matlab_args = ([bound_params[key] for key in spec['ordered_args']]
                       if bound_params else [])

        print(f"Comparing to {spec['hctsa_name']}")
        per_series = {}
        for idx, series in enumerate(data):
            x = np.array(series)
            ml_input = matlab.double(x)
            if zscore_first:
                ml_input = ZScore(ml_input)
            matlab_eval = ml_callable(ml_input, matlab_args[0], matlab_args[1])
            python_eval = py_callable(x)
            per_series[idx] = {'matlab': matlab_eval, 'python': python_eval}
        collected[feature_name] = per_series
    return collected

In [103]:
ha_evals = eval_comparison_ha("/Users/jmoo2880/Documents/py-hctsa-project/pyhctsa/Configurations/distributional.yaml", empirical1000)
ha_compares = compare_outputs(ha_evals)

Loading configuration file: distributional.yaml

*** Importing module Distributional *** 

Adding operation Distributional_HistogramAsymmetry_11_False with params {'numBins': 11, 'doSimple': False} (Z-score=True)
Evalutating Distributional_HistogramAsymmetry_11_False
Comparing to DN_HistogramAsymmetry
[Distributional_HistogramAsymmetry_11_False.densityDiff | ts=0]  RelErr% = 0.00
[Distributional_HistogramAsymmetry_11_False.densityDiff | ts=1]  RelErr% = 0.00
[Distributional_HistogramAsymmetry_11_False.densityDiff | ts=2]  RelErr% = 0.00
[Distributional_HistogramAsymmetry_11_False.densityDiff | ts=3]  RelErr% = 0.00
[Distributional_HistogramAsymmetry_11_False.densityDiff | ts=4]  RelErr% = 0.00
[Distributional_HistogramAsymmetry_11_False.densityDiff | ts=5]  RelErr% = 0.00
[Distributional_HistogramAsymmetry_11_False.densityDiff | ts=6]  RelErr% = 0.00
[Distributional_HistogramAsymmetry_11_False.densityDiff | ts=7]  RelErr% = 0.00
[Distributional_HistogramAsymmetry_11_False.densityDiff |

In [104]:
ha_compares

{'Distributional_HistogramAsymmetry_11_False.densityDiff': {'r': np.float64(1.0),
  'pval': np.float64(0.0),
  'res_py': array([-1.562e+03, -7.160e+02,  3.180e+02, -5.800e+01, -4.200e+01,
          1.160e+02, -1.722e+03, -1.402e+03, -1.140e+03,  1.020e+02,
         -4.220e+02,  7.100e+02, -3.042e+03, -6.720e+02, -3.500e+02,
          4.740e+02,  2.394e+03, -8.000e+00,  2.260e+02,  8.000e+00,
         -4.800e+01,  8.000e+01,  8.860e+02, -1.964e+03, -7.030e+03,
         -1.606e+03, -1.986e+03,  5.172e+03,  5.662e+03, -2.400e+02,
         -1.940e+02,  1.480e+02, -8.200e+01,  3.200e+01, -5.400e+01,
         -3.080e+02, -2.410e+03, -1.870e+03,  9.400e+01, -4.260e+02,
         -1.300e+03, -6.780e+02, -1.168e+03,  2.000e+01,  4.000e+00,
          0.000e+00, -4.800e+01,  1.920e+02,  0.000e+00, -1.490e+03,
          1.220e+02,  2.244e+03, -1.130e+03, -2.200e+02,  3.060e+02,
         -4.300e+02, -7.220e+02, -9.820e+02, -6.000e+00, -1.800e+01,
         -1.400e+01, -3.000e+01, -8.000e+00, -1.600e+

In [121]:
importlib.reload(Distributional)
from Distributional import HistogramMode

In [122]:
HistogramMode(empirical1000[103], 11, doSimple=False)

np.float64(0.28800000000000003)

In [129]:
def eval_comparison_hm(yaml, data):
    """
    Variant of the MATLAB/Python comparison where the third ordered argument
    is a flag handled on the Python side rather than passed to MATLAB.

    For z-scored features, a truthy third ordered argument causes ``abs`` to
    be applied to the z-scored series before the MATLAB call. MATLAB always
    receives only the first two ordered arguments.

    Parameters
    ----------
    yaml : str
        Path to the YAML configuration handed to ``load_yaml``.
    data : sequence
        Time series; each entry is converted with ``np.array`` before use.

    Returns
    -------
    dict
        ``{feature_name: {index: {'matlab': value, 'python': value}}}``.
    """
    collected = {}
    for feature_name, spec in load_yaml(yaml).items():
        print(f"Evalutating {feature_name}")
        py_callable = spec['callable']
        ml_callable = spec['matlab_callable']
        zscore_first = spec['isZscore']
        bound_params = spec['params']
        matlab_args = ([bound_params[key] for key in spec['ordered_args']]
                       if bound_params else [])

        print(f"Comparing to {spec['hctsa_name']}")
        per_series = {}
        for idx, series in enumerate(data):
            x = np.array(series)
            if zscore_first:
                # presumably the third ordered argument is 'doAbs' — confirm
                # against the YAML's ordered_args.
                take_abs = matlab_args[2]
                ml_input = ZScore(matlab.double(x))
                if take_abs:
                    ml_input = abs(ml_input)
            else:
                # NOTE(review): the abs flag is not applied on this raw path —
                # confirm that is intended before adding non-z-scored configs.
                ml_input = matlab.double(x)
            matlab_eval = ml_callable(ml_input, matlab_args[0], matlab_args[1])
            python_eval = py_callable(x)
            per_series[idx] = {'matlab': matlab_eval, 'python': python_eval}
        collected[feature_name] = per_series
    return collected

In [130]:
hm_evals = eval_comparison_hm("/Users/jmoo2880/Documents/py-hctsa-project/pyhctsa/Configurations/distributional.yaml", empirical1000)
hm_compares = compare_outputs(hm_evals)

Loading configuration file: distributional.yaml

*** Importing module Distributional *** 

Adding operation Distributional_HistogramMode_5_True_False with params {'numBins': 5, 'doSimple': True, 'doAbs': False} (Z-score=True)
Adding operation Distributional_HistogramMode_5_True_True with params {'numBins': 5, 'doSimple': True, 'doAbs': True} (Z-score=True)
Adding operation Distributional_HistogramMode_10_True_False with params {'numBins': 10, 'doSimple': True, 'doAbs': False} (Z-score=True)
Adding operation Distributional_HistogramMode_10_True_True with params {'numBins': 10, 'doSimple': True, 'doAbs': True} (Z-score=True)
Adding operation Distributional_HistogramMode_21_True_False with params {'numBins': 21, 'doSimple': True, 'doAbs': False} (Z-score=True)
Adding operation Distributional_HistogramMode_21_True_True with params {'numBins': 21, 'doSimple': True, 'doAbs': True} (Z-score=True)
Evalutating Distributional_HistogramMode_5_True_False
Comparing to DN_HistogramMode
Evalutating D

In [137]:
def eval_comparison_mean(yaml, data):
    """
    Evaluate every configured feature in both Python and MATLAB.

    This used to be a line-for-line copy of ``eval_comparison``; it now
    delegates to it so the two cannot drift apart.

    Parameters
    ----------
    yaml : str
        Path to the YAML configuration file.
    data : sequence
        Time series to evaluate.

    Returns
    -------
    dict
        ``{feature_name: {index: {'matlab': value, 'python': value}}}``.
    """
    return eval_comparison(yaml, data)

In [140]:
mean_evals = eval_comparison_mean("/Users/jmoo2880/Documents/py-hctsa-project/pyhctsa/Configurations/distributional.yaml", empirical1000)
mean_compares = compare_outputs(mean_evals)

Loading configuration file: distributional.yaml

*** Importing module Distributional *** 

Adding operation Distributional_Mean_norm_raw with params {'meanType': 'norm'} (Z-score=False)
Adding operation Distributional_Mean_harm_raw with params {'meanType': 'harm'} (Z-score=False)
Adding operation Distributional_Mean_rms_raw with params {'meanType': 'rms'} (Z-score=False)
Adding operation Distributional_Mean_median_raw with params {'meanType': 'median'} (Z-score=False)
Adding operation Distributional_Mean_midhinge_raw with params {'meanType': 'midhinge'} (Z-score=False)
Evalutating Distributional_Mean_norm_raw
Comparing to DN_Mean
Evalutating Distributional_Mean_harm_raw
Comparing to DN_Mean


  # compute bin centers from bin edges
  # compute bin centers from bin edges


Evalutating Distributional_Mean_rms_raw
Comparing to DN_Mean
Evalutating Distributional_Mean_median_raw
Comparing to DN_Mean
Evalutating Distributional_Mean_midhinge_raw
Comparing to DN_Mean

p =

    0.0910
    0.3652


p =

    0.1974
    0.3588


p =

    0.4442
    0.7019


p =

    0.2537
    0.4879


p =

    0.3424
    0.5602


p =

    0.6057
    0.8340


p =

    0.0050
    0.0638


p =

    0.0168
    0.2117


p =

    0.0280
    0.1291


p =

    18
    20


p =

     1
     3


p =

   -1.2370
    0.3073


p =

     2
    13


p =

   -0.3065
    1.3099


p =

    3.1705
   16.4170


p =

     3
     5


p =

     3
     5


p =

   -5.8660
    6.8500


p =

    45
    55


p =

   -0.7621
    0.7181


p =

    0.9873
    1.0168


p =

    2.0198
    3.7920


p =

   59.3825
  168.0250


p =

    1.4901
    2.1473


p =

    0.0000
    0.0036


p =

    0.9237
    1.7062


p =

    2.6709
    6.9584


p =

  -10.2830
   -0.4707


p =

   -1.6930
   -0.1409


p =

    0.6166

In [143]:
[mean_compares[p]['r'] for p in mean_compares]

[np.float64(1.0),
 np.float64(0.9999999999999994),
 np.float64(0.9999999999999998),
 np.float64(1.0),
 np.float64(0.9999999999926217)]

In [147]:
minmax_evals = eval_comparison_mean("/Users/jmoo2880/Documents/py-hctsa-project/pyhctsa/Configurations/distributional.yaml", empirical1000)
minmax_compares = compare_outputs(minmax_evals)

Loading configuration file: distributional.yaml

*** Importing module Distributional *** 

Adding operation Distributional_MinMax_max with params {'minOrMax': 'max'} (Z-score=True)
Adding operation Distributional_MinMax_min with params {'minOrMax': 'min'} (Z-score=True)
Evalutating Distributional_MinMax_max
Comparing to DN_MinMax
Evalutating Distributional_MinMax_min
Comparing to DN_MinMax
[Distributional_MinMax_max | ts=0]  RelErr% = 0.00
[Distributional_MinMax_max | ts=1]  RelErr% = 0.00
[Distributional_MinMax_max | ts=2]  RelErr% = 0.00
[Distributional_MinMax_max | ts=3]  RelErr% = 0.00
[Distributional_MinMax_max | ts=4]  RelErr% = 0.00
[Distributional_MinMax_max | ts=5]  RelErr% = 0.00
[Distributional_MinMax_max | ts=6]  RelErr% = 0.00
[Distributional_MinMax_max | ts=7]  RelErr% = 0.00
[Distributional_MinMax_max | ts=8]  RelErr% = 0.00
[Distributional_MinMax_max | ts=9]  RelErr% = 0.00
[Distributional_MinMax_max | ts=10]  RelErr% = 0.00
[Distributional_MinMax_max | ts=11]  RelErr% 