In [1]:
import numpy as np
import pandas as pd
import matlab.engine
import yaml
import time
import importlib
import pathlib
import matplotlib.pyplot as plt
import csv
from tqdm import tqdm
from scipy.stats import pearsonr
from loguru import logger
from functools import partial, wraps
from itertools import product
import os
from numbers import Number
from utilities import ZScore

In [2]:
# Start a MATLAB engine session; `eng` is the handle later cells use to call MATLAB functions.
eng = matlab.engine.start_matlab()

In [3]:
# Put the local hctsa checkout on the MATLAB search path (genpath expands it to include subfolders).
# NOTE(review): hard-coded, user-specific absolute path — this cell will not work on another machine as-is.
proj_root = pathlib.Path("/Users/jmoo2880/Documents/hctsa")
# nargout=0: addpath is called for its side effect only, no return value is requested.
eng.addpath(eng.genpath(str(proj_root)), nargout=0)

In [4]:
def zscore_decorator(func):
    """Wrap ``func`` so that its first positional argument is z-scored first.

    The returned callable passes its first argument through ``ZScore`` and
    forwards the result, plus any remaining positional and keyword arguments,
    to ``func``. Metadata of ``func`` is carried over with ``functools.wraps``.
    """
    @wraps(func)
    def wrapper(y, *args, **kwargs):
        # Normalise the input, then delegate unchanged to the wrapped callable.
        return func(ZScore(y), *args, **kwargs)

    return wrapper

def range_constructor(loader, node):
    """YAML constructor that expands a two-element sequence into an inclusive integer list.

    ``loader.construct_sequence(node)`` must yield exactly two values
    ``(start, end)``; the result is ``[start, start + 1, ..., end]``.
    """
    bounds = loader.construct_sequence(node)
    first, last = bounds
    # +1 because the upper bound is meant to be included.
    return [*range(first, last + 1)]
# Register the custom `!range` tag so YAML configuration files can use it (handled by range_constructor).
yaml.add_constructor("!range", range_constructor)

In [5]:
def _resolve_matlab_callable(engine, hctsa_name):
    """Return the attribute of ``engine`` named by ``hctsa_name`` without using ``eval``.

    Dotted names are resolved one attribute at a time, which matches what
    evaluating ``eng.<hctsa_name>`` did.

    Raises
    ------
    ValueError
        If ``hctsa_name`` is missing or is not a non-empty string.
    """
    if not isinstance(hctsa_name, str) or not hctsa_name:
        raise ValueError(
            f"Cannot resolve MATLAB function: 'hctsa_name' must be a non-empty string, got {hctsa_name!r}"
        )
    target = engine
    for attr in hctsa_name.split("."):
        target = getattr(target, attr)
    return target


def load_yaml(file):
    """Build a registry of feature callables from a YAML configuration file.

    The YAML document maps module names to function names; each function entry
    is a mapping that may contain ``configs`` (a list of parameter dicts),
    ``hctsa_name`` (name of the MATLAB counterpart) and ``ordered_args``.
    Within a parameter dict, the optional ``zscore`` key selects whether the
    input is z-scored before the Python function is called; every other key is
    a keyword argument. List-valued parameters are expanded into the Cartesian
    product of their elements, one registered feature per combination.

    Relies on the module-level MATLAB engine ``eng`` to look up the MATLAB
    callable. The MATLAB callable is returned as-is: z-scoring of its input
    must be done by the caller.

    Parameters
    ----------
    file : str or os.PathLike
        Path to the YAML configuration file.

    Returns
    -------
    dict
        Maps feature name to a dict with keys ``callable``, ``params``,
        ``hctsa_name``, ``matlab_callable``, ``isZscore`` and ``ordered_args``.

    Raises
    ------
    ValueError
        If a function entry has no usable ``hctsa_name``.
    """
    # pathlib handles both str and Path inputs (str.split would fail on a Path).
    print(f"Loading configuration file: {pathlib.Path(file).name}")
    funcs = {}
    with open(file) as f:
        # NOTE(review): FullLoader is not restricted to plain data types; only load trusted configuration files.
        yf = yaml.load(f, Loader=yaml.FullLoader)

    # An empty document / empty section loads as None; treat it as "nothing to register".
    for module_name, module_config in (yf or {}).items():
        print(f"\n*** Importing module {module_name} *** \n")
        module = importlib.import_module(module_name)
        for function_name, function_config in (module_config or {}).items():
            function_config = function_config or {}

            # These do not depend on the parameter set, so resolve them once per function.
            hctsa_name = function_config.get('hctsa_name')
            hctsa_callable = _resolve_matlab_callable(eng, hctsa_name)
            python_func = getattr(module, function_name)

            # Missing, None or empty 'configs' means: register once with default parameters.
            configs = function_config.get('configs') or [{}]

            for params in configs:
                # Copy so that popping 'zscore' does not mutate the loaded configuration.
                params = dict(params) if params else {}
                zscore_first = params.pop("zscore", False)

                # Scalars are treated as single-element lists for the product.
                value_lists = [v if isinstance(v, list) else [v] for v in params.values()]
                param_combinations = [dict(zip(params, values))
                                      for values in product(*value_lists)]

                # An empty list-valued parameter yields no combinations; fall back to defaults.
                if not param_combinations:
                    param_combinations = [{}]

                # create a function for each parameter combination
                for param_set in param_combinations:
                    feature_name = f"{module_name}_{function_name}"
                    if param_set:
                        feature_name += "_" + "_".join(f"{v}" for v in param_set.values())
                    if not zscore_first:
                        feature_name += "_raw"

                    print(f"Adding operation {feature_name} with params {param_set} "
                          f"(Z-score={zscore_first})")

                    base_func = partial(python_func, **param_set)
                    if zscore_first:
                        base_func = zscore_decorator(base_func)

                    # keep ordered args only for testing YAML otherwise bloats
                    funcs[feature_name] = {
                        'callable': base_func,
                        'params': param_set,
                        'hctsa_name': hctsa_name,
                        'matlab_callable': hctsa_callable,
                        'isZscore': zscore_first,
                        'ordered_args': function_config.get('ordered_args'),
                    }

    return funcs

In [6]:
# Build the feature registry for the physics configuration.
# NOTE(review): hard-coded, user-specific absolute path — adjust before running elsewhere.
funcs = load_yaml("/Users/jmoo2880/Documents/py-hctsa-project/pyhctsa/Configurations/physics.yaml")

Loading configuration file: physics.yaml

*** Importing module Physics *** 

Adding operation Physics_ForcePotential_dblwell_[1, 0.2, 0.1] with params {'whatPotential': 'dblwell', 'params': [1, 0.2, 0.1]} (Z-score=True)
Adding operation Physics_ForcePotential_dblwell_[1, 0.5, 0.2] with params {'whatPotential': 'dblwell', 'params': [1, 0.5, 0.2]} (Z-score=True)
Adding operation Physics_ForcePotential_dblwell_[2, 0.05, 0.2] with params {'whatPotential': 'dblwell', 'params': [2, 0.05, 0.2]} (Z-score=True)
Adding operation Physics_ForcePotential_dblwell_[3, 0.01, 0.1] with params {'whatPotential': 'dblwell', 'params': [3, 0.01, 0.1]} (Z-score=True)
Adding operation Physics_ForcePotential_sine_[3, 0.5, 1] with params {'whatPotential': 'sine', 'params': [3, 0.5, 1]} (Z-score=True)
Adding operation Physics_ForcePotential_sine_[1, 1, 1] with params {'whatPotential': 'sine', 'params': [1, 1, 1]} (Z-score=True)
Adding operation Physics_ForcePotential_sine_[10, 0.04, 10] with params {'whatPotenti

In [7]:
# Read the time-series CSV: each row becomes one list of floats, with empty cells dropped.
# Rows containing a non-numeric value are reported and skipped.
empirical1000 = []
# newline='' is what the csv module documentation asks for when opening a file for csv.reader.
with open("../../../empirical1000/hctsa_timeseries-data.csv", newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    for row in reader:
        try:
            time_series = [float(value) for value in row if value != '']
        except ValueError:
            print(f"Skipping row due to conversion error: {row}")
        else:
            empirical1000.append(time_series)

In [11]:
# Run the Python implementation of one registered feature on the first time series.
evaluated = funcs['Physics_ForcePotential_dblwell_[1, 0.2, 0.1]']['callable'](empirical1000[0])

In [12]:
# Sanity check: number of samples in the series at index 15.
len(empirical1000[15])

1000

In [13]:
# Display the dictionary of summary statistics returned by the Python feature.
evaluated

{'mean': np.float64(-0.03066383112503455),
 'median': np.float64(-0.06454548643280199),
 'std': np.float64(0.8848958666476731),
 'range': np.float64(3.5131275572890037),
 'proppos': np.float64(0.4801),
 'pcross': np.float64(0.009000900090009001),
 'ac1': np.float64(0.9982611038831876),
 'ac10': np.float64(0.8587383444477116),
 'ac50': np.float64(0.2657223758114441),
 'tau': np.float64(449.68395883980446),
 'finaldev': np.float64(0.9453553197391972),
 'pcrossup': np.float64(0.0186018601860186),
 'pcrossdown': np.float64(0.020602060206020602)}

In [9]:
# NOTE(review): stale cell — the ForcePotential output has no 'v' key, so this raises KeyError: 'v'.
# Its execution count is also out of order with the cells around it; remove it or restore the code that produced 'v'.
evaluated['v'][4000]

KeyError: 'v'

In [None]:
def _to_matlab_arg(value):
    """Convert a flat numeric list/tuple to ``matlab.double``; pass anything else through unchanged.

    NOTE(review): scalars and strings are forwarded as-is, as the original call
    did for its first extra argument — confirm this is what each MATLAB
    function expects for scalar numeric parameters.
    """
    if isinstance(value, (list, tuple)) and all(isinstance(v, Number) for v in value):
        return matlab.double(value)
    return value


def eval_comparison(yaml, data):
    """Evaluate every configured feature in both Python and MATLAB on each time series.

    Parameters
    ----------
    yaml : str or os.PathLike
        Path to the YAML configuration passed to ``load_yaml``. (The name
        shadows the ``yaml`` module inside this function only; it is kept for
        backward compatibility.)
    data : sequence of sequences of float
        Time series to evaluate; results are keyed by position in ``data``.

    Returns
    -------
    dict
        ``{feature_name: {index: {'matlab': ..., 'python': ...}}}``.

    Raises
    ------
    ValueError
        If a feature has parameters but no ``ordered_args`` to define the
        positional order of the MATLAB call.
    """
    func_dict = load_yaml(yaml)
    func_res = dict()
    for func, f in func_dict.items():
        print(f"Evalutating {func}")
        python_func = f['callable']
        matlab_func = f['matlab_callable']
        hctsa_name = f['hctsa_name']
        isZscore = f['isZscore']
        params = f['params']

        # MATLAB takes positional arguments, so the keyword parameters must be
        # put in the order given by 'ordered_args'. Any number of arguments is
        # supported (including none).
        matlab_args = []
        if params:
            order = f['ordered_args']
            if order is None:
                raise ValueError(
                    f"Feature {func} has parameters {params} but no 'ordered_args' "
                    f"to order them for the MATLAB call."
                )
            matlab_args = [_to_matlab_arg(params[k]) for k in order]

        print(f"Comparing to {hctsa_name}")
        res = dict()
        for i, x in enumerate(data):
            matlab_input = matlab.double(x)
            # The MATLAB callable cannot be wrapped like the Python one, so
            # z-scoring is applied here by hand.
            if isZscore:
                matlab_input = ZScore(matlab_input)
            matlab_eval = matlab_func(matlab_input, *matlab_args)
            python_eval = python_func(x)
            res[i] = {'matlab': matlab_eval, 'python': python_eval}
        func_res[func] = res
    return func_res

In [44]:
def compare_outputs(outputs):
    """
    Compare MATLAB and Python feature outputs, computing relative errors
    and Pearson correlations.

    Parameters
    ----------
    outputs : dict
        Nested dictionary of feature values with structure:
        {
            feature_name: {
                ts_id: {
                    'matlab': scalar_or_dict,
                    'python': scalar_or_dict
                }
            }
        }

    Returns
    -------
    results : dict
        Dictionary mapping feature (or subfeature) names to correlation and stats:
        {
            'feature.subkey': {
                'r': float,
                'pval': float,
                'res_py': ndarray,
                'res_matlab': ndarray
            }
        }
    """
    flat = {}
    for feat, ts_dict in outputs.items():
        for ts, run in ts_dict.items():
            ml = run['matlab']
            py = run['python']
            if isinstance(ml, dict) and isinstance(py, dict):
                for k, mlv in ml.items():
                    pyv = py[k]
                    slot = f"{feat}.{k}"
                    flat.setdefault(slot, {})[ts] = (mlv, pyv)
            elif isinstance(ml, Number) and isinstance(py, Number):
                flat.setdefault(feat, {})[ts] = (ml, py)
            else:
                raise ValueError(f"Feature {feat}@{ts} is neither both scalars nor both dicts.")

    results = {}
    for slot, tsmap in flat.items():
        ml_vals, py_vals = [], []
        for ts, (mlv, pyv) in tsmap.items():
            ml_vals.append(mlv)
            py_vals.append(pyv)
            
            both_finite = np.isfinite(mlv) and np.isfinite(pyv)
            both_nan = np.isnan(mlv) and np.isnan(pyv)
            both_posinf = (mlv == np.inf) and (pyv == np.inf)
            both_neginf = (mlv == -np.inf) and (pyv == -np.inf)

            if both_finite:
                if mlv == 0:
                    rel_err = np.nan
                else:
                    rel_err = abs(mlv - pyv) / abs(mlv) * 100
                print(f"[{slot} | ts={ts}]  RelErr% = {rel_err:.2f}")
            elif both_nan or both_posinf or both_neginf:
                print(f"[{slot} | ts={ts}]  RelErr% = MATCH (both non-finite)")
            else:
                print(f"[{slot} | ts={ts}]  RelErr% = NaN (mismatch in finiteness)")

        ml_arr = np.array(ml_vals, dtype=float)
        py_arr = np.array(py_vals, dtype=float)
        finite_mask = np.isfinite(ml_arr) & np.isfinite(py_arr)

        if finite_mask.sum() > 1 and ml_arr[finite_mask].std() and py_arr[finite_mask].std():
            r, p = pearsonr(ml_arr[finite_mask], py_arr[finite_mask])
        else:
            r, p = np.nan, np.nan

        results[slot] = {
            'r': r,
            'pval': p,
            'res_py': py_arr,
            'res_matlab': ml_arr
        }

    return results

In [33]:
# Call the MATLAB implementation directly on the z-scored series at index 1, with the same potential and parameters.
eng.PH_ForcePotential(ZScore(matlab.double(empirical1000[1])), 'dblwell', matlab.double([1, 0.2, 0.1]))

{'mean': -0.01009179898477893,
 'median': -0.06432088941300483,
 'std': 0.908519622144995,
 'range': 3.7190085734778986,
 'proppos': 0.4892,
 'pcross': 0.007600760076007601,
 'ac1': 0.9982594116955565,
 'ac10': 0.8576114483523344,
 'ac50': 0.37072251198867795,
 'tau': 236.28477089690773,
 'finaldev': 0.9029036728434826,
 'pcrossup': 0.020202020202020204,
 'pcrossdown': 0.021002100210021003}

In [34]:
# Evaluate every configured physics feature in Python and MATLAB on all loaded time series.
# NOTE(review): hard-coded, user-specific absolute path — adjust before running elsewhere.
out = eval_comparison("/Users/jmoo2880/Documents/py-hctsa-project/pyhctsa/Configurations/physics.yaml", empirical1000)

Loading configuration file: physics.yaml

*** Importing module Physics *** 

Adding operation Physics_ForcePotential_dblwell_[1, 0.2, 0.1] with params {'whatPotential': 'dblwell', 'params': [1, 0.2, 0.1]} (Z-score=True)
Adding operation Physics_ForcePotential_dblwell_[1, 0.5, 0.2] with params {'whatPotential': 'dblwell', 'params': [1, 0.5, 0.2]} (Z-score=True)
Adding operation Physics_ForcePotential_dblwell_[2, 0.05, 0.2] with params {'whatPotential': 'dblwell', 'params': [2, 0.05, 0.2]} (Z-score=True)
Adding operation Physics_ForcePotential_dblwell_[3, 0.01, 0.1] with params {'whatPotential': 'dblwell', 'params': [3, 0.01, 0.1]} (Z-score=True)
Adding operation Physics_ForcePotential_sine_[3, 0.5, 1] with params {'whatPotential': 'sine', 'params': [3, 0.5, 1]} (Z-score=True)
Adding operation Physics_ForcePotential_sine_[1, 1, 1] with params {'whatPotential': 'sine', 'params': [1, 1, 1]} (Z-score=True)
Adding operation Physics_ForcePotential_sine_[10, 0.04, 10] with params {'whatPotenti

In [24]:
# Sanity check: number of samples in the series at index 22.
len(empirical1000[22])

10000

In [46]:
# Summarise agreement between the two implementations; prints one relative-error line per feature and series.
res = compare_outputs(out)

[Physics_ForcePotential_dblwell_[1, 0.2, 0.1].mean | ts=0]  RelErr% = 7.37
[Physics_ForcePotential_dblwell_[1, 0.2, 0.1].mean | ts=1]  RelErr% = 85.63
[Physics_ForcePotential_dblwell_[1, 0.2, 0.1].mean | ts=2]  RelErr% = 102.08
[Physics_ForcePotential_dblwell_[1, 0.2, 0.1].mean | ts=3]  RelErr% = 0.00
[Physics_ForcePotential_dblwell_[1, 0.2, 0.1].mean | ts=4]  RelErr% = 0.00
[Physics_ForcePotential_dblwell_[1, 0.2, 0.1].mean | ts=5]  RelErr% = 0.00
[Physics_ForcePotential_dblwell_[1, 0.2, 0.1].mean | ts=6]  RelErr% = 0.97
[Physics_ForcePotential_dblwell_[1, 0.2, 0.1].mean | ts=7]  RelErr% = 69.29
[Physics_ForcePotential_dblwell_[1, 0.2, 0.1].mean | ts=8]  RelErr% = 0.00
[Physics_ForcePotential_dblwell_[1, 0.2, 0.1].mean | ts=9]  RelErr% = 0.00
[Physics_ForcePotential_dblwell_[1, 0.2, 0.1].mean | ts=10]  RelErr% = 52.77
[Physics_ForcePotential_dblwell_[1, 0.2, 0.1].mean | ts=11]  RelErr% = 0.00
[Physics_ForcePotential_dblwell_[1, 0.2, 0.1].mean | ts=12]  RelErr% = 9.78
[Physics_ForcePot

In [52]:
# Inspect the correlation and the raw per-series values for one sub-feature.
res['Physics_ForcePotential_sine_[10, 0.04, 10].mean']

{'r': np.float64(0.999996915126437),
 'pval': np.float64(0.0),
 'res_py': array([-7.68374496e+03,  4.58648917e+03,  5.78336706e+03, -2.80910136e+02,
        -1.61401160e+03,  3.11193714e+03, -1.09379510e+03, -5.58647417e+03,
         6.67458252e+03,  1.62785981e+03, -3.78007564e+01, -5.60795989e+03,
        -3.15928905e+03, -8.31084667e+03, -4.20684627e+03,  7.32994156e+01,
        -1.82134131e+03,  8.66803009e+00, -5.55048150e+03, -1.00556623e+03,
        -4.14327356e+04, -4.09566002e+04, -7.35869389e+05, -4.41441330e+04,
         3.58182907e+05, -2.62866413e+05, -2.81297568e+05, -7.24045763e+04,
         3.23432747e+05, -5.78422791e+04, -6.89130192e+04, -1.95719545e+04,
         6.52239846e+04,  7.22585798e+04,  4.86476099e+04, -3.58338604e+04,
         1.55087529e+05,  2.85484699e+05, -1.11026847e+05,  3.51552007e+05,
        -2.41829609e+05,  1.22444882e+04,  2.01348428e+05,  1.00519408e+03,
        -2.08218503e+02,  2.03777649e+03, -7.27327097e+03,  1.91654886e+05,
         3.1182