In [1]:
import numpy as np
import pandas as pd
import yaml
import time
import importlib
from tqdm import tqdm
from loguru import logger
from functools import partial, wraps
from itertools import product
import os
from BF_zscore import BF_zscore
from FC_Surprise import FC_Surprise
from PN_sampenc import PN_sampenc

In [2]:
base_dir = "../"

In [3]:
ts1 = np.loadtxt(base_dir + "ts1.txt")
ts2 = np.loadtxt(base_dir + "ts2.txt")
ts3 = np.loadtxt(base_dir + "ts3.txt")
ts4 = np.loadtxt(base_dir + "ts4.txt")

In [4]:
FC_Surprise(BF_zscore(ts1))

{'min': np.float64(1.0078579253996456),
 'max': np.float64(1.171182981502945),
 'mean': np.float64(1.0968550876232457),
 'sum': np.float64(548.4275438116229),
 'median': np.float64(1.1086626245216111),
 'lq': np.float64(1.0642108619507773),
 'uq': np.float64(1.1239300966523995),
 'std': np.float64(0.04092262237859694),
 'tstat': np.float64(52.922942691362614)}

In [5]:
fpath = "../Configurations/basic.yaml"

In [6]:
def zscore_decorator(func):
    @wraps(func)
    def wrapper(y, *args, **kwargs):
        y = BF_zscore(y)
        return func(y, *args, **kwargs)
    return wrapper

In [7]:
def range_constructor(loader, node):
    start, end = loader.construct_sequence(node)
    return list(range(start, end+1))
yaml.add_constructor("!range", range_constructor)

In [8]:
def load_yaml2(file):
    print(f"Loading configuration file: {file.split('/')[-1]}")
    funcs = {}
    with open(file) as f:
        yf = yaml.load(f, Loader=yaml.FullLoader)

    for module_name in yf:
        print(f"\n*** Importing module {module_name} *** \n")
        module = importlib.import_module(module_name)
        for function_name in yf[module_name]:
            # Get the function's configuration dictionary
            function_config = yf[module_name][function_name]
            # If no configs section exists or if it's empty, use a list with single empty dict
            if ('configs' not in function_config or function_config.get('configs') is None or 
                function_config.get('configs') == []):
                configs = [{}]
            else:
                configs = function_config.get('configs', [{}])

            for params in configs:
                # Handle the case where params is None
                if params is None:
                    params = {}
                    
                zscore_first = params.pop("zscore", False)
                param_keys, param_vals = zip(*params.items()) if params else ([], [])
                
                param_combinations = [dict(zip(param_keys, values)) 
                                   for values in product(*[v if isinstance(v, list) 
                                                        else [v] for v in param_vals])]
                
                # If no parameter combinations were generated, add empty dict
                if not param_combinations:
                    param_combinations = [{}]
                
                # create a function for each parameter combination
                for param_set in param_combinations:
                    feature_name = (f"{module_name}_{function_name}_" + 
                                  "_".join(f"{v}" for k, v in param_set.items())
                                  if param_set else f"{module_name}_{function_name}")
                    if not zscore_first:
                        feature_name += "_raw"
                    
                    print(f"Adding operation {feature_name} with params {param_set} "
                          f"(Z-score={zscore_first})")
                    
                    base_func = partial(getattr(module, function_name), **param_set)
                    if zscore_first:
                        base_func = zscore_decorator(base_func)
                        
                    funcs[feature_name] = base_func
                    
    return funcs
            

In [9]:
# def load_yaml(file):
#     print(f"Loading configuration file: {file.split('/')[-1]}")
#     # print stats for configuration file
#     funcs = {}
#     with open(file) as f:
#         yf = yaml.load(f, Loader=yaml.FullLoader)
#         # instantiate the featuresß
#         for module_name in yf:
#             print(f"*** Importing module {module_name}")
#             module = importlib.import_module(module_name, __package__)
#             for function_name in yf[module_name]:
#                 configs = yf[module_name][function_name].get('configs', [{}])
#                 for params in configs:
#                     zscore_first = params.pop("zscore", False)

#                     param_keys, param_vals = zip(*params.items() if params else ([], []))
#                     param_combinations = [dict(zip(param_keys, values)) for values in product(*[
#                         v if isinstance(v, list) else [v] for v in param_vals])]
                    
#                     # create a function for each parameter combination
#                     for param_set in param_combinations:
#                         feature_name = f"{module_name}_{function_name}_" + "_".join(f"{v}" for k, v in param_set.items())
#                         print(f"Adding operation {feature_name} with params {param_set} (Z-score={zscore_first})")

#                         base_func = partial(getattr(module, function_name), **param_set)
#                         if zscore_first:
#                             base_func = zscore_decorator(base_func)
#                             #feature_func = partial(base_func, zscore=zscore_first, **param_set)
#                         funcs[feature_name] = base_func
#                         #funcs.append(feature_func)
#     return funcs

In [10]:
feats = load_yaml2(fpath)

Loading configuration file: basic.yaml

*** Importing module CO *** 

Adding operation CO_AutoCorr_1_Fourier with params {'tau': 1, 'method': 'Fourier'} (Z-score=True)
Adding operation CO_AutoCorr_2_Fourier with params {'tau': 2, 'method': 'Fourier'} (Z-score=True)
Adding operation CO_AutoCorr_3_Fourier with params {'tau': 3, 'method': 'Fourier'} (Z-score=True)
Adding operation CO_AutoCorr_4_Fourier with params {'tau': 4, 'method': 'Fourier'} (Z-score=True)
Adding operation CO_AutoCorr_5_Fourier with params {'tau': 5, 'method': 'Fourier'} (Z-score=True)
Adding operation CO_AutoCorr_6_Fourier with params {'tau': 6, 'method': 'Fourier'} (Z-score=True)
Adding operation CO_AutoCorr_7_Fourier with params {'tau': 7, 'method': 'Fourier'} (Z-score=True)
Adding operation CO_AutoCorr_8_Fourier with params {'tau': 8, 'method': 'Fourier'} (Z-score=True)
Adding operation CO_AutoCorr_9_Fourier with params {'tau': 9, 'method': 'Fourier'} (Z-score=True)
Adding operation CO_AutoCorr_10_Fourier with par

In [35]:
%%time
computed = {}
feature_count = 0
tstart = time.perf_counter()
for (feature, func) in zip(feats.keys(), feats.values()):
    computed[feature] = func(ts1)
    if (isinstance(computed[feature], float) or isinstance(computed[feature], int)):
        feature_count += 1
    else:
        feature_count += len(computed[feature]) 
telapsed = time.perf_counter() - tstart
print(f"Computed {len(computed)} operations ({feature_count} features)")
print(f"Time taken: {telapsed} seconds")

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
[32m2025-02-22 06:26:40.975[0m | [1mINFO    [0m | [36mDN[0m:[36mQuantile[0m:[36m219[0m - [1mUsing quantile p = 0.5 (median) by default[0m


This time series (N = 1000) is too short for StatAv(len,'500')
This time series (N = 1000) is too short for StatAv(len,'1000')
Computed 612 operations (2929 features)
Time taken: 2.1449284580303356 seconds
CPU times: user 2.26 s, sys: 256 ms, total: 2.52 s
Wall time: 2.14 s


  out = np.std(qs, ddof=1)/np.std(y, ddof=1)


unpack all of the individual features

In [38]:
individual_features = {}
for base_op in computed:
    if isinstance(computed[base_op], dict):
        for (k, v) in zip(computed[base_op].keys(), computed[base_op].values()):
            feature_name = base_op + "_" + k
            individual_features[feature_name] = float(v)
    else:
        individual_features[base_op] = computed[base_op]

  individual_features[feature_name] = float(v)


In [48]:
len(individual_features)

2923

In [44]:
individual_features['MISC_ForcePotential_dblwell_[1, 0.2, 0.1]_mean']

-0.10975444802278665

array, float, dict

In [47]:
tsdf = pd.DataFrame([individual_features])
tsdf.head(1)

Unnamed: 0,CO_AutoCorr_1_Fourier,CO_AutoCorr_2_Fourier,CO_AutoCorr_3_Fourier,CO_AutoCorr_4_Fourier,CO_AutoCorr_5_Fourier,CO_AutoCorr_6_Fourier,CO_AutoCorr_7_Fourier,CO_AutoCorr_8_Fourier,CO_AutoCorr_9_Fourier,CO_AutoCorr_10_Fourier,...,MISC_WLCoeffs_db3_max_mean_coeff,MISC_WLCoeffs_db3_max_max_coeff,MISC_WLCoeffs_db3_max_med_coeff,MISC_WLCoeffs_db3_max_wb99m,MISC_WLCoeffs_db3_max_wb90m,MISC_WLCoeffs_db3_max_wb75m,MISC_WLCoeffs_db3_max_wb50m,MISC_WLCoeffs_db3_max_wb25m,MISC_WLCoeffs_db3_max_wb10m,MISC_WLCoeffs_db3_max_wb1m
0,[0.979402500982948],[0.9198380320708269],[0.8237568162288854],[0.6950587301125483],[0.5389350666038799],[0.3616591580073549],[0.17033432795165965],[-0.027390759104679675],[-0.2236278292903217],[-0.41056371716338463],...,0.084747,0.830869,0.023938,0.002,0.01,0.023,0.046,0.139,0.261,0.626


In [102]:
np.count_nonzero(np.isnan(tsdf.iloc[0].values)) # number of NaN values 

0

In [11]:
class Calculator:
    """Compute all univariate time series features.
    
    The calculator takes in a univariate time-series dataset of N instances and returns a 
    feature matrix of size N x F where F is the number of features.

    """

    def __init__(self, dataset=None, name=None, configfile=None):
        
        # define a configfile by sb
        self._features = {} 

        def compute(self):
            pass 

        def load_yaml(file : str) -> dict:
            # function to construct the partials from the YAML file. 

            print(f"Loading configuration file: {file.split('/')[-1]}")
            funcs = {} # dictionary of partial functions to be re-used. 
            with open(file) as f:
                yf = yaml.load(f, Loader=yaml.FullLoader)

            for module_name in yf:
                print(f"\n*** Importing module {module_name} *** \n")
                module = importlib.import_module(module_name)
                for function_name in yf[module_name]:
                    # Get the function's configuration dictionary
                    function_config = yf[module_name][function_name]
                    # If no configs section exists or if it's empty, use a list with single empty dict
                    if ('configs' not in function_config or function_config.get('configs') is None or 
                        function_config.get('configs') == []):
                        configs = [{}]
                    else:
                        configs = function_config.get('configs', [{}])

                    for params in configs:
                        # Handle the case where params is None
                        if params is None:
                            params = {}
                            
                        zscore_first = params.pop("zscore", False)
                        param_keys, param_vals = zip(*params.items()) if params else ([], [])
                        
                        param_combinations = [dict(zip(param_keys, values)) 
                                        for values in product(*[v if isinstance(v, list) 
                                                                else [v] for v in param_vals])]
                        # If no parameter combinations were generated, add empty dict
                        if not param_combinations:
                            param_combinations = [{}]
                        
                        # create a function for each parameter combination
                        for param_set in param_combinations:
                            feature_name = (f"{module_name}_{function_name}_" + 
                                        "_".join(f"{v}" for k, v in param_set.items())
                                        if param_set else f"{module_name}_{function_name}")
                            if not zscore_first:
                                feature_name += "_raw"
                            
                            print(f"Adding operation {feature_name} with params {param_set} "
                                f"(Z-score={zscore_first})")
                            
                            base_func = partial(getattr(module, function_name), **param_set)
                            if zscore_first:
                                base_func = zscore_decorator(base_func)
                                
                            funcs[feature_name] = base_func
                            
            return funcs
        
