In [1]:
import pandas as pd

In [2]:
all_df = pd.read_csv("/home/think/Desktop/all_df.csv")

In [3]:
all_df.drop(all_df.columns[[0]], axis=1, inplace=True)

In [4]:
all_df

Unnamed: 0,id,x,y,z
0,1,0.100000,0.100000,0.100000
1,1,0.095657,0.102960,0.107116
2,1,0.091243,0.105749,0.114304
3,1,0.086763,0.108366,0.121566
4,1,0.082225,0.110810,0.128902
...,...,...,...,...
4999995,500,0.697298,-0.201389,0.617136
4999996,500,0.696305,-0.202385,0.618926
4999997,500,0.695297,-0.203362,0.620717
4999998,500,0.694274,-0.204320,0.622507


In [23]:
import nolds

def extract_chaos_features(series):
    """Extract chaos features from a 1D time series."""
    return {
        'lyap': nolds.lyap_r(series),
        'corr_dim': nolds.corr_dim(series, emb_dim=10),
        'ap_entropy': nolds.sampen(series),
    }

def compute_features_by_id(df_all):
    """
    Agrupa por 'id' y calcula las features para cada grupo.

    Parameters
    ----------
    df_all : pd.DataFrame
        DataFrame con columnas ['id', 'x', 'y', 'z']

    Returns
    -------
    pd.DataFrame
        DataFrame con columnas ['id', 'lyap', 'corr_dim', 'ap_entropy']
    """

    feature_rows = []

    for id_value, group in df_all.groupby("id"):
        try:
            x_series = group["y"].values
            features = extract_chaos_features(x_series)
            features["id"] = id_value
            feature_rows.append(features)
        except Exception as e:
            print(f"Error calculating features for id {id_value}: {e}")

    return pd.DataFrame(feature_rows)

In [24]:
subset_df = all_df.iloc[1:10000]

In [25]:
subset_df_new = subset_df[["id", "y"]]

In [26]:
subset_df_new

Unnamed: 0,id,y
1,1,0.102960
2,1,0.105749
3,1,0.108366
4,1,0.110810
5,1,0.113081
...,...,...
9995,1,-0.804275
9996,1,-0.843197
9997,1,-0.880443
9998,1,-0.915961


In [27]:
features_df = compute_features_by_id(subset_df_new)
features_df = features_df.sort_values("id").reset_index(drop=True)

print(features_df.head())

       lyap  corr_dim  ap_entropy  id
0  0.000575  1.359338    0.068571   1


In [28]:
features_df

Unnamed: 0,lyap,corr_dim,ap_entropy,id
0,0.000575,1.359338,0.068571,1


In [9]:
import pandas as pd
from tsfresh import extract_features

def process_tsfresh_features(
    all_df,
    select_ids=None,
    variables=None,
    default_fc_parameters=None
):
    """
    Extracts tsfresh features per id and per variable separately.

    Parameters
    ----------
    all_df : pd.DataFrame
        Must have columns ['id', 'x', 'y', 'z'], where multiple rows per id.
    select_ids : list[int], optional
        Which ids to process.
    variables : list[str], optional
        Which of ['x', 'y', 'z'] to process. None = all present.
    default_fc_parameters : dict, optional
        Feature calc params passed to tsfresh.extract_features()

    Returns
    -------
    pd.DataFrame
        One row per id, columns like x__feature1, y__feature1, etc.
    """
    if select_ids is None:
        select_ids = all_df['id'].unique().tolist()
    if variables is None:
        variables = [c for c in ['x', 'y', 'z'] if c in all_df.columns]

    records = []

    for sid in select_ids:
        ts = all_df[all_df['id'] == sid]
        record = {'id': sid}
        if ts.empty:
            print(f"⚠️ id {sid} not found in all_df — skipping")
            continue

        for var in variables:
            if var not in ts.columns:
                print(f"⚠️ column '{var}' not found — skipping {sid}")
                continue

            # Shape: id, time index, value
            temp_df = pd.DataFrame({
                'id': sid,
                'time': range(len(ts)),
                'value': ts[var].values
            })
            # extract_features expects column_id, column_sort, column_value
            features_df = extract_features(
                temp_df,
                column_id='id',
                column_sort='time',
                column_value='value',
                default_fc_parameters=default_fc_parameters
            )

            # Prefix the feature columns with the variable name
            features_df.columns = [f"{var}__{c}" for c in features_df.columns]

            # Save features as a dict
            record.update(features_df.iloc[0].to_dict())

        records.append(record)

    return pd.DataFrame(records)


In [10]:
tsdf = process_tsfresh_features(subset_df)

Feature Extraction: 100%|██████████| 1/1 [01:12<00:00, 72.05s/it]
Feature Extraction: 100%|██████████| 1/1 [01:10<00:00, 70.66s/it]
Feature Extraction: 100%|██████████| 1/1 [01:10<00:00, 70.41s/it]


In [11]:
tsdf

Unnamed: 0,id,x__value__variance_larger_than_standard_deviation,x__value__has_duplicate_max,x__value__has_duplicate_min,x__value__has_duplicate,x__value__sum_values,x__value__abs_energy,x__value__mean_abs_change,x__value__mean_change,x__value__mean_second_derivative_central,...,z__value__fourier_entropy__bins_5,z__value__fourier_entropy__bins_10,z__value__fourier_entropy__bins_100,z__value__permutation_entropy__dimension_3__tau_1,z__value__permutation_entropy__dimension_4__tau_1,z__value__permutation_entropy__dimension_5__tau_1,z__value__permutation_entropy__dimension_6__tau_1,z__value__permutation_entropy__dimension_7__tau_1,z__value__query_similarity_count__query_None__threshold_0.0,z__value__mean_n_absolute_max__number_of_maxima_7
0,1,0.0,0.0,0.0,0.0,-143.879924,2644.996813,0.012985,-0.000113,2e-06,...,0.045395,0.090729,0.136002,0.707022,0.72883,0.750632,0.77243,0.794223,,1.881531


In [12]:
import pandas as pd
from tsfresh import extract_features

def process_tsfresh_features_long(
    all_df,
    select_ids=None,
    variables=None,
    default_fc_parameters=None
):
    """
    Extracts tsfresh features per id and per variable separately,
    returning one row per (id, axis).
    """
    if select_ids is None:
        select_ids = all_df['id'].unique().tolist()
    if variables is None:
        variables = [c for c in ['x', 'y', 'z'] if c in all_df.columns]

    records = []
    for sid in select_ids:
        ts = all_df[all_df['id'] == sid]
        if ts.empty:
            continue

        for var in variables:
            temp_df = pd.DataFrame(
                {'id': sid, 'time': range(len(ts)), 'value': ts[var].values}
            )
            features_df = extract_features(
                temp_df,
                column_id='id',
                column_sort='time',
                column_value='value',
                default_fc_parameters=default_fc_parameters
            )
            features_df['id'] = sid
            features_df['axis'] = var
            records.append(features_df)

    # Concatenate all results vertically
    result_df = pd.concat(records, ignore_index=True)
    return result_df


In [17]:
tsdf = process_tsfresh_features_long(subset_df,
                                     variables=['x', 'y'])

Feature Extraction: 100%|██████████| 1/1 [01:10<00:00, 70.38s/it]
Feature Extraction: 100%|██████████| 1/1 [01:09<00:00, 69.74s/it]
Feature Extraction: 100%|██████████| 1/1 [01:10<00:00, 70.67s/it]
Feature Extraction: 100%|██████████| 1/1 [01:10<00:00, 70.47s/it]
Feature Extraction: 100%|██████████| 1/1 [01:10<00:00, 70.50s/it]
Feature Extraction: 100%|██████████| 1/1 [01:10<00:00, 70.07s/it]


In [18]:
tsdf

Unnamed: 0,value__variance_larger_than_standard_deviation,value__has_duplicate_max,value__has_duplicate_min,value__has_duplicate,value__sum_values,value__abs_energy,value__mean_abs_change,value__mean_change,value__mean_second_derivative_central,value__median,...,value__fourier_entropy__bins_100,value__permutation_entropy__dimension_3__tau_1,value__permutation_entropy__dimension_4__tau_1,value__permutation_entropy__dimension_5__tau_1,value__permutation_entropy__dimension_6__tau_1,value__permutation_entropy__dimension_7__tau_1,value__query_similarity_count__query_None__threshold_0.0,value__mean_n_absolute_max__number_of_maxima_7,id,axis
0,0.0,0.0,0.0,0.0,-143.879924,2644.996813,0.012985,-0.000113,2.224768e-06,-0.001344,...,0.181214,0.764904,0.836534,0.90923,0.981796,1.05423,,1.519426,1,x
1,0.0,0.0,0.0,0.0,134.753485,2612.465651,0.013024,-0.000105,-1.827286e-06,0.001307,...,0.181214,0.764915,0.836566,0.908088,0.980347,1.053223,,1.530853,1,y
2,0.0,0.0,0.0,0.0,-143.523062,2667.86955,0.013117,-4.8e-05,2.554791e-06,-0.001785,...,0.181214,0.764886,0.8365,0.907986,0.980212,1.052306,,1.513167,2,x
3,0.0,0.0,0.0,0.0,128.093597,2650.657201,0.013117,-0.000128,-4.126253e-07,0.001151,...,0.181214,0.764843,0.83643,0.908339,0.981302,1.054133,,1.547837,2,y
4,0.0,0.0,0.0,0.0,-112.347772,2694.698148,0.013282,1.1e-05,2.057021e-06,-0.001988,...,0.181214,0.764925,0.837026,0.908998,0.98084,1.054291,,1.498174,3,x
5,0.0,0.0,0.0,0.0,127.732843,2704.57066,0.013243,-0.000114,7.166387e-07,0.00056,...,0.181214,0.765412,0.83757,0.91047,0.984422,1.059106,,1.55876,3,y


In [None]:
from tsfresh import extract_features
weee = extract_features(subset_df_new, column_id='id')

Feature Extraction: 100%|██████████| 1/1 [01:08<00:00, 68.12s/it]


In [29]:
weee

Unnamed: 0,x__variance_larger_than_standard_deviation,x__has_duplicate_max,x__has_duplicate_min,x__has_duplicate,x__sum_values,x__abs_energy,x__mean_abs_change,x__mean_change,x__mean_second_derivative_central,x__median,...,x__fourier_entropy__bins_5,x__fourier_entropy__bins_10,x__fourier_entropy__bins_100,x__permutation_entropy__dimension_3__tau_1,x__permutation_entropy__dimension_4__tau_1,x__permutation_entropy__dimension_5__tau_1,x__permutation_entropy__dimension_6__tau_1,x__permutation_entropy__dimension_7__tau_1,x__query_similarity_count__query_None__threshold_0.0,x__mean_n_absolute_max__number_of_maxima_7
1,0.0,0.0,0.0,0.0,-143.779924,2645.006813,0.012984,-0.000114,2e-06,-0.001322,...,0.079983,0.136002,0.181214,0.764898,0.836522,0.909212,0.981772,1.0542,,1.519426
