In [1]:
# Uncomment to install seglearn into the kernel's environment:
# %pip install seglearn

[seglearn documentation](https://dmbee.github.io/seglearn/)

In [2]:
# Load the autoreload and memory_profiler IPython extensions, then switch
# autoreload to mode 2 so edited modules are re-imported before each cell runs.
%load_ext autoreload
%load_ext memory_profiler
%autoreload 2

In [5]:
import numpy as np
import pandas as pd
import scipy.stats as ss

---

In [6]:
from utils import get_data

# Load the recording through the project-local helper.
# NOTE(review): get_data is opaque from this notebook; the recorded cell output
# suggests it prints the memory usage of what it loads — confirm in utils.
df_emg = get_data()

memory usage:  96.13 MB


In [33]:
# Summarize the loaded frame: index range, column dtypes and memory footprint.
df_emg.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3600000 entries, 2021-06-29 07:22:20.045943 to 2021-06-29 08:22:20.044943
Freq: L
Data columns (total 5 columns):
 #   Column  Dtype  
---  ------  -----  
 0   emg     float32
 1   eog     float32
 2   lso     float32
 3   rio     float32
 4   m1-a1   float32
dtypes: float32(5)
memory usage: 256.1 MB


## Toying with segments

In [7]:
# Toy dimensions for experimenting with segment-shaped tensors.
n_segments, segment_width, n_variables = 10, 1000, 3

# Two dummy tensors laid out as (segment, sample, variable):
# `a` is filled with ones, `b` has the same shape/dtype but holds zeros.
a = np.ones(shape=(n_segments, segment_width, n_variables))
b = np.zeros_like(a)

In [7]:
# Reduce each tensor over its sample axis (axis 1), then put the per-segment
# means of `a` and `b` side by side as columns.
out = np.concatenate([a.mean(axis=1), b.mean(axis=1)], axis=1)

## Seglearn code base

In [28]:
from seglearn.transform import FeatureRep, FeatureRepMix, Segment


def slope(x):
    """Return the least-squares linear slope of every 1-D slice along axis 1.

    Each slice is fitted against its sample indices ``0 .. len(slice) - 1``
    with a first-degree ``np.polyfit``; only the slope coefficient is kept.

    Parameters
    ----------
    x : array-like with at least two dimensions
        Data whose axis 1 is the axis being fitted.

    Returns
    -------
    np.ndarray
        ``x`` with axis 1 collapsed, holding one slope per fitted slice.
    """

    def _linear_trend(series):
        sample_idx = np.arange(0, len(series))
        coefficients = np.polyfit(sample_idx, series, 1)
        # polyfit returns the highest-degree coefficient first: [slope, intercept].
        return coefficients[0]

    return np.apply_along_axis(_linear_trend, axis=1, arr=x)


# One single-feature FeatureRep per (statistic, signal) pair, given to
# FeatureRepMix as (name, transformer, column) triples. Statistics are iterated
# in the outer loop and signals in the inner loop, so the resulting features are
# grouped per statistic (all "min" features first, then all "max", ...).
union = FeatureRepMix(
    [
        (f"{signal}_{stat_name}", FeatureRep(features={"": stat_func}), column)
        for stat_name, stat_func in {
            # Reductions over axis 1, cast back to the input dtype.
            "min": lambda x: x.min(axis=1).astype(x.dtype),
            "max": lambda x: x.max(axis=1).astype(x.dtype),
            "mean": lambda x: x.mean(axis=1).astype(x.dtype),
            "std": lambda x: x.std(axis=1).astype(x.dtype),
            "var": lambda x: x.var(axis=1).astype(x.dtype),
            "skew": lambda x: ss.skew(x, axis=1).astype(x.dtype),
            "kurt": lambda x: ss.kurtosis(x, axis=1).astype(x.dtype),
            "rms": lambda x: np.sqrt(np.square(x).mean(axis=1)).astype(x.dtype),
            # Ideas that are not enabled (one-to-many / many-to-one examples):
            #   "mean_min_global_mean": lambda x: np.mean(x, axis=1) - np.mean(x)
            #   **{f"quantile_{q}": lambda x: np.quantile(x, q=q) for q in quantiles}
            # These two are left without the dtype cast, as before.
            "slope": slope,
            "area": lambda x: x.sum(axis=1),
        }.items()
        for column, signal in enumerate(["emg", "eog", "lso", "rio", "m1-a1"])
    ]
    # A multivariate entry ("emg_mean_std_mean": axis-1 mean and std concatenated
    # along axis 1, registered without a column index) was tried here as an extra
    # list item, but multivariate features did not seem to work.
)

In [29]:
def extract_feats_seglearn(
    data: pd.DataFrame,
    fs: int = 1000,
    window_s: float = 30,
    step_s: float = 10,
) -> pd.DataFrame:
    """Segment ``data`` with a sliding window and compute the ``union`` features.

    Parameters
    ----------
    data : pd.DataFrame
        Signals to process; ``data.values`` is passed to seglearn as one series.
    fs : int, default 1000
        Number of samples per second used to convert the window and step
        durations into sample counts.
    window_s : float, default 30
        Window length in seconds (``width = int(window_s * fs)`` samples).
    step_s : float, default 10
        Step between consecutive windows in seconds
        (``step = int(step_s * fs)`` samples).

    Returns
    -------
    pd.DataFrame
        Output of ``union.fit_transform`` with columns named by ``union.f_labels``.

    Notes
    -----
    The module-level ``union`` is re-fitted on every call.
    """
    segment = Segment(width=int(window_s * fs), step=int(step_s * fs))
    # NOTE(review): a per-sample list of True is passed as the target of the single
    # series, exactly as before; confirm this is the target shape Segment expects.
    X, y, _ = segment.fit_transform(X=[data.values], y=[[True] * len(data)])
    X = union.fit_transform(X, y)
    return pd.DataFrame(data=X, columns=union.f_labels)

In [24]:
# df_feat = extract_feats_seglearn(df_emg)

In [30]:
# Same steps as extract_feats_seglearn, run at cell level so the intermediate
# results (including the third return value `z`) stay available for inspection.
fs = 1000
segment = Segment(width=30 * fs, step=10 * fs)  # 30 * fs window, 10 * fs step
X, y, z = segment.fit_transform(
    X=[df_emg.values],
    y=[[True] * len(df_emg)],
)
X = union.fit_transform(X, y)
df_feat = pd.DataFrame(X, columns=union.f_labels)

In [26]:
# Check whether the third value returned by segment.fit_transform is unset.
z is None

True

In [31]:
# Summarize the extracted feature table: row count, feature columns and dtypes.
df_feat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 358 entries, 0 to 357
Data columns (total 50 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   emg_min__0      358 non-null    float64
 1   eog_min__1      358 non-null    float64
 2   lso_min__2      358 non-null    float64
 3   rio_min__3      358 non-null    float64
 4   m1-a1_min__4    358 non-null    float64
 5   emg_max__0      358 non-null    float64
 6   eog_max__1      358 non-null    float64
 7   lso_max__2      358 non-null    float64
 8   rio_max__3      358 non-null    float64
 9   m1-a1_max__4    358 non-null    float64
 10  emg_mean__0     358 non-null    float64
 11  eog_mean__1     358 non-null    float64
 12  lso_mean__2     358 non-null    float64
 13  rio_mean__3     358 non-null    float64
 14  m1-a1_mean__4   358 non-null    float64
 15  emg_std__0      358 non-null    float64
 16  eog_std__1      358 non-null    float64
 17  lso_std__2      358 non-null    flo

In [14]:
# Display the feature table.
# NOTE(review): in the recorded output the rio_* values equal the emg_* values
# and the m1-a1_* values equal the eog_* values — verify whether the input
# channels are duplicated upstream.
df_feat

Unnamed: 0,emg_min__0,eog_min__1,lso_min__2,rio_min__3,m1-a1_min__4,emg_max__0,eog_max__1,lso_max__2,rio_max__3,m1-a1_max__4,...,emg_slope__0,eog_slope__1,lso_slope__2,rio_slope__3,m1-a1_slope__4,emg_area__0,eog_area__1,lso_area__2,rio_area__3,m1-a1_area__4
0,-0.062500,0.046875,0.93750,-0.062500,0.046875,0.062500,0.125000,1.015625,0.062500,0.125000,...,1.453172e-07,1.441164e-07,1.599141e-08,1.453172e-07,1.441164e-07,-64.218750,2467.265625,29518.828125,-64.218750,2467.265625
1,-0.046875,0.000000,0.93750,-0.046875,0.000000,0.062500,0.125000,1.015625,0.062500,0.125000,...,-7.435104e-08,-3.123776e-08,2.122526e-08,-7.435104e-08,-3.123776e-08,-41.250000,2494.453125,29519.765625,-41.250000,2494.453125
2,-1.078125,-1.375000,-0.78125,-1.078125,-1.375000,1.531250,0.718750,1.078125,1.531250,0.718750,...,6.763373e-06,-1.897054e-05,-1.646258e-05,6.763373e-06,-1.897054e-05,1079.843750,-914.218750,26674.296875,1079.843750,-914.218750
3,-1.312500,-1.406250,-0.78125,-1.312500,-1.406250,1.812500,1.156250,1.984375,1.812500,1.156250,...,1.850948e-05,-4.114080e-05,-3.655443e-05,1.850948e-05,-4.114080e-05,4751.953125,-8703.281250,19680.468750,4751.953125,-8703.281250
4,-1.312500,-1.406250,-0.78125,-1.312500,-1.406250,1.812500,1.156250,1.984375,1.812500,1.156250,...,1.362858e-05,-3.453340e-05,-2.932054e-05,1.362858e-05,-3.453340e-05,8481.875000,-18403.203125,11483.906250,8481.875000,-18403.203125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
353,-0.328125,-1.609375,-0.46875,-0.328125,-1.609375,1.484375,0.656250,1.859375,1.484375,0.656250,...,1.012667e-05,-1.877398e-05,-1.633145e-05,1.012667e-05,-1.877398e-05,15187.265625,-2490.390625,21358.828125,15187.265625,-2490.390625
354,-1.062500,-2.000000,-2.00000,-1.062500,-2.000000,1.984375,1.437500,1.984375,1.984375,1.437500,...,3.009870e-06,1.689519e-05,-2.764985e-05,3.009870e-06,1.689519e-05,18968.203125,-1124.765625,14679.140625,18968.203125,-1124.765625
355,-1.062500,-2.000000,-2.00000,-1.062500,-2.000000,1.984375,1.437500,1.984375,1.984375,1.437500,...,-1.033203e-05,-3.294487e-05,-2.134044e-05,-1.033203e-05,-3.294487e-05,16398.203125,-8276.015625,8490.000000,16398.203125,-8276.015625
356,-1.890625,-2.000000,-2.00000,-1.890625,-2.000000,1.984375,1.984375,1.984375,1.984375,1.984375,...,2.920637e-06,-5.388524e-06,-2.528988e-06,2.920637e-06,-5.388524e-06,18313.828125,-3479.140625,4565.703125,18313.828125,-3479.140625
