In [6]:
import numpy as np
import pandas as pd
import os
import glob
from sklearn.preprocessing import LabelEncoder

In [2]:
def load_samples_and_labels(
    csv_path: str,
    window_size: int = 64,
    feature_cols: list[str] | None = None,
    label_col: str = "label"
) -> tuple[np.ndarray, np.ndarray]:

    df = pd.read_csv(csv_path)

    # pick feature columns
    if feature_cols is None:
        feature_cols = [c for c in df.columns if c != label_col]

    # pull out raw arrays
    data   = df[feature_cols].to_numpy(dtype=np.float32)
    labels = df[label_col].to_numpy()            # e.g. int32 or string

    # how many full windows fit?
    n_windows = data.shape[0] // window_size
    data   = data[:n_windows * window_size]
    labels = labels[:n_windows * window_size]

    # reshape
    X = data.reshape(n_windows, window_size, len(feature_cols))
    L = labels.reshape(n_windows, window_size)

    # now pick one label per window (e.g. the first, or majority)
    # here we assume they're all the same, so we take the first:
    y = L[:, 0]

    return X, y

In [8]:
feature_cols = ["ax'", "ay'", "az'", "gx'", "gy'", "gz'"]
label_col = "stroke_side_spin"
folder_path = "./stroke_peaks"

In [9]:
all_data = []

all_files = glob.glob(os.path.join(folder_path, "*.csv"))

for filename in all_files:

    X, y = load_samples_and_labels(filename, window_size=64, feature_cols=feature_cols, label_col=label_col)
    print(filename, X.shape, y.shape)  # -> (n_samples, 64, n_features), (n_samples,)

    all_data.append((X, y))

./stroke_peaks/serve_forehand_flat.csv (53, 64, 6) (53,)
./stroke_peaks/overhead_forehand_flat.csv (46, 64, 6) (46,)
./stroke_peaks/groundstroke_forehand_flat.csv (37, 64, 6) (37,)
./stroke_peaks/groundstroke_forehand_slice.csv (77, 64, 6) (77,)
./stroke_peaks/volley_backhand_slice.csv (89, 64, 6) (89,)
./stroke_peaks/groundstroke_backhand_slice.csv (55, 64, 6) (55,)
./stroke_peaks/serve_forehand_slice.csv (38, 64, 6) (38,)
./stroke_peaks/groundstroke_backhand_flat.csv (44, 64, 6) (44,)
./stroke_peaks/overhead_forehand_slice.csv (36, 64, 6) (36,)
./stroke_peaks/volley_forehand_slice.csv (70, 64, 6) (70,)
./stroke_peaks/serve_forehand_topspin.csv (58, 64, 6) (58,)
./stroke_peaks/groundstroke_backhand_topspin.csv (36, 64, 6) (36,)
./stroke_peaks/groundstroke_forehand_topspin.csv (19, 64, 6) (19,)


In [11]:
X = np.concatenate([X for X, y in all_data], axis=0)
y = np.concatenate([y for X, y in all_data], axis=0)

encoder = LabelEncoder()
y = encoder.fit_transform(y)

np.savez('stroke_peak_data.npz', X=X, y=y)