# Imports and configs

In [1]:
from sklearn.model_selection import KFold, cross_val_score, cross_validate, GroupKFold
from sklearn.metrics import f1_score
from sklearn.metrics import mean_squared_error, r2_score
from scipy.fft import rfft, rfftfreq
from scipy.stats import skew, kurtosis
from scipy.signal import find_peaks

from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.base import clone

from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import Ridge
from lightgbm import LGBMClassifier, early_stopping
from xgboost import XGBClassifier

import matplotlib.pyplot as plt
import matplotlib.font_manager as fm

plt.rcParams['font.family'] = ['MS Gothic', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False

import seaborn as sns
import pandas as pd
import numpy as np
import polars as pl
import warnings
import optuna
import joblib
import glob
import gc
import time
from tqdm import tqdm
import os

warnings.filterwarnings("ignore")

In [2]:
class CFG:
    train_path = "input/train.csv"
    train_demographic_path = "input/train_demographics.csv"
    test_path = "input/test.csv"
    test_demographic_path = "input/test_demographics.csv"

    target = "gesture"
    
    n_folds = 5
    seed = 42

    run_optuna = True
    n_optuna_trials = 100 

# Data loading

In [3]:
train = pd.read_csv(CFG.train_path).reset_index(drop=True)
train_d = pd.read_csv(CFG.train_demographic_path).reset_index(drop=True)
test = pd.read_csv(CFG.test_path).reset_index(drop=True)
test_d = pd.read_csv(CFG.test_demographic_path).reset_index(drop=True)

# Preprocessing

In [4]:
# Remove low-variance columns
def get_low_var_cols(df, threshold=0.95):
    return [col for col in df.columns if df[col].nunique() <= 1 or (df[col] == df[col].iloc[0]).mean() > threshold]

lowv = get_low_var_cols(train)
train = train.drop(columns=lowv)
test = test.drop(columns=[c for c in lowv if c in test.columns])

print(f"removed columns: {lowv}")

removed columns: []


In [5]:
# Encode target
le_gesture = LabelEncoder()
train["e_gesture"] = le_gesture.fit_transform(train["gesture"])

In [6]:
# rotが欠損しているsequenceは除外
rot_cols = [col for col in train.columns if col.startswith('rot_')]

rot_null_row_train = train[rot_cols].isnull().any(axis=1)
rot_null_sequence_ids_train = train.loc[rot_null_row_train, 'sequence_id'].unique()
rot_null_row_test = test[rot_cols].isnull().any(axis=1)
rot_null_sequence_ids_test = test.loc[rot_null_row_test, 'sequence_id'].unique()

train = train[~train['sequence_id'].isin(rot_null_sequence_ids_train)].reset_index(drop=True)
test = test[~test['sequence_id'].isin(rot_null_sequence_ids_test)].reset_index(drop=True)

print(f"=== rot null sequence count ===")
print(f"train: {len(rot_null_sequence_ids_train)}")
print(f"test: {len(rot_null_sequence_ids_test)}")


=== rot null sequence count ===
train: 50
test: 0


In [7]:
# thm, tofのいずれか1つでもNaNがあればIMU-only
thm_cols = [col for col in train.columns if col.startswith('thm_')]
tof_cols = [col for col in train.columns if col.startswith('tof_')]

thm_null_row_train = train[thm_cols].isnull().any(axis=1)
thm_null_sequence_ids_train = train.loc[thm_null_row_train, 'sequence_id'].unique()

print(f"thm null sequence count: {len(thm_null_sequence_ids_train)}")

tof_null_row_train = train[tof_cols].isnull().any(axis=1)
tof_null_sequence_ids_train = train.loc[tof_null_row_train, 'sequence_id'].unique()

print(f"tof null sequence count: {len(tof_null_sequence_ids_train)}")

imu_only_seq_ids = list(set(thm_null_sequence_ids_train) | set(tof_null_sequence_ids_train))
full_data_seq_ids = train[~train['sequence_id'].isin(imu_only_seq_ids)]['sequence_id'].unique()

# Set thermal and ToF columns to NaN for IMU-only sequences
train.loc[train['sequence_id'].isin(imu_only_seq_ids), thm_cols+tof_cols] = np.nan

print(f"imu only seq count: {len(imu_only_seq_ids)}")
print(f"full data seq count: {len(full_data_seq_ids)}")

thm null sequence count: 508
tof null sequence count: 435
imu only seq count: 508
full data seq count: 7593


In [8]:
# full dataからimu only用のデータを作成
train_imu_only = train.copy()
train_imu_only[thm_cols+tof_cols] = np.nan

train_imu_only.isnull().sum()

row_id                   0
sequence_type            0
sequence_id              0
sequence_counter         0
subject                  0
                     ...  
tof_5_v60           571253
tof_5_v61           571253
tof_5_v62           571253
tof_5_v63           571253
e_gesture                0
Length: 342, dtype: int64

In [9]:
def feature_set(df: pd.DataFrame, use_extra: bool = False) -> dict:
    """
    Extracts an enhanced set of features from sensor data for gesture prediction.

    Features include:
    - Basic statistics (mean, std, min, max, median)
    - Advanced statistics (energy, range, quantiles, skew, kurtosis)
    - Peak detection for IMU signals
    - Frequency band energy from FFT
    - Aggregated thermal and ToF features (max/std, rate of change)
    """
    feats = {}
    imu_cols = [c for c in df.columns if any(x in c for x in ['acc_', 'rot_'])]

    # Vector magnitudes and jerk features
    df['acc_mag'] = np.sqrt(df['acc_x']**2 + df['acc_y']**2 + df['acc_z']**2)
    df['rot_mag'] = np.sqrt(df['rot_x']**2 + df['rot_y']**2 + df['rot_z']**2)
    df['acc_jerk_x'] = df['acc_x'].diff().fillna(0)
    df['acc_jerk_y'] = df['acc_y'].diff().fillna(0)
    df['acc_jerk_z'] = df['acc_z'].diff().fillna(0)
    imu_cols.extend(['acc_mag', 'rot_mag', 'acc_jerk_x', 'acc_jerk_y', 'acc_jerk_z'])

    # Feature Extraction for IMU Sensors
    for col in imu_cols:
        arr = df[col].values
        # Basic Statistics
        feats[f'{col}_mean'] = np.mean(arr)
        feats[f'{col}_std'] = np.std(arr)
        feats[f'{col}_min'] = np.min(arr)
        feats[f'{col}_max'] = np.max(arr)
        feats[f'{col}_median'] = np.median(arr)
        feats[f'{col}_range'] = feats[f'{col}_max'] - feats[f'{col}_min']
        # Advanced Statistics
        feats[f'{col}_q25'] = np.quantile(arr, 0.25)
        feats[f'{col}_q75'] = np.quantile(arr, 0.75)
        feats[f'{col}_energy'] = np.sum(arr**2) / len(arr)
        feats[f'{col}_skew'] = skew(arr)
        feats[f'{col}_kurtosis'] = kurtosis(arr)
        # Rolling Window Features
        for window in [5, 10, 15, 20, 25, 50]:
            rolling_mean = df[col].rolling(window=window, min_periods=1).mean()
            rolling_std = df[col].rolling(window=window, min_periods=1).std().fillna(0)
            feats[f'{col}_roll_mean_{window}_mean'] = rolling_mean.mean()
            feats[f'{col}_roll_std_{window}_mean'] = rolling_std.mean()
        # Frequency Domain Features (FFT)
        fft_vals = rfft(arr)
        fft_mags = np.abs(fft_vals)
        if len(fft_mags) > 1:
            feats[f'{col}_fft_mean'] = np.mean(fft_mags)
            feats[f'{col}_fft_max'] = np.max(fft_mags)
            freqs = rfftfreq(len(arr))
            feats[f'{col}_fft_dominant_freq'] = freqs[np.argmax(fft_mags[1:]) + 1] if len(fft_mags) > 1 else 0
            # Energy in Frequency Bands
            bands = [(0, 2), (2, 5), (5, 10)]
            for low, high in bands:
                mask = (freqs >= low) & (freqs < high)
                feats[f'{col}_fft_energy_{low}_{high}'] = np.sum(fft_mags[mask])
        # Peak Detection
        peaks, _ = find_peaks(arr, height=0)
        feats[f'{col}_num_peaks'] = len(peaks) / len(arr)

    # Cross-Sensor Correlations
    for (c1, c2) in [('acc_x', 'acc_y'), ('acc_x', 'acc_z'), ('acc_y', 'acc_z'),
                     ('rot_x', 'rot_y'), ('rot_x', 'rot_z'), ('rot_y', 'rot_z')]:
        if c1 in df.columns and c2 in df.columns:
            feats[f'{c1}_{c2}_corr'] = np.corrcoef(df[c1], df[c2])[0, 1]

    # Extra Sensor Features (Thermal & ToF)
    if use_extra:
        thm_cols = [c for c in df.columns if 'thm' in c]
        tof_cols = [c for c in df.columns if 'tof' in c]
        # Thermal Features
        if thm_cols:
            thm_data = df[thm_cols].values
            feats['thm_max_across_sensors'] = np.nanmax(thm_data, axis=1).mean()
            feats['thm_std_across_sensors'] = np.nanstd(thm_data, axis=1).mean()
            for col in thm_cols:
                feats[f'{col}_mean'] = np.nanmean(df[col])
                feats[f'{col}_max'] = np.nanmax(df[col])
                feats[f'{col}_diff_mean'] = df[col].diff().abs().mean()
        # ToF Features
        if tof_cols:
            tof_data = df[tof_cols].values
            valid_tof = tof_data >= 0
            min_tof_per_time = np.min(tof_data, axis=1, where=valid_tof, initial=10000)
            feats['tof_min_across_sensors'] = np.mean(min_tof_per_time)
            feats['tof_num_valid_sensors'] = np.sum(valid_tof, axis=1).mean()
            for col in tof_cols:
                valid_vals = df[col][df[col] >= 0]
                if valid_vals.size > 0:
                    feats[f'{col}_mean'] = np.mean(valid_vals)
                    feats[f'{col}_min'] = np.min(valid_vals)
                    feats[f'{col}_diff_mean'] = df[col].diff().abs().mean()

    return feats

In [10]:
def build_features(data: pd.DataFrame, demographics: pd.DataFrame, use_extra: bool) -> tuple[pd.DataFrame, list]:
    """Builds feature matrix and returns features with subject groups for CV."""
    features = []
    groups = []
    for seq_id, g in data.groupby('sequence_id'):
        subj = g['subject'].iloc[0]
        groups.append(subj)
        feats = feature_set(g, use_extra=use_extra)
        feats['sequence_id'] = seq_id
        demo_row = demographics[demographics['subject'] == subj]
        if not demo_row.empty:
            demo = demo_row.iloc[0]
            for dcol in ['age', 'sex', 'height_cm', 'handedness']:
                feats[dcol] = demo.get(dcol, np.nan)
        features.append(feats)
    df_feats = pd.DataFrame(features).set_index('sequence_id')
    if 'sex' in df_feats.columns:
        df_feats['sex'] = df_feats['sex'].map({'M': 1, 'F': 0})
    if 'handedness' in df_feats.columns:
        df_feats['handedness'] = df_feats['handedness'].map({'R': 1, 'L': 0, 'A': 2})
    
    return df_feats, groups

In [11]:
# Build separate feature sets for IMU-only and full-sensor data
print("Building features for IMU-only sequences...")
X_imu, groups_imu = build_features(train_imu_only, train_d, use_extra=False)
y_imu = train_imu_only.groupby('sequence_id')['e_gesture'].first().loc[X_imu.index]

print("Building features for full-sensor sequences...")
X_extra, groups_extra = build_features(train[train['sequence_id'].isin(full_data_seq_ids)], train_d, use_extra=True)
y_extra = train.groupby('sequence_id')['e_gesture'].first().loc[X_extra.index]

print(f"IMU-only features: {X_imu.shape}")
print(f"Full-sensor features: {X_extra.shape}")

Building features for IMU-only sequences...
Building features for full-sensor sequences...
IMU-only features: (8101, 370)
Full-sensor features: (7593, 1349)


# Training base model

In [26]:
def train_cv(model, X, y, groups, model_name="Model"):
    """
    fold別詳細結果表示対応クロスバリデーション関数（F1スコア評価版）
    最適エポック数での最終学習機能付き
    """

    print(f"=== {model_name} Cross Validation 開始 ===")
    
    # 結果保存用
    fold_results = []
    f1_macro_scores = []
    f1_binary_scores = []
    final_scores = []
    times = []
    best_iterations = []  # 最適エポック数記録用
    
    # 各fold実行
    for fold, (train_idx, val_idx) in enumerate(GroupKFold(n_splits=CFG.n_folds).split(X, y, groups)):
        start_time = time.time()
        
        # データ分割
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
        
        # モデル学習（クローンを作成）
        fold_model = clone(model)
        fold_model.fit(X_train, y_train, 
                      eval_set=[(X_val, y_val)],
                      eval_metric='multi_logloss',
                      callbacks=[early_stopping(150, verbose=False)])
        
        # 最適エポック数を取得（エラーハンドリング付き）
        try:
            # LightGBMの正しい属性名
            best_iteration = fold_model.best_iteration_
        except AttributeError:
            # early stoppingが動作しなかった場合やバージョン違いの場合
            try:
                best_iteration = fold_model.best_iteration
            except AttributeError:
                # デフォルトとしてn_estimatorsを使用
                best_iteration = fold_model.n_estimators
                print(f"Warning: Could not get best_iteration for fold {fold}, using n_estimators={best_iteration}")
        
        best_iterations.append(best_iteration)
        
        # 予測
        y_pred = fold_model.predict(X_val)
        
        # 評価指標計算
        f1_macro = f1_score(y_val, y_pred, average='macro')
        f1_binary = f1_score((y_val == 0), (y_pred == 0), average='binary')
        final_score = (f1_macro + f1_binary) / 2
        
        # 実行時間
        fold_time = time.time() - start_time
        
        # 結果保存
        f1_macro_scores.append(f1_macro)
        f1_binary_scores.append(f1_binary)
        final_scores.append(final_score)
        times.append(fold_time)
        
        fold_results.append({
            'fold': fold,
            'f1_macro': f1_macro,
            'f1_binary': f1_binary,
            'final_score': final_score,
            'time': fold_time,
            'best_iteration': best_iteration
        })
        
        # fold別結果表示
        print(f"--- Fold {fold} - Final Score: {final_score:.4f} (F1_macro: {f1_macro:.4f}, F1_binary: {f1_binary:.4f}) - Best Iteration: {best_iteration} - Time: {fold_time:.2f} s")
    
    # 最適エポック数の計算
    optimal_n_estimators = int(np.mean(best_iterations))
    std_iterations = np.std(best_iterations)
    
    # 全体統計
    results = {
        'fold_results': fold_results,
        'f1_macro': {
            'mean': np.mean(f1_macro_scores),
            'std': np.std(f1_macro_scores),
            'scores': f1_macro_scores
        },
        'f1_binary': {
            'mean': np.mean(f1_binary_scores),
            'std': np.std(f1_binary_scores),
            'scores': f1_binary_scores
        },
        'final_score': {
            'mean': np.mean(final_scores),
            'std': np.std(final_scores),
            'scores': final_scores
        },
        'time': {
            'mean': np.mean(times),
            'total': np.sum(times)
        },
        'best_iterations': {
            'mean': optimal_n_estimators,
            'std': std_iterations,
            'all': best_iterations
        }
    }
    
    # 全体結果表示
    print(f"\n{model_name} Overall Results:")
    print(f"  F1 Macro: {results['f1_macro']['mean']:.6f} ± {results['f1_macro']['std']:.6f}")
    print(f"  F1 Binary: {results['f1_binary']['mean']:.6f} ± {results['f1_binary']['std']:.6f}")
    print(f"  Final Score: {results['final_score']['mean']:.6f} ± {results['final_score']['std']:.6f}")
    print(f"  Best Iterations: {optimal_n_estimators} ± {std_iterations:.1f} (range: {min(best_iterations)}-{max(best_iterations)})")
    print(f"  Total Time: {results['time']['total']:.2f} s")
    print()
    
    # 最適エポック数で最終モデル学習
    print(f"Training final {model_name} model on full dataset with optimal n_estimators={optimal_n_estimators}...")
    
    # 最終モデルの作成（最適エポック数で設定）
    final_model = clone(model)
    
    # LightGBMの場合、n_estimatorsを最適値に更新
    if hasattr(final_model, 'set_params'):
        final_model.set_params(n_estimators=optimal_n_estimators)
    else:
        # set_paramsが使えない場合の代替案
        final_model.n_estimators = optimal_n_estimators
    
    # 全データで学習（early stoppingなし）
    final_model.fit(X, y)
    
    # モデル保存
    model_filename = f"models/model_{model_name.lower().replace('-', '_').replace(' ', '_')}.pkl"
    joblib.dump(final_model, model_filename)
    print(f"Model saved to: {model_filename}")
    print(f"Final model trained with n_estimators={optimal_n_estimators}")
    
    return results

## LightGBM (gbdt)

In [27]:
imu_lgbm_model = LGBMClassifier(
    objective='multiclass',
    metric='multi_logloss',
    boosting_type='gbdt',
    device='gpu',
    gpu_platform_id=0,
    gpu_device_id=0,
    n_estimators=3000,
    learning_rate=0.015,
    feature_fraction=0.7,
    bagging_fraction=0.7,
    bagging_freq=1,
    lambda_l1=0.3,
    lambda_l2=0.3,
    max_depth=-1,
    num_leaves=60,
    min_child_samples=20,
    n_jobs=-1,
    random_state=CFG.seed,
    verbose=-1
)

full_lgbm_model = LGBMClassifier(
    objective='multiclass',
    metric='multi_logloss',
    boosting_type='gbdt',
    device='gpu',
    gpu_platform_id=0,
    gpu_device_id=0,
    n_estimators=3000,
    learning_rate=0.015,
    feature_fraction=0.7,
    bagging_fraction=0.7,
    bagging_freq=1,
    lambda_l1=0.3,
    lambda_l2=0.3,
    max_depth=-1,
    num_leaves=60,
    min_child_samples=20,
    n_jobs=-1,
    random_state=CFG.seed,
    verbose=-1
)

In [28]:
# Train IMU-only model
imu_lgbm_results = train_cv(
    model=imu_lgbm_model,
    X=X_imu,
    y=y_imu,
    groups=groups_imu,
    model_name="IMU-only-LightGBM",
)

=== IMU-only-LightGBM Cross Validation 開始 ===
--- Fold 0 - Final Score: 0.5325 (F1_macro: 0.5710, F1_binary: 0.4939) - Best Iteration: 545 - Time: 206.43 s
--- Fold 1 - Final Score: 0.5870 (F1_macro: 0.6308, F1_binary: 0.5432) - Best Iteration: 604 - Time: 214.54 s
--- Fold 2 - Final Score: 0.5348 (F1_macro: 0.6122, F1_binary: 0.4574) - Best Iteration: 468 - Time: 177.36 s
--- Fold 3 - Final Score: 0.5188 (F1_macro: 0.5751, F1_binary: 0.4626) - Best Iteration: 604 - Time: 200.94 s
--- Fold 4 - Final Score: 0.5418 (F1_macro: 0.5787, F1_binary: 0.5049) - Best Iteration: 422 - Time: 165.17 s

IMU-only-LightGBM Overall Results:
  F1 Macro: 0.593569 ± 0.023690
  F1 Binary: 0.492408 ± 0.031160
  Final Score: 0.542989 ± 0.023247
  Best Iterations: 528 ± 73.0 (range: 422-604)
  Total Time: 964.44 s

Training final IMU-only-LightGBM model on full dataset with optimal n_estimators=528...
Model saved to: models/model_imu_only_lightgbm.pkl
Final model trained with n_estimators=528


In [29]:
# Train full-sensor model
full_lgbm_results = train_cv(
    model=full_lgbm_model,
    X=X_extra,
    y=y_extra,
    groups=groups_extra,
    model_name="Full-sensor-LightGBM",
)

=== Full-sensor-LightGBM Cross Validation 開始 ===
--- Fold 0 - Final Score: 0.6794 (F1_macro: 0.6540, F1_binary: 0.7048) - Best Iteration: 630 - Time: 336.60 s
--- Fold 1 - Final Score: 0.6581 (F1_macro: 0.6799, F1_binary: 0.6364) - Best Iteration: 540 - Time: 348.19 s
--- Fold 2 - Final Score: 0.6690 (F1_macro: 0.6440, F1_binary: 0.6941) - Best Iteration: 971 - Time: 1933.40 s
--- Fold 3 - Final Score: 0.7240 (F1_macro: 0.6932, F1_binary: 0.7549) - Best Iteration: 731 - Time: 479.17 s
--- Fold 4 - Final Score: 0.6986 (F1_macro: 0.6801, F1_binary: 0.7171) - Best Iteration: 702 - Time: 847.68 s

Full-sensor-LightGBM Overall Results:
  F1 Macro: 0.670228 ± 0.018291
  F1 Binary: 0.701454 ± 0.038474
  Final Score: 0.685841 ± 0.023309
  Best Iterations: 714 ± 144.1 (range: 540-971)
  Total Time: 3945.04 s

Training final Full-sensor-LightGBM model on full dataset with optimal n_estimators=714...
Model saved to: models/model_full_sensor_lightgbm.pkl
Final model trained with n_estimators=714


In [30]:
# Save label encoder
joblib.dump(le_gesture, "models/le_gesture.pkl")

['models/le_gesture.pkl']

# Submission

In [27]:
def has_extra_sensors(df):
    """Detect if sequence has valid thermal/ToF sensor data"""
    thm_cols = [c for c in df.columns if 'thm_' in c]
    tof_cols = [c for c in df.columns if 'tof_' in c]
    
    if not thm_cols or not tof_cols:
        return False
    
    thm_vals = df[thm_cols].values
    tof_vals = df[tof_cols].values
    
    # Check if all values are missing/invalid
    if np.all(np.isnan(thm_vals)) and np.all((tof_vals == -1) | (np.isnan(tof_vals))):
        return False
    
    # Return True if sufficient valid data exists
    return (np.isnan(thm_vals).mean() < 0.8) or (np.all(tof_vals != -1) and (tof_vals != -1).mean() > 0.2)

In [None]:
def extract_features_for_test(sequence_df: pd.DataFrame, demo_df: pd.DataFrame):
    """Extract features for test sequence and determine sensor availability"""
    use_extra = has_extra_sensors(sequence_df)
    feats = feature_set(sequence_df, use_extra=use_extra)
    
    # Add demographics
    demo_row = demo_df.iloc[0] if len(demo_df) > 0 else {}
    for dcol in ['age','adult_child','sex','handedness','height_cm','shoulder_to_wrist_cm','elbow_to_wrist_cm']:
        feats[dcol] = demo_row.get(dcol, np.nan)
    feats['sex_F'] = int(feats.get('sex', 0) == 0)
    feats['sex_M'] = int(feats.get('sex', 0) == 1)
    feats['handed_L'] = int(feats.get('handedness', 0) == 0)
    feats['handed_R'] = int(feats.get('handedness', 0) == 1)
    feats['handed_A'] = int(feats.get('handedness', 0) == 2)
    
    return pd.DataFrame([feats]), use_extra

In [None]:
# モデル・エンコーダを読み込む
model_imu = joblib.load("models/model_imu-only-lightgbm.pkl")
model_extra = joblib.load("models/model_full-sensor-lightgbm.pkl")
le_gesture = joblib.load("models/le_gesture.pkl")

def predict(sequence: pl.DataFrame, demographics: pl.DataFrame) -> str:
    """Smart prediction using appropriate model based on available sensors"""
    sequence = sequence.to_pandas()
    demographics = demographics.to_pandas()
    
    try:
        feats, use_extra = extract_features_for_test(sequence, demographics)
        model = model_extra if use_extra else model_imu
        pred = model.predict(feats)[0]
        return le_gesture.inverse_transform([pred])[0]
        
    except Exception as e:
        return le_gesture.classes_[0]


In [49]:
# Inference Server Setup 
import kaggle_evaluation.cmi_inference_server

inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)

if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
else:
    inference_server.run_local_gateway(
        data_paths=(
            'input/test.csv',
            'input/test_demographics.csv',
        )
    )

In [50]:
pd.read_parquet('submission.parquet').reset_index(drop=True)

Unnamed: 0,sequence_id,gesture
0,SEQ_000001,Above ear - pull hair
1,SEQ_000011,Above ear - pull hair
