In [None]:
# Utility Functions
def calculate_rms(data):
    """Return the root-mean-square of ``data``.

    Squares every element, averages the squares over all elements, and
    takes the square root of that average.
    """
    mean_square = np.mean(np.square(data))
    return np.sqrt(mean_square)

def custom_kurtosis(data):
    """Return the Fisher (excess) kurtosis of ``data``, or NaN when undefined.

    NaN is returned without calling ``kurtosis`` when the window holds fewer
    than four samples or when every sample equals the first one.
    """
    # Too few samples for a kurtosis estimate.
    if len(data) < 4:
        return np.nan
    # Constant window: all values identical to the first.
    if np.all(data == data[0]):
        return np.nan
    return kurtosis(data, fisher=True)

def calculate_iqr(data):
    """Return the interquartile range of ``data`` (75th minus 25th percentile)."""
    upper_quartile = np.percentile(data, 75)
    lower_quartile = np.percentile(data, 25)
    return upper_quartile - lower_quartile

def calculate_zero_crossing(data):
    """Count adjacent sample pairs whose signs differ.

    Signs come from ``np.sign`` (-1, 0 or +1), so a sample that is exactly
    zero counts as a change on each side where its neighbour is non-zero.
    """
    signs = np.sign(data)
    sign_changed = np.diff(signs) != 0
    return np.sum(sign_changed)

def calculate_change_rate_raw(data):
    """Return the mean first difference of ``data``.

    The sum of consecutive differences is divided by the number of
    differences (``len(data) - 1``). Inputs with fewer than two samples
    yield 0.
    """
    n_samples = len(data)
    if n_samples < 2:
        return 0
    total_step = np.sum(np.diff(data))
    return total_step / (n_samples - 1)

def calculate_change_rate_abs(data):
    """Return the mean first difference of the absolute values of ``data``.

    Takes ``np.abs`` of the input first, then divides the sum of consecutive
    differences by the number of differences. Inputs with fewer than two
    samples yield 0.
    """
    if len(data) < 2:
        return 0
    magnitudes = np.abs(data)
    steps = np.diff(magnitudes)
    return np.sum(steps) / (len(magnitudes) - 1)

def calculate_change_rate_raw_column(data):
    """Return the relative change of each sample with respect to the previous one.

    For every position ``i >= 1`` the value is
    ``(data[i] - data[i-1]) / data[i-1]``, stored under the index label of
    sample ``i``. Entries whose previous sample is 0, and any other NaN
    result, are reported as 0.

    Args:
        data: pandas Series of numeric samples.

    Returns:
        pandas Series indexed by ``data.index[1:]`` (the first label has no
        predecessor and is omitted). When ``data`` has fewer than two
        samples, a Series of 0 over the whole of ``data.index`` is returned.
    """
    if len(data) < 2:
        return pd.Series(0, index=data.index)

    # Work on plain arrays: dividing by a Series and then passing the result
    # to pd.Series(..., index=...) would re-align by label and shift every
    # rate one row away from the sample it belongs to.
    values = data.to_numpy(dtype=float)
    previous = values[:-1]
    # A zero predecessor makes the ratio undefined; mark it NaN so it is
    # turned into 0 by fillna below instead of producing +/-inf.
    denominators = np.where(previous == 0, np.nan, previous)
    rates = np.diff(values) / denominators
    return pd.Series(rates, index=data.index[1:]).fillna(0)

def calculate_change_rate_abs_column(data):
    """Return the relative change of each sample's magnitude versus the previous one.

    For every position ``i >= 1`` the value is
    ``(|data[i]| - |data[i-1]|) / |data[i-1]|``, stored under the index label
    of sample ``i``. Entries whose previous magnitude is 0, and any other NaN
    result, are reported as 0.

    Args:
        data: pandas Series of numeric samples.

    Returns:
        pandas Series indexed by ``data.index[1:]`` (the first label has no
        predecessor and is omitted). When ``data`` has fewer than two
        samples, a Series of 0 over the whole of ``data.index`` is returned.
    """
    if len(data) < 2:
        return pd.Series(0, index=data.index)

    # Work on plain arrays: dividing by a Series and then passing the result
    # to pd.Series(..., index=...) would re-align by label and shift every
    # rate one row away from the sample it belongs to.
    magnitudes = np.abs(data.to_numpy(dtype=float))
    previous = magnitudes[:-1]
    # A zero predecessor makes the ratio undefined; mark it NaN so it is
    # turned into 0 by fillna below instead of producing +/-inf.
    denominators = np.where(previous == 0, np.nan, previous)
    rates = np.diff(magnitudes) / denominators
    return pd.Series(rates, index=data.index[1:]).fillna(0)

In [None]:
# Feature Engineering Function
def create_feature(df, win):
    """Derive per-sample and rolling-window features from raw sensor columns.

    Operates on a copy of ``df`` and returns that copy with the extra
    columns appended; the frame passed in is not modified here.

    Expects the columns accX, accY, accZ, gyroX, gyroY and gyroZ. Added
    feature families, in column order:

    * SMA / SVM / Sum_XYZ / Entropy for the gyro and acc triplets
      (per-sample).
    * Mean / Sd / Min / Max / Sum over a rolling window of ``win`` rows for
      the six raw axes plus the SMA and SVM columns.
    * RMS, Kurtosis, IQR, ZeroCross, Cumulative, Cumulative_Abs,
      ChangeRate1(_Abs) and ChangeRate2(_Abs) for each raw axis.
    * DBAX / DBAY / DBAZ, ODBA and VeDBA from the accelerometer axes.

    Args:
        df: input DataFrame holding the raw sensor columns.
        win: rolling window size passed to ``Series.rolling``.

    Returns:
        A new DataFrame containing the original columns (with accY offset
        by -1) followed by the derived feature columns.
    """
    gyro_axes = ['gyroX', 'gyroY', 'gyroZ']
    acc_axes = ['accX', 'accY', 'accZ']
    sensor_axes = {'gyro': gyro_axes, 'acc': acc_axes}

    df = df.copy()
    # Constant offset on accY (presumably removing a 1 g gravity component;
    # confirm against the sensor mounting).
    df['accY'] = df['accY'] - 1

    # Per-sample features. Each family is written gyro first, then acc, so
    # the resulting column order stays SMA, SVM, Sum_XYZ, Entropy.
    for sensor, axes in sensor_axes.items():
        df[f'SMA_{sensor}'] = df[axes].abs().sum(axis=1)
    for sensor, (x, y, z) in sensor_axes.items():
        df[f'SVM_{sensor}'] = np.sqrt((df[x]**2) + (df[y]**2) + (df[z]**2))
    for sensor, (x, y, z) in sensor_axes.items():
        df[f'Sum_XYZ_{sensor}'] = df[x] + df[y] + df[z]
    for sensor in sensor_axes:
        squared_sum = df[f'Sum_XYZ_{sensor}']**2
        df[f'Entropy_{sensor}'] = (1 + squared_sum) * np.log(1 + squared_sum)

    # Built-in rolling aggregations over raw axes and the SMA/SVM columns.
    rolling_columns = (gyro_axes + ['SMA_gyro', 'SVM_gyro']
                       + acc_axes + ['SMA_acc', 'SVM_acc'])
    builtin_aggs = (('Mean', 'mean'), ('Sd', 'std'), ('Min', 'min'),
                    ('Max', 'max'), ('Sum', 'sum'))
    for prefix, agg_name in builtin_aggs:
        for col in rolling_columns:
            df[f'{prefix}_{col}'] = df[col].rolling(win, min_periods=1).agg(agg_name)

    # Custom window statistics applied to raw numpy windows.
    window_stats = (
        ('RMS', calculate_rms),
        ('Kurtosis', custom_kurtosis),
        ('IQR', calculate_iqr),
        ('ZeroCross', calculate_zero_crossing),
    )
    for col in gyro_axes + acc_axes:
        for prefix, stat_fn in window_stats:
            df[f'{prefix}_{col}'] = df[col].rolling(win, min_periods=1).apply(stat_fn, raw=True)

        # Running totals over the whole frame (not windowed).
        df[f'Cumulative_{col}'] = df[col].cumsum()
        df[f'Cumulative_Abs_{col}'] = df[col].abs().cumsum()

        # Windowed mean step; needs at least two samples, so the first row is NaN.
        df[f'ChangeRate1_{col}'] = df[col].rolling(win, min_periods=2).apply(
            calculate_change_rate_raw, raw=True)
        df[f'ChangeRate1_Abs_{col}'] = df[col].rolling(win, min_periods=2).apply(
            calculate_change_rate_abs, raw=True)

        # Sample-to-sample relative change over the whole column.
        df[f'ChangeRate2_{col}'] = calculate_change_rate_raw_column(df[col])
        df[f'ChangeRate2_Abs_{col}'] = calculate_change_rate_abs_column(df[col])

    # Dynamic body acceleration: absolute deviation of each accelerometer
    # axis from its rolling mean, then the L1 (ODBA) and L2 (VeDBA) combinations.
    for axis in ('X', 'Y', 'Z'):
        acc_col = f'acc{axis}'
        mean_col = f'Mean_acc{axis}'
        df[mean_col] = df[acc_col].rolling(win, min_periods=1).mean()
        df[f'DBA{axis}'] = (df[mean_col] - df[acc_col]).abs()
    df['ODBA'] = df['DBAX'] + df['DBAY'] + df['DBAZ']
    df['VeDBA'] = np.sqrt(df['DBAX']**2 + df['DBAY']**2 + df['DBAZ']**2)
    return df

In [None]:
# Preprocessing + Feature Extraction
def prep_ds(data, win):
    """Clean a raw recording and build features separately for each behavior.

    Steps, in order: replace missing values with 0, drop duplicate rows,
    relabel "feeding" as "eating", keep only the listed behavior labels,
    renumber the index, remove outliers from the raw sensor columns via
    ``remove_outliers_iqr`` (factor 5.0), run ``create_feature`` on each
    ``Classification`` group and stack the results.

    All cleaning is done on new frames, so the DataFrame passed in by the
    caller is left unmodified.

    Args:
        data: DataFrame with a "Classification" column and the raw sensor
            columns accX, accY, accZ, gyroX, gyroY, gyroZ.
        win: rolling window size forwarded to ``create_feature``.

    Returns:
        DataFrame with the per-behavior feature frames concatenated in
        groupby order and a fresh 0..n-1 index.

    Raises:
        ValueError: from ``pd.concat`` when no group is left to concatenate
            (for example, no row carries one of the kept labels).
    """
    behavior_mapping = {"feeding": "eating"}
    kept_behaviors = [
        "lying", "lying ruminating", "lying other",
        "standing", "standing ruminating", "standing other",
        "walking", "walking ruminating", "eating"
    ]
    raw_sensor_cols = ['accX', 'accY', 'accZ', 'gyroX', 'gyroY', 'gyroZ']

    # Non-inplace operations: avoids mutating the caller's frame and avoids
    # assigning into a frame derived from another one (chained assignment).
    data = data.fillna(0).drop_duplicates()
    data = data.assign(
        Classification=data["Classification"].replace(behavior_mapping)
    )
    data = data[data["Classification"].isin(kept_behaviors)].reset_index(drop=True)

    data = remove_outliers_iqr(data, raw_sensor_cols, factor=5.0)

    # Features are computed per behavior so rolling windows never mix labels.
    behavior_frames = [
        create_feature(sub_df, win)
        for _, sub_df in data.groupby('Classification')
    ]
    return pd.concat(behavior_frames).reset_index(drop=True)

In [None]:
# Processing data
def process_and_save_files(file_paths, win, data_name="data"):
    """Read each existing Excel file, preprocess it and combine the results.

    Paths that do not exist are reported with a message and skipped. Despite
    the name, nothing is written to disk here; the combined frame is only
    returned.

    Args:
        file_paths: iterable of paths to Excel files.
        win: rolling window size forwarded to ``prep_ds``.
        data_name: currently unused; kept so existing callers keep working.

    Returns:
        DataFrame with the processed rows of all files under a fresh
        0..n-1 index, or an empty DataFrame when no file could be processed.
    """
    all_processed_files = []
    for file_path in file_paths:
        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")
            continue
        print(f"Processing file: {file_path}")
        data = pd.read_excel(file_path)
        all_processed_files.append(prep_ds(data, win))

    if not all_processed_files:
        # pd.concat raises ValueError on an empty list; missing files are
        # already treated as non-fatal above, so return an empty result.
        print("No input files were processed; returning an empty DataFrame.")
        return pd.DataFrame()

    return pd.concat(all_processed_files, ignore_index=True)