In [None]:
import os
import pandas as pd
import numpy as np
from glob import glob

# Directories
input_dir = "data"       # raw .csv files are read from here
output_dir = "processed" # processed files are written here
os.makedirs(output_dir, exist_ok=True)

# Column names, in file order: three leading columns followed by the same
# 17-column layout for each of the three IMUs (hand, chest, ankle).
# Each entry below is (channel name, number of numbered components).
_IMU_CHANNELS = (
    ("3D_acceleration_data_16g", 3),
    ("3D_acceleration_data_6g", 3),
    ("3D_gyroscope_data", 3),
    ("3D_magnetometer_data", 3),
    ("orientation", 4),
)

column_names = ["timestamp", "activity_id", "heart_rate"]
for _location in ("hand", "chest", "ankle"):
    # Every IMU block starts with its temperature column ...
    column_names.append(f"IMU_{_location}_temperature")
    # ... followed by the numbered components of each channel, in order.
    column_names.extend(
        f"IMU_{_location}_{_channel}_{_component}"
        for _channel, _count in _IMU_CHANNELS
        for _component in range(1, _count + 1)
    )

def preprocess_df(df):
    """Drop unused sensor columns and fill gaps in ``heart_rate``, in place.

    Columns removed (when present):
      * every column ending in ``orientation_1`` .. ``orientation_4``;
      * every column ending in ``acceleration_data_6g_1`` .. ``_3``;
      * ``timestamp``.

    Heart-rate filling (only when the column exists, has missing values and
    contains at least two valid readings):
      * a missing value between two valid readings becomes the mean of the
        reading before it and the reading after it;
      * missing values after the last valid reading take that last reading;
      * missing values before the first valid reading are left as NaN.

    The fill is purely positional, so it does not depend on the frame's
    index labels.

    Args:
        df: DataFrame with string column names. It is modified in place.

    Returns:
        The same ``df`` object, after modification.
    """
    # Drop the columns that are not needed downstream.
    drop_suffixes = tuple(f"orientation_{i}" for i in range(1, 5)) + tuple(
        f"acceleration_data_6g_{i}" for i in range(1, 4)
    )
    col_for_drop = [col for col in df.columns if col.endswith(drop_suffixes)]
    col_for_drop.append("timestamp")
    df.drop(columns=col_for_drop, inplace=True, errors='ignore')

    # Fill the gaps in heart_rate.
    if 'heart_rate' in df.columns:
        hr = df['heart_rate']
        missing = hr.isna().to_numpy()
        # Nothing to do without gaps; with fewer than two readings there is
        # no pair of neighbours to average, so the column is left untouched.
        if missing.any() and (~missing).sum() >= 2:
            # Nearest valid reading before / after every row (NaN if none).
            prev_val = hr.ffill().to_numpy(dtype=float)
            next_val = hr.bfill().to_numpy(dtype=float)

            filled = hr.to_numpy(dtype=float, copy=True)
            # Interior gaps: mean of the two surrounding readings. Rows
            # before the first or after the last reading get NaN here
            # because one of the neighbours is missing.
            filled[missing] = ((prev_val + next_val) / 2)[missing]

            # Trailing gap: carry the last valid reading forward. Leading
            # rows have no previous reading and therefore stay NaN.
            trailing = np.isnan(filled) & ~np.isnan(prev_val)
            filled[trailing] = prev_val[trailing]

            # Assigning an array writes by position, not by index label.
            df['heart_rate'] = filled
    return df

# Process every .csv file found directly inside the input directory
csv_files = glob(os.path.join(input_dir, "*.csv"))

for file_path in csv_files:
    # The processed file keeps the base name of its source file
    _, file_name = os.path.split(file_path)
    output_path = os.path.join(output_dir, file_name)

    # Read the space-separated, header-less file and preprocess it
    df = preprocess_df(
        pd.read_csv(file_path, sep=" ", header=None, names=column_names)
    )

    # Save the result without the index column and report progress
    df.to_csv(output_path, index=False)
    print(f"Processed: {file_name}")
