In [1]:
import pandas as pd
from pathlib import Path

In [2]:
# Locate the project root: the nearest directory, starting at the current
# working directory and walking upward, that contains data/raw.
here = Path.cwd()
root = next(
    (p for p in [here, *here.parents] if (p / "data" / "raw").exists()),
    None,
)
if root is None:
    # Without the default, next() would surface as an opaque StopIteration.
    raise FileNotFoundError(
        f"No 'data/raw' directory found in {here} or any of its parents"
    )

# header=0: the first line of the file supplies the column names.
df = pd.read_csv(root / "data" / "raw" / "ios_turns.csv", header=0)

In [3]:
# Snapshot the freshly loaded frame so the unfiltered data stays available
# while `df` is narrowed by the filters that follow.
raw_df = df.copy(deep=True)
loaded_shape = raw_df.shape
print(f"[INFO] Loaded rows/cols: {loaded_shape}")

[INFO] Loaded rows/cols: (61094, 34)


In [4]:
# Each range filter logs the frame shape before and after it is applied.
# NaN values fail these comparisons, so rows holding them are dropped too.

# eventStartSpeed must be non-negative.
shape_in = df.shape
df = df.loc[df["eventStartSpeed"].ge(0)]
print(f"[FILTER] eventStartSpeed >= 0: {shape_in} -> {df.shape}")

# eventEndSpeed must lie in the closed interval [0, 120].
shape_in = df.shape
df = df.loc[df["eventEndSpeed"].between(0, 120)]
print(f"[FILTER] 0 <= eventEndSpeed <= 120: {shape_in} -> {df.shape}")

# eventDurationSeconds must not exceed 15.
shape_in = df.shape
df = df.loc[df["eventDurationSeconds"].le(15)]
print(f"[FILTER] eventDurationSeconds <= 15: {shape_in} -> {df.shape}")

[FILTER] eventStartSpeed >= 0: (61094, 34) -> (61080, 34)
[FILTER] 0 <= eventEndSpeed <= 120: (61080, 34) -> (61028, 34)
[FILTER] eventDurationSeconds <= 15: (61028, 34) -> (60203, 34)


In [5]:
# Keep only events whose eventMilesDriven is strictly below 0.3.
rows_cols_before = df.shape
short_distance = df["eventMilesDriven"].lt(0.3)
df = df.loc[short_distance]
print(f"[FILTER] eventMilesDriven < 0.3: {rows_cols_before} -> {df.shape}")

[FILTER] eventMilesDriven < 0.3: (60203, 34) -> (60189, 34)


In [6]:
# Absolute difference between the end and start speed of each event.
# .assign() returns a new frame instead of writing a column into `df`, which
# at this point is the result of boolean-mask filtering; in-place column
# assignment on such a frame can raise pandas' SettingWithCopyWarning.
df = df.assign(speed_gap=(df["eventEndSpeed"] - df["eventStartSpeed"]).abs())

before = df.shape
# Flag rows where speed_gap exceeds 10 while eventDurationSeconds is below 2,
# then keep everything that is NOT flagged.
sudden_jump = (df["speed_gap"] > 10) & (df["eventDurationSeconds"] < 2)
df = df[~sudden_jump]
print(f"[FILTER] remove |Δspeed|>10 in <2s: {before} -> {df.shape}")

[FILTER] remove |Δspeed|>10 in <2s: (60189, 35) -> (60026, 35)


In [7]:
# Write the cleaned frame under <root>/data/processed/ios, creating the
# directory (and any missing parents) when needed.
out_dir = root.joinpath("data", "processed", "ios")
out_dir.mkdir(parents=True, exist_ok=True)

clean_path = out_dir.joinpath("parent_cleanV2.csv")
# index=False keeps the DataFrame index out of the file.
df.to_csv(clean_path, index=False)
print(f"[SAVE] Cleaned file written to: {clean_path}")

[SAVE] Cleaned file written to: /Users/shazi/Documents/VS Code/Arity - BTT Project 1/ARITY-BTT-PROJECT-1/data/processed/ios/parent_cleanV2.csv


In [8]:
# Re-read only the first five rows of the saved file to confirm which
# columns were written.
df_check = pd.read_csv(clean_path, nrows=5)
saved_columns = df_check.columns.tolist()
print(f"[CHECK] Preview saved columns: {saved_columns}")

[CHECK] Preview saved columns: ['init_time', 'eventAction', 'payloadCreation', 'eventStart', 'eventEnd', 'eventType', 'eventSampleSpeed', 'eventDurationSeconds', 'eventMilesDriven', 'eventStartLatitude', 'eventStartLongitude', 'eventEndLatitude', 'eventEndLongitude', 'eventSensorDetectionMthd', 'eventGPSSignalStrength', 'eventStartSpeed', 'eventEndSpeed', 'eventSpeedChange', 'memsMedianLateralAccel', 'memsP75LateralAccel', 'memsP95LateralAccel', 'memsMedianHorizontalNorm', 'memsP75HorizontalNorm', 'memsP95HorizontalNorm', 'gpsMedianLateralAccel', 'gpsP75LateralAccel', 'gpsP95LateralAccel', 'bearing_angular_change_per_second', 'gyro_angular_change_per_second', 'bearingAngleChange', 'gyroAngleChange', 'mems_radius', 'gps_radius', 'validity_mean', 'speed_gap']
