In [64]:
import pandas as pd
import numpy as np
import datetime

# Ride date; only used to build the ride_<YYYYMMDD> folder and file names.
time    = datetime.datetime(2024, 4, 20)
date    = time.strftime("%Y%m%d")

# Analysis window, in milliseconds since the Unix epoch (the unit that
# downsample_data passes to pd.to_datetime). Rows are kept when
# ts < row.ts < ts_end; the window spans 8 100 000 ms (2 h 15 min).
# The start is set explicitly: the former `ts = int(time.timestamp()*1000)`
# was overwritten right away and depended on the local timezone.
ts      = 1713620244400
ts_end  = 1713628344400

# Root folder containing the ride_<date> directories.
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
path    = "/home/niig/Desktop"

In [65]:
def drop_invalid_lines(input_path: str, output_path: str, min_ts: "int | None" = None):
    """Copy the well-formed rows of a raw CSV log to ``output_path``.

    A line is kept when it is a header (starts with ``"ts"``), or when it is
    at most 75 characters long (line ending included), starts with ``"1"``
    and its first comma-separated field is exactly 13 characters long.
    Every other line is dropped and counted as invalid; this includes
    over-long lines, which were previously dropped without being counted.

    The cleaned file is then re-read with pandas, rows whose ``ts`` is not
    strictly greater than the threshold are removed, and the file is
    rewritten without the index column.

    Args:
        input_path: Path of the raw CSV file to read.
        output_path: Path of the cleaned CSV file; overwritten if it exists.
        min_ts: Exclusive lower bound for the ``ts`` column. When ``None``
            (the default) the notebook-level ``ts`` variable is used, which
            keeps existing two-argument calls working unchanged.

    Returns:
        None. Prints ``Invalid lines count: <n>`` as a side effect.

    Raises:
        NameError: If ``min_ts`` is ``None`` and no global ``ts`` is defined.
    """
    # Resolve the threshold first so a missing global fails before any file
    # is touched.
    threshold = ts if min_ts is None else min_ts

    invalid_count = 0
    with open(input_path) as in_file, open(output_path, "w") as out_file:
        for line in in_file:
            is_header = line.startswith("ts")
            # 13 characters starting with "1" in the first field; presumably
            # an epoch-millisecond timestamp -- TODO confirm against the logger.
            is_data_row = (
                len(line) <= 75
                and line.startswith("1")
                and len(line.split(",")[0]) == 13
            )
            if is_header or is_data_row:
                out_file.write(line)
            else:
                invalid_count += 1

    # Second pass: drop rows at or before the threshold and rewrite in place.
    df = pd.read_csv(output_path)
    df = df[df.ts > threshold]
    df.to_csv(output_path, index=False)
    print(f"Invalid lines count: {invalid_count}")


def downsample_data(df: pd.DataFrame, period_ms: int = 100):
    """Average a frame into fixed-width time bins keyed by ``ts``.

    The input is left untouched: the ``time`` helper column is added to a
    copy, so passing a filtered slice no longer mutates the caller's frame
    or triggers ``SettingWithCopyWarning``.

    Args:
        df: Frame with a ``ts`` column in milliseconds (it is parsed with
            ``unit="ms"``). A ``level_1`` column, if present, is discarded.
            The remaining columns are averaged per bin.
        period_ms: Bin width in milliseconds.

    Returns:
        A new frame indexed by ``ts`` (int64, snapped to a multiple of
        ``period_ms``), holding the per-bin means plus a ``time`` column
        derived from the snapped ``ts``. Bins with no data at all are
        dropped; remaining gaps inside a column are interpolated.
    """
    timed = df.assign(time=pd.to_datetime(df["ts"], unit="ms")).drop(
        columns=["level_1"], errors="ignore"
    )
    binned = (
        timed.set_index("time")
        .resample(f"{period_ms}ms")
        .mean()
        .dropna(how="all")
        .interpolate()
    )
    # `ts` is now the mean timestamp of each bin; snap it back onto the grid.
    # NOTE(review): rounding to the nearest multiple can map two adjacent
    # bins to the same `ts`, and drop_duplicates() below compares column
    # values only, not the index -- confirm duplicate index values are fine.
    binned["ts"] = ((binned["ts"] / period_ms).round(0) * period_ms).astype("int64")
    binned["time"] = pd.to_datetime(binned["ts"], unit="ms")
    return binned.set_index("ts").drop_duplicates()

### Clean up the GPS file by removing invalid lines

In [66]:
# Raw GPS log of the ride, and where its cleaned copy is written.
input_path = f"{path}/ride_{date}/gps.csv"
output_path = f"{path}/ride_{date}/gps_clean.csv"

# Writes only the well-formed rows to output_path and prints how many
# lines were rejected.
drop_invalid_lines(input_path=input_path, output_path=output_path)

Invalid lines count: 220


### Remove invalid lines from the measurements file

In [67]:
# Raw sensor measurements of the ride, and where the cleaned copy is written.
input_path = f"{path}/ride_{date}/measurements.csv"
output_path = f"{path}/ride_{date}/measurements_clean.csv"

# Same cleanup as for the GPS file: keep well-formed rows, report the rest.
drop_invalid_lines(input_path=input_path, output_path=output_path)

Invalid lines count: 1976


In [68]:
# Gap between consecutive timestamps, then the five largest gaps.
# NOTE(review): in the visible notebook `df` is first assigned in a later
# cell, so on a fresh kernel this cell would raise NameError -- it looks
# like it was run out of order; consider moving it after `df` is built.
df["ts_d"] = df["ts"].diff()
df.loc[:, ["ts", "ts_d"]].sort_values("ts_d", ascending=False).head()

Unnamed: 0,ts,ts_d
30223,1713625442700,7600.0
27820,1713624930000,6100.0
10974,1713621863000,4200.0
44506,1713628116000,4000.0
12908,1713622252200,4000.0


In [69]:
# Load the cleaned GPS and measurement files produced by the cleanup cells.
df_gps = pd.read_csv(f"{path}/ride_{date}/gps_clean.csv")
df = pd.read_csv(f"{path}/ride_{date}/measurements_clean.csv")

# Numeric code found in the `type` column -> channel name.
# The position in the list is the code (0..30).
mapping = dict(
    enumerate(
        [
            "heartrate_bpm",       # 0
            "power_w",             # 1
            "cadence_rpm",         # 2
            "speed_kmh",           # 3
            "distance_km",         # 4
            "elevation_m",         # 5
            "ascent",              # 6
            "descent",             # 7
            "slope",               # 8
            "vam",                 # 9
            "air_temperature",     # 10
            "air_pressure_abs",    # 11
            "air_humidity",        # 12
            "air_speed",           # 13
            "air_density",         # 14
            "diff_pressure_l_pa",  # 15
            "diff_pressure_r_pa",  # 16
            "ax_ms2",              # 17
            "ay_ms2",              # 18
            "az_ms2",              # 19
            "wind_speed",          # 20
            "wind_yaw",            # 21
            "gx",                  # 22
            "gy",                  # 23
            "gz",                  # 24
            "mx",                  # 25
            "my",                  # 26
            "mz",                  # 27
            "roll",                # 28
            "pitch",               # 29
            "yaw",                 # 30
        ]
    )
)

# Codes missing from the mapping are left as they are by replace().
df["type"] = df["type"].replace(mapping)
# errors="ignore" leaves `ts` unchanged if it cannot be cast to int64.
df["ts"] = df["ts"].astype("int64", errors="ignore")

# Keep only rows strictly inside the (ts, ts_end) window.
df = df[(df.ts > ts) & (df.ts < ts_end)]

# Long -> wide: one row per timestamp, one column per channel. Stacking the
# outer "value" level presumably leaves a `level_1` column after
# reset_index(); downsample_data discards it if present.
df = df.pivot_table(index="ts", columns=["type"], values=["value"])
df = df.stack(0).reset_index()
df = downsample_data(df)

In [70]:
# Restrict the GPS samples to the (ts, ts_end) window, both bounds exclusive,
# then average them onto the same time grid as the measurements.
df_gps = df_gps[(df_gps.ts > ts) & (df_gps.ts < ts_end)]
df_gps = downsample_data(df_gps)

In [71]:
# Stack measurement and GPS rows (both indexed by `ts`), order them by
# timestamp and turn the index back into a `ts` column.
# NOTE(review): `df` is overwritten here, so re-running this cell on its own
# feeds the already-merged frame back in -- re-run from the load cell instead.
df = pd.concat([df, df_gps])
df = df.sort_index().reset_index()

# A second downsampling pass averages rows that fall into the same time bin
# into a single row.
df = downsample_data(df)
df = df.reset_index()

In [72]:
# Keep rows after `ts`, round every numeric column to 8 decimals, then drop
# rows at or after `ts_end`.
df = (
    df.loc[lambda frame: frame.ts > ts]
    .round(8)
    .loc[lambda frame: frame.ts < ts_end]
)

# Persist the combined, downsampled ride data next to the source files.
df.to_csv(f"{path}/ride_{date}/all_measurement_combined_{date}.csv", index=False)

# Display the air-related columns as a quick visual check.
df.loc[:, ["air_density", "air_temperature", "air_pressure_abs"]]

Unnamed: 0,air_density,air_temperature,air_pressure_abs
1,1.227717,12.356689,101441.0600
2,1.227718,12.355980,101441.0800
3,1.227719,12.355272,101441.1000
4,1.227720,12.352986,101440.9875
5,1.227720,12.351876,101440.8360
...,...,...,...
45680,1.227406,8.644837,101445.0200
45681,1.227406,8.658598,101445.0200
45682,1.227406,8.658598,101445.0200
45683,1.227406,8.658598,101445.0200
