# Least-Squares Estimate of Cycling Parameters

In [1]:
import sys; sys.path.append('..')
import numpy as np
import fitparse
import pandas as pd
import math
import plotly.express as px

from utils.paths import data_folder

In [39]:
def parse_fitfile(
  filepath: str,
  columns: list[str],
  verbose: bool = False,
) -> pd.DataFrame:
  """Parse the "record" messages of a .fit file into a DataFrame.

  Parameters
  ----------
  filepath
    Path of the .fit file; passed straight to ``fitparse.FitFile``.
  columns
    Field names to extract. Each one becomes a DataFrame column.
  verbose
    If true, print the field names of every record and report each record
    that is skipped.

  Returns
  -------
  pd.DataFrame
    One row per record that contains *all* requested ``columns``, in the
    order the records are yielded. Records missing any requested field are
    dropped entirely.

  Notes
  -----
  The file must include at least timestamp, distance, and altitude.
  """
  ff = fitparse.FitFile(filepath)
  data = {col: [] for col in columns}

  for message in ff.get_messages("record", with_definitions=True):
    # Definition messages are skipped; values are only read from the rest.
    if isinstance(message, fitparse.records.DefinitionMessage):
      continue

    record_data = message.get_values()

    if verbose:
      print(record_data.keys())

    if any(col not in record_data for col in data):
      if verbose:
        # Use .get(): the timestamp may itself be the missing field, and a
        # plain lookup would raise KeyError instead of skipping the record.
        ts = record_data.get("timestamp")
        print(f"Skipping incomplete record @ {ts}")
      continue

    for col, values in data.items():
      values.append(record_data[col])

  return pd.DataFrame(data)


def calculate_grade(df):
  """Add per-sample slope columns derived from altitude and distance.

  Writes four columns into ``df`` in place and returns the same object:

  * ``dy``    - altitude change since the previous row
  * ``dx``    - distance change since the previous row
  * ``grade`` - ``dy / dx``; 0 wherever the ratio is undefined (the first
    row, or any row with ``dx == 0``)
  * ``theta`` - ``atan(grade)``, the slope angle in radians
  """
  df["dy"] = df["altitude"].diff(1)
  df["dx"] = df["distance"].diff(1)

  grade = df["dy"] / df["dx"]
  # dx == 0 produces NaN (0/0) or +/-inf (dy/0). Treat both as flat ground;
  # otherwise an inf grade turns into a vertical slope (theta = +/-pi/2).
  df["grade"] = grade.where(np.isfinite(grade), 0.0)
  df["theta"] = np.arctan(df["grade"])
  return df


def calculate_speed_diff(df):
  """Add a ``speed_diff`` column: change in ``speed`` from the previous row.

  The first row has no predecessor and is filled with 0. ``df`` is modified
  in place and also returned.
  """
  delta = df["speed"].diff(periods=1)
  df["speed_diff"] = delta.fillna(value=0)
  return df


def calculate_dt(df):
  """Add a ``dt`` column: elapsed time since the previous row.

  For a datetime ``timestamp`` column the result is in seconds (float). For
  any other dtype the row-to-row difference is used as is, except that
  individual ``pd.Timedelta`` values are converted to seconds. The first row
  has no predecessor and gets 0. ``df`` is modified in place and returned.
  """
  delta = df["timestamp"].diff(1)

  if delta.dtype.kind == "m":
    # Convert to seconds *before* filling the leading NaT, so no integer is
    # ever mixed into a timedelta column (that behaviour depends on the
    # pandas version).
    seconds = delta.dt.total_seconds()
  else:
    seconds = delta.map(
      lambda x: x.total_seconds() if isinstance(x, pd.Timedelta) else x
    )

  df["dt"] = seconds.fillna(0)
  return df

In [40]:
def estimate_parameters(
  df: pd.DataFrame,
  mass: float = 72 + 10,
  rho: float = 1.20,
  g: float = 9.81,
):
  """Fit three cycling-model coefficients by linear least squares.

  Solves ``A @ x ~= b`` where, per row of ``df``::

    b      = speed_diff + dt * g * sin(theta)
    A[:,0] =  dt * power / (mass * speed)
    A[:,1] = -dt * g * cos(theta)
    A[:,2] = -dt * rho * speed**2 / (2 * mass)

  The notebook reads the solution as ``x[0] = 1 - L``, ``x[1] = Crr`` and
  ``x[2] = CdA``.

  Parameters
  ----------
  df
    Must provide the columns ``speed_diff``, ``dt``, ``speed``, ``power``
    and ``theta``. Rows with ``speed == 0`` make the first column of ``A``
    non-finite, so filter them out beforehand.
  mass
    Mass in kg (default 72 + 10; presumably rider plus bike - confirm).
  rho
    Air density in kg/m3.
  g
    Gravitational acceleration in m/s2.

  Returns
  -------
  tuple
    ``(x, residuals, rank, s, yhat, b)``: the four outputs of
    ``np.linalg.lstsq`` followed by the fitted values ``A @ x`` and the
    target vector ``b``.
  """
  dv = df.speed_diff.to_numpy()
  dt = df.dt.to_numpy()
  vt = df.speed.to_numpy()
  P_legs = df.power.to_numpy()
  theta = df.theta.to_numpy()

  b = dv + dt * g * np.sin(theta)
  c1 = dt * P_legs / (mass * vt)
  c2 = -dt * g * np.cos(theta)
  c3 = -dt * rho * vt**2 / (2 * mass)
  A = np.column_stack((c1, c2, c3))

  # Pass rcond explicitly: omitting it triggers NumPy's FutureWarning about
  # the changing default on older releases.
  x, residuals, rank, s = np.linalg.lstsq(A, b, rcond=None)

  yhat = A @ x

  return x, residuals, rank, s, yhat, b


def estimate_parameters_twostep(
  df: pd.DataFrame,
  verbose: bool = False,
  percentile: float = 95,
):
  """Fit, discard the worst-fitting rows, then fit again.

  Pass 1 fits all rows with ``estimate_parameters`` and stores the residual
  ``b - yhat`` in ``df["error"]`` and its magnitude in ``df["abs_error"]``.
  Rows whose ``abs_error`` is at or above the given percentile are treated
  as outliers and removed; pass 2 refits on the remaining rows.

  Parameters
  ----------
  df
    Input samples. NOTE: the ``error`` and ``abs_error`` columns are written
    onto this frame in place.
  verbose
    If true, print how many rows were removed.
  percentile
    Percentile of ``abs_error`` used as the (exclusive) outlier cutoff.

  Returns
  -------
  tuple
    ``(x, residuals, rank, s, yhat, b, inliers)``: the pass-2 results of
    ``estimate_parameters`` plus the filtered frame. The ``error`` and
    ``abs_error`` columns of ``inliers`` still hold the pass-1 residuals.
  """
  _, _, _, _, yhat, b = estimate_parameters(df)
  df["error"] = b - yhat
  df["abs_error"] = np.abs(df["error"])
  before = len(df)

  cutoff = np.percentile(df["abs_error"], percentile)
  # Copy so callers can add columns to the result without pandas
  # chained-assignment warnings.
  inliers = df[df["abs_error"] < cutoff].copy()

  if verbose:
    print(f"Removed {before - len(inliers)} outliers")

  x, residuals, rank, s, yhat, b = estimate_parameters(inliers)

  return x, residuals, rank, s, yhat, b, inliers

In [44]:
# Peek at the raw "record" stream to see which fields each message carries.
filepath = data_folder("fit/Boston_Tri.fit")
ff = fitparse.FitFile(filepath)

# Show only the first few messages; dumping the whole list floods the output.
list(ff.get_messages("record", with_definitions=True))[:5]

[<DefinitionMessage: record (#20) -- local mesg: #0, field defs: [timestamp, position_lat, position_long, gps_accuracy, distance, heart_rate, calories, cadence, speed, power], dev field defs: []>,
 <DataMessage: record (#20) -- local mesg: #0, fields: [timestamp: 2023-08-27 11:49:05, position_lat: 504942592, position_long: -847645810, gps_accuracy: 2, distance: 0.0, heart_rate: 117, calories: 0, cadence: 0, enhanced_speed: 1.427, speed: 1.427, power: 0]>,
 <DefinitionMessage: record (#20) -- local mesg: #0, field defs: [timestamp, position_lat, position_long, gps_accuracy, distance, heart_rate, calories, cadence, speed, power, temperature, battery_soc], dev field defs: []>,
 <DataMessage: record (#20) -- local mesg: #0, fields: [timestamp: 2023-08-27 11:49:06, position_lat: 504942792, position_long: -847645890, gps_accuracy: 2, distance: 1.8, heart_rate: 117, calories: 0, cadence: 4, enhanced_speed: 1.849, speed: 1.849, power: 0, temperature: 19, battery_soc: 90.0]>,
 <DefinitionMessag

In [41]:
cols = [
  'timestamp',
  'distance',
  'altitude',
  'position_lat',
  'position_long',
  'power',
  'speed',
]
filepath = data_folder("fit/Boston_Tri.fit")
# filepath = data_folder("fit/Great_Brook_Farm.fit")
# filepath = data_folder("fit/IM_Santa_Cruz_70.3.fit")
df = parse_fitfile(filepath, cols, verbose=False)
df = calculate_grade(df)
df = calculate_speed_diff(df)
df = calculate_dt(df)

# The first record has no predecessor, so its diff-based columns are only
# zero-filled placeholders. Slicing (unlike drop(labels=[0])) is also safe
# on an empty frame.
df = df.iloc[1:]

# Keep only samples exactly one second after the previous one; this removes
# rows with no change in time as well as long pauses.
df = df[df.dt == 1]
# Keep only rows with speed above 1; this also guards the division by speed
# in the model. Copy so that the error columns written by
# estimate_parameters_twostep land on an independent frame, not on a slice.
df = df[df.speed > 1].copy()

x, residuals, rank, s, y_hat, b, df2 = estimate_parameters_twostep(df, verbose=True)
L = 1 - x[0]
Crr = x[1]
CdA = x[2]
# NOTE(review): the saved run printed negative L and CdA; if these are meant
# to be a loss fraction and a drag area, that is implausible - check the
# model signs and units.

# lstsq returns an empty residuals array when the system is rank-deficient
# or has no more rows than columns.
residual_ss = residuals[0] if len(residuals) else float("nan")

print("Optimized parameters:")
print(f"* L = {L:.4f}")
print(f"* Crr = {Crr:.4f}")
print(f"* CdA = {CdA:.4f}")
print(f"* Residuals = {residual_ss:.4f}")
print(f"* Rank = {rank}")
print(f"* Singular values = {s}")

Removed 153 outliers
Optimized parameters:
* L = -0.0338
* Crr = 0.0470
* CdA = -0.1544
* Residuals = 70.0537
* Rank = 3
* Singular values = [531.09782434   7.8537652    5.18788841]



`rcond` parameter will change to the default of machine precision times ``max(M, N)`` where M and N are the input matrix dimensions.


`rcond` parameter will change to the default of machine precision times ``max(M, N)`` where M and N are the input matrix dimensions.



In [7]:
# Inspect the filtered samples. The `error` / `abs_error` columns are present
# because estimate_parameters_twostep writes them onto the frame it is given.
df

Unnamed: 0,timestamp,distance,altitude,position_lat,position_long,power,speed,dy,dx,grade,theta,speed_diff,dt,error,abs_error
1,2023-08-27 11:49:54,432.69,9.4,504987797,-847636027,333,10.545,0.2,10.53,0.018993,0.018991,0.159,1.0,0.276381,0.276381
2,2023-08-27 11:49:55,443.25,9.4,504988760,-847635306,354,10.537,0.0,10.56,0.000000,0.000000,-0.008,1.0,-0.104121,0.104121
3,2023-08-27 11:49:56,453.73,9.4,504989752,-847634498,325,10.515,0.0,10.48,0.000000,0.000000,-0.022,1.0,-0.080902,0.080902
4,2023-08-27 11:49:57,464.27,9.4,504990713,-847633678,306,10.373,0.0,10.54,0.000000,0.000000,-0.142,1.0,-0.178069,0.178069
5,2023-08-27 11:49:58,474.55,9.6,504991659,-847632949,256,10.387,0.2,10.28,0.019455,0.019453,0.014,1.0,0.234490,0.234490
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3056,2023-08-27 12:40:49,32763.37,7.4,504936313,-847643406,375,7.722,0.0,7.69,0.000000,0.000000,0.816,1.0,0.578562,0.578562
3057,2023-08-27 12:40:50,32771.25,7.2,504935639,-847642683,401,7.811,-0.2,7.88,-0.025381,-0.025375,0.089,1.0,-0.437040,0.437040
3058,2023-08-27 12:40:51,32779.38,7.2,504934989,-847641945,403,8.095,0.0,8.13,0.000000,0.000000,0.284,1.0,0.022700,0.022700
3059,2023-08-27 12:40:52,32787.59,7.0,504934349,-847641128,386,7.955,-0.2,8.21,-0.024361,-0.024356,-0.140,1.0,-0.620414,0.620414


In [6]:
# `error` is the first-pass residual (b - yhat) stored by
# estimate_parameters_twostep. The figure shows that residual only, so the
# title names it rather than "actual vs. predicted".
fig = px.line(
  df2,
  x="timestamp",
  y=["error"],
  title="Prediction Error (Actual - Predicted)",
)
fig.show()

In [None]:
# Speed, altitude and power over time for the rows kept after outlier removal.
fig = px.line(
  df2,
  x="timestamp",
  y=["speed", "altitude", "power"],
  title="Variables",
)
fig.show()

In [None]:
# Distribution of the absolute first-pass residuals among the retained rows.
fig = px.histogram(
  df2,
  x="abs_error",
  title="Error Distribution",
)
fig.show()

# 95th percentile of abs_error within the already-filtered frame.
np.percentile(df2["abs_error"], 95)