In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.colors as pc

In [2]:
# Load subject metadata and per-sample test measurements, each ordered by test
# (measurements additionally ordered by time inside every test).
subjects = pd.read_csv("data/treadmill/subject-info.csv").sort_values(by=["ID_test"])
measures = pd.read_csv("data/treadmill/test_measure.csv").sort_values(by=["ID_test", "time"])

# Readable aliases for the raw gas-exchange / ventilation columns.
measures = measures.assign(
    O2_rate=measures["VO2"],
    CO2_rate=measures["VCO2"],
    air_rate=measures["VE"],
)

# Per-breath quantities: each rate divided by the respiration rate (RR).
measures = measures.assign(
    O2_vol=lambda frame: frame["O2_rate"] / frame["RR"],
    CO2_vol=lambda frame: frame["CO2_rate"] / frame["RR"],
    air_vol=lambda frame: frame["air_rate"] / frame["RR"],
)

# Running totals of the per-breath quantities, restarted for every test.
for volume_col, cumulative_col in (
    ("O2_vol", "O2_cum"),
    ("CO2_vol", "CO2_cum"),
    ("air_vol", "air_cum"),
):
    measures[cumulative_col] = measures.groupby("ID_test")[volume_col].cumsum()

# The raw columns are now fully represented by the *_rate aliases.
measures = measures.drop(columns=["VO2", "VCO2", "VE"])

# Define numerical features
measures_numerical = {
    'time', 'Speed', 'HR', 'RR',
    'O2_rate', 'CO2_rate', 'air_rate',
    'O2_vol', 'CO2_vol', 'air_vol',
    'O2_cum', 'CO2_cum', 'air_cum',
    'Age', 'Weight', 'Height', 'Humidity', 'Temperature',
}

In [3]:
# Build `measures_test`: the exercise portion of every test on a common time axis.
#
# Work on an explicit copy: the rows are selected with a boolean mask and `time` is
# rewritten several times below, which on a bare slice of `measures` raises
# SettingWithCopyWarning and leaves it unclear which frame is being edited.
measures_test = measures[measures["Speed"] > 5].copy()

# `time` is cast back to its original dtype after every shift: the resampling step
# later builds a per-second grid with range(), which needs integer times, and
# fillna(0) below would otherwise silently turn the column into floats.
time_dtype = measures_test["time"].dtype

# Normalizes the start times of the test from start speed of 5 km/h
normal_starts = measures_test.groupby("ID_test")["time"].transform("first")
measures_test["time"] = (measures_test["time"] - normal_starts).astype(time_dtype)

# Normalizes the start times of the test from start speed of 6.1 km/h
# Tests still at 6.1 km/h after 100 s are re-anchored on their first sample above 6.1 km/h.
slow_start = (measures_test["Speed"] == 6.1) & (measures_test["time"] > 100)
ids_slow = measures_test.loc[slow_start, "ID_test"].unique()
past_first_step = measures_test["ID_test"].isin(ids_slow) & (measures_test["Speed"] > 6.1)
first_steps = measures_test[past_first_step].groupby("ID_test")["time"].first()
# Map the offsets from the frame being edited (the original mapped them from `measures`
# and relied on index alignment to discard the extra rows). Tests outside `ids_slow`
# get an offset of 0.
# NOTE(review): the +60 is applied to every test, not only the slow-start ones - confirm intended.
step_offsets = measures_test["ID_test"].map(first_steps).fillna(0)
measures_test["time"] = (measures_test["time"] - step_offsets + 60).astype(time_dtype)
measures_test = measures_test[measures_test["time"] > 0]

# Remove tests with non-standard speeds increase rates
fast_ramp = measures_test["time"].between(276, 301) & (measures_test["Speed"] > 11.4)
outlier_ids = list(measures_test.loc[fast_ramp, "ID_test"].unique())
outlier_ids += ["715_1", "796_1", "593_1", "789_1", "479_1", "73_1", "667_1", "318_1", "376_1", "695_1"]
# (the time > 0 condition was already enforced just above, so it is not repeated here)
measures_test = measures_test[~measures_test["ID_test"].isin(outlier_ids)]

# Remove cooldown periods
cooldown = (measures_test["time"] > 600) & (measures_test["Speed"] < 12)
measures_test = measures_test[~cooldown].copy()

# Start all tests at time 0
test_starts = measures_test.groupby("ID_test")["time"].transform("min")
measures_test["time"] = (measures_test["time"] - test_starts).astype(time_dtype)

In [4]:
# Resample every test onto a 1-second grid, then attach subject metadata.

# Average samples that share the same (test, second).
df = measures_test.groupby(["ID_test", "time"]).mean()

# One row per second from 0 to the last recorded second of each test.
# int() keeps range() working even if `time` arrives as an integer-valued float.
max_secs = measures_test.groupby("ID_test")["time"].agg("max")
tuples = [(key, second) for key, value in max_secs.items() for second in range(int(value) + 1)]
index = pd.MultiIndex.from_tuples(tuples, names=["ID_test", "time"])

# Interpolate inside each test only. Interpolating the stacked frame in one go lets
# missing values at the start/end of one test be filled from the neighbouring test
# (limit_direction="both" extends fills across the boundary). A column that is
# entirely missing for a test now stays NaN instead of borrowing another test's values.
measures_test = (
    df.reindex(index)
    .groupby(level="ID_test")
    .transform(lambda part: part.interpolate(method="linear", limit_direction="both"))
    .reset_index()
)

# `ID` went through mean()/interpolate() and is float here. Assign the column directly:
# `.loc[:, "ID"] = ...` writes into the existing float column under pandas >= 2.0, so the
# integer cast was silently discarded and the merge key stayed float.
measures_test["ID"] = measures_test["ID"].astype("int")

# Join with subjects
measures_test = measures_test.merge(subjects, on=["ID_test", "ID"], how="left")

# Compute max speed attained quintile
# duplicates="drop" matches the binning used for the plot colours and avoids a
# ValueError when several quantile edges coincide.
max_speeds = measures_test.groupby("ID_test")["Speed"].max()
measures_test["max_speed_quintile"] = measures_test["ID_test"].map(
    pd.qcut(max_speeds, 5, labels=False, duplicates="drop")
)

In [None]:
# Colour every test by the quantile bin of the highest treadmill speed it reached.
n_bins = 5
color_data = measures_test.groupby("ID_test")["Speed"].max()

# Hover label per test, in the same order as `color_data`.
hover_texts = []
for d in color_data:
    hover_texts.append(f"Max Treadmill Speed: {d:.2f}")

# Integer bin id per test; coinciding quantile edges are merged rather than raising.
bin_ids = pd.qcut(color_data, q=n_bins, labels=False, duplicates='drop')

# n_bins colours sampled at evenly spaced positions along the 'bluered' scale.
colorscale = pc.get_colorscale('bluered')
sample_points = [i / (n_bins - 1) for i in range(n_bins)]
discrete_colors = pc.sample_colorscale(colorscale, sample_points)

# One colour per test, looked up by its bin id.
colors = list(map(discrete_colors.__getitem__, bin_ids))

In [None]:
# Plot one line per test, coloured by the max-speed bin held in `colors`.
#
# The figure used to be titled "O2 Exchange Efficiency" while actually drawing the
# treadmill speed (the efficiency expression was commented out). The series is now
# chosen explicitly and the title / axis label follow it:
#   "speed"          - treadmill speed profile (what the cell was drawing)
#   "o2_efficiency"  - percent of O2 consumed by the body in each breath
plot_metric = "speed"

metric_labels = {
    "speed": dict(
        title="Treadmill Speed in Maximal Treadmill Exercise Test",
        yaxis="Speed (km/h)",
    ),
    "o2_efficiency": dict(
        title="O2 Exchange Efficiency in Maximal Treadmill Exercise Test",
        yaxis="O2 Exchange Efficiency (%)",
    ),
}


def o2_exchange_efficiency(test_df, window=10):
    """Return the smoothed O2 exchange efficiency (%) for one test.

    Both per-breath volumes are smoothed with a rolling mean over `window` rows
    before taking the ratio, so the first `window - 1` values are NaN.
    """
    o2_consumed = test_df["O2_vol"].rolling(window=window).mean()
    # presumably air_vol is in litres and O2_vol in millilitres (hence * 1000), and
    # .2095 is the O2 fraction of inhaled air - TODO confirm against the data dictionary
    o2_inhaled = test_df["air_vol"].rolling(window=window).mean() * 1000 * .2095
    return o2_consumed / o2_inhaled * 100


fig = go.Figure()

# A single groupby pass replaces re-filtering the whole frame for every test.
# sort=False yields tests in first-appearance order, i.e. the order produced by
# measures_test["ID_test"].unique(), so position i indexes `colors` and `hover_texts`
# exactly as before.
for i, (test, t1) in enumerate(measures_test.groupby("ID_test", sort=False)):

    if plot_metric == "o2_efficiency":
        y_values = o2_exchange_efficiency(t1)
    else:
        y_values = t1["Speed"]

    fig.add_trace(
        go.Scatter(
            x=t1["time"] / 60,
            y=y_values,
            mode="lines",
            line=dict(color=colors[i], width=.2),
            name=test,
            showlegend=True,
            hovertext=hover_texts[i],
        )
    )

fig.update_layout(
    title=dict(text=metric_labels[plot_metric]["title"]),
    xaxis=dict(title="Minutes"),
    yaxis=dict(title=metric_labels[plot_metric]["yaxis"]),
    legend=dict(title="ID Test"),
    height=800,
    width=1000
)

fig.show()

In [None]:
import pandas as pd
import numpy as np

# Summarise, for every second of the normalised timeline, how many tests have a
# sample at that second and the mean treadmill speed across those tests.
time_grouped = measures_test.groupby('time').agg(
    people_at_exact_time=('Speed', 'size'),
    avg_speed=('Speed', 'mean')
)

# Keep only the checkpoints at multiples of 5 seconds (0, 5, 10, ...)
checkpoints = time_grouped[time_grouped.index % 5 == 0]

# Rename by column name rather than by position: the positional list
# ['time', 'speed', 'count'] labelled the row count as 'speed' and the mean
# speed as 'count'. The displayed column order is kept as time, speed, count.
result_df = (
    checkpoints
    .reset_index()
    .rename(columns={'people_at_exact_time': 'count', 'avg_speed': 'speed'})
    [['time', 'speed', 'count']]
)
result_df