# initial review of polar hr csv files

In [1]:
import pandas as pd
import numpy as np

In [8]:
sup_8002 = pd.read_csv('/mnt/lss/Projects/BOOST/InterventionStudy/3-experiment/data/polarhrcsv/Supervised/sub8005/8005_wk8_ses.CSV', skiprows=2, header=0)
sup_8002.head()

Unnamed: 0,Sample rate,Time,HR (bpm),Speed (km/h),Pace (min/km),Cadence,Altitude (m),Stride length (m),Distances (m),Temperatures (C),Power (W),Unnamed: 11
0,1.0,00:00:00,101,,,,,,,,,
1,,00:00:01,101,,,,,,,,,
2,,00:00:02,101,,,,,,,,,
3,,00:00:03,101,,,,,,,,,
4,,00:00:04,100,,,,,,,,,


In [9]:
len(sup_8002)

2434

In [10]:
sup_8002 = sup_8002[['Time', 'HR (bpm)']]
sup_8002 = sup_8002.rename(columns = {
    'Time':'time',
    'HR (bpm)':'hr'})

In [11]:
sup_8002

Unnamed: 0,time,hr
0,00:00:00,101
1,00:00:01,101
2,00:00:02,101
3,00:00:03,101
4,00:00:04,100
...,...,...
2429,00:40:29,113
2430,00:40:30,112
2431,00:40:31,111
2432,00:40:32,111


In [12]:
import json

# 1) Copy your DataFrame
df = sup_8002.copy()

# 2) Prefix each "time" with a date so it's full ISO. 
#    Pick the actual date of your session, e.g. "2025-07-23T"
base_date = "2025-07-23T"
df['time'] = base_date + df['time'].str.slice(0,8) + 'Z'

# 3) Convert to a list of dicts
records = df.rename(columns={'time':'time','hr':'hr'}) \
            .to_dict(orient='records')

# 4) Write out to JSON
with open('sup_8002.json','w') as f:
    json.dump(records, f, indent=2)

In [42]:
zones = pd.read_excel('/mnt/lss/Projects/BOOST/InterventionStudy/1-projectManagement/participants/ExerciseSessionMaterials/Intervention Materials/BOOST HR ranges.xlsx')

In [43]:
cols_to_keep = [0] + list(range(3,15))
zones = zones.iloc[:, cols_to_keep]
zones

Unnamed: 0,BOOST ID,Max HR,Rest HR,Zone 1 (55-60%),Unnamed: 6,Zone 2 (60-65%),Unnamed: 8,Zone 3 (65-70%),Unnamed: 10,Zone 4 (70-75%),Unnamed: 12,Zone 5 (75-80%),Unnamed: 14
0,8000,165.3,72.0,90.915,99.18,100.18,107.445,108.445,115.71,116.71,123.975,124.975,132.24
1,8001,163.2,66.0,89.760,97.92,98.92,106.080,107.080,114.24,115.24,122.400,123.400,130.56
2,8002,163.2,83.0,89.760,97.92,98.92,106.080,107.080,114.24,115.24,122.400,123.400,130.56
3,8003,167.4,75.0,92.070,100.44,101.44,108.810,109.810,117.18,118.18,125.550,126.550,133.92
4,8004,177.9,69.0,97.845,106.74,107.74,115.635,116.635,124.53,125.53,133.425,134.425,142.32
...,...,...,...,...,...,...,...,...,...,...,...,...,...
126,8126,208.0,,114.400,124.80,125.80,135.200,136.200,145.60,146.60,156.000,157.000,166.40
127,8127,208.0,,114.400,124.80,125.80,135.200,136.200,145.60,146.60,156.000,157.000,166.40
128,8128,208.0,,114.400,124.80,125.80,135.200,136.200,145.60,146.60,156.000,157.000,166.40
129,8129,208.0,,114.400,124.80,125.80,135.200,136.200,145.60,146.60,156.000,157.000,166.40


In [21]:
zones_8002 = zones[zones['BOOST ID']==8002]
zones_8002.to_clipboard()

In [22]:
# Extract raw zone values for BOOST ID 8002
z = zones[zones['BOOST ID'] == 8002]

# Floor and ceil to get full integer coverage
zone_bounds = [
    (np.floor(z['Zone 1 (55-60%)'].values[0]), np.ceil(z['Unnamed: 6'].values[0]) - 1),
    (np.floor(z['Zone 2 (60-65%)'].values[0]), np.ceil(z['Unnamed: 8'].values[0]) - 1),
    (np.floor(z['Zone 3 (65-70%)'].values[0]), np.ceil(z['Unnamed: 10'].values[0]) - 1),
    (np.floor(z['Zone 4 (70-75%)'].values[0]), np.ceil(z['Unnamed: 12'].values[0]) - 1),
    (np.floor(z['Zone 5 (75-80%)'].values[0]), np.ceil(z['Unnamed: 14'].values[0]) - 1),
]


In [23]:
zone_bounds

[(np.float64(89.0), np.float64(97.0)),
 (np.float64(98.0), np.float64(106.0)),
 (np.float64(107.0), np.float64(114.0)),
 (np.float64(115.0), np.float64(122.0)),
 (np.float64(123.0), np.float64(130.0))]

In [24]:
# Verify that intervals are continuous and non-overlapping
for i in range(1, len(zone_bounds)):
    prev_upper = zone_bounds[i-1][1]
    curr_lower = zone_bounds[i][0]
    if curr_lower != prev_upper + 1:
        raise ValueError(f"Gap or overlap detected between Zone {i} and Zone {i+1}")


In [25]:
hr_values = sup_8002['hr'].values

conditions = [
    hr_values < zone_bounds[0][0],  # Zone 0
]

labels = ['Zone 0']

# Add zones 1-5
for i, (low, high) in enumerate(zone_bounds):
    conditions.append((hr_values >= low) & (hr_values <= high))
    labels.append(f'Zone {i+1}')

# Zone 6 (above last zone)
conditions.append(hr_values > zone_bounds[-1][1])
labels.append('Zone 6')

# Vectorized zone assignment
sup_8002['Zone'] = np.select(conditions, labels, default='Unknown')


In [26]:
time_in_zones = sup_8002['Zone'].value_counts().sort_index()

# Display time in seconds
print(time_in_zones)


Zone
Zone 0      14
Zone 1     212
Zone 2     128
Zone 3      35
Zone 4     117
Zone 5     262
Zone 6    1699
Name: count, dtype: int64


In [31]:
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = "iframe"
fig = go.Figure()

# Add HR line
fig.add_trace(go.Scatter(
    x=sup_8002['dt'],
    y=sup_8002['hr'],
    mode='lines',
    name='HR',
    line=dict(color='gray', width=2)
))

# Add colored zone points
fig.add_trace(go.Scatter(
    x=sup_8002['dt'],
    y=sup_8002['hr'],
    mode='markers',
    marker=dict(size=6, color=sup_8002['Zone'].astype('category').cat.codes, colorscale='Viridis'),
    name='HR Zone'
))

fig.update_layout(
    title='Heart Rate Over Time with Zones',
    xaxis_title='Time',
    yaxis_title='Heart Rate (bpm)'
)

fig.show()


In [28]:
import plotly.io as pio
print(pio.renderers)


Renderers configuration
-----------------------
    Default renderer: 'iframe'
    Available renderers:
        ['plotly_mimetype', 'jupyterlab', 'nteract', 'vscode',
         'notebook', 'notebook_connected', 'kaggle', 'azure', 'colab',
         'cocalc', 'databricks', 'json', 'png', 'jpeg', 'jpg', 'svg',
         'pdf', 'browser', 'firefox', 'chrome', 'chromium', 'iframe',
         'iframe_connected', 'sphinx_gallery', 'sphinx_gallery_png']



In [54]:
import pandas as pd
import numpy as np
import plotly.express as px

# 1) Parse & sort your time column
sup_8002['dt'] = pd.to_datetime(sup_8002['dt'])
sup_8002 = sup_8002.sort_values('dt')

# 2) Compute trim thresholds
start_cut = sup_8002['dt'].iloc[0] + pd.Timedelta(minutes=5)
end_cut   = sup_8002['dt'].iloc[-1] - pd.Timedelta(minutes=5)

# 3) Trim the DataFrame
sup_trim = sup_8002[(sup_8002['dt'] >= start_cut) & (sup_8002['dt'] <= end_cut)].copy()

# 4) Re‑build your zone bounds (floored/ceiled integer intervals)
z = zones[zones['BOOST ID'] == 8005].iloc[0]
zone_bounds = [
    (int(np.floor(z['Zone 1 (55-60%)'])), int(np.ceil(z['Unnamed: 6']))),
    (int(np.floor(z['Zone 2 (60-65%)'])), int(np.ceil(z['Unnamed: 8']))),
    (int(np.floor(z['Zone 3 (65-70%)'])), int(np.ceil(z['Unnamed: 10']))),
    (int(np.floor(z['Zone 4 (70-75%)'])), int(np.ceil(z['Unnamed: 12']))),
    (int(np.floor(z['Zone 5 (75-80%)'])), int(np.ceil(z['Unnamed: 14']))),
]

# 5) Assign zones 0 through 6
hr = sup_trim['hr'].values
conds = [
    hr < zone_bounds[0][0],  # Zone 0
]
labels = ['Zone 0']

for i, (low, high) in enumerate(zone_bounds, start=1):
    conds.append((hr >= low) & (hr <= high))
    labels.append(f'Zone {i}')

# Zone 6: above last
conds.append(hr > zone_bounds[-1][1])
labels.append('Zone 6')

sup_trim['Zone'] = np.select(conds, labels, default='Unknown')

# 6) Re‑compute time in each zone (in seconds)
time_in_zones = sup_trim['Zone'].value_counts().sort_index()
print(time_in_zones)

# 7) Plot the trimmed HR trace with color by zone
fig = px.line(
    sup_trim,
    x='dt',
    y='hr',
    color='Zone',
    title='Trimmed HR Over Time by Zone',
    labels={'HR':'Heart Rate (bpm)', 'Time':'Time'},
)
fig.update_traces(mode='markers+lines', marker=dict(size=4), line=dict(width=1))
fig.show()


Zone
Zone 0     22
Zone 1    522
Zone 2    509
Zone 3    666
Zone 4    115
Name: count, dtype: int64


In [55]:
zone_bounds

[(99, 108), (109, 117), (118, 126), (126, 135), (136, 144)]