# notebook for creating funcs for meta data for zones

### goals 
- create zones as bar graphs to see time spent in zones over whole sections
- add summary metrics (total time trained - total sessions - missed sessions)

In [5]:
import os
import pandas as pd
import numpy as np
import json
import re
import plotly.express as px
import plotly.io as pio
pio.renderers.default = "iframe"

In [6]:
def create_data_json(subject, session_type='sup', zones_path='zones.xlsx'):
    # 1) map session_type → directory name
    session_mapping = {
        'sup':'Supervised',
        'uns':'Unsupervised',
        'sub':'Submax'
    }
    stage = session_mapping.get(session_type)
    if stage is None:
        raise ValueError(f"unknown session_type: {session_type!r}")

    # 2) build base_dir
    base_dir = os.path.join(
        '/mnt/lss/Projects/BOOST/InterventionStudy/3-experiment/data/polarhrcsv',
        stage,
        f'sub{subject}'
    )

    # 3) load zones excel & pick subject’s row
    zones_df = pd.read_excel(zones_path)
    row = zones_df[zones_df['BOOST ID']==subject]
    if row.empty:
        raise ValueError(f"No zone info for BOOST ID {subject}")
    z = row.iloc[0]

    # 4) create integer, closed intervals for Zones 1–5
    #    then we’ll add Zone 0 (<Z1_min) and Zone 6 (>Z5_max)
    raw = [
        (z['Zone 1 (55-60%)'], z['Unnamed: 6']),
        (z['Zone 2 (60-65%)'], z['Unnamed: 8']),
        (z['Zone 3 (65-70%)'], z['Unnamed: 10']),
        (z['Zone 4 (70-75%)'], z['Unnamed: 12']),
        (z['Zone 5 (75-80%)'], z['Unnamed: 14']),
    ]
    zone_bounds = []
    for low_f, high_f in raw:
        low_i  = int(np.floor(low_f))
        high_i = int(np.ceil(high_f))
        zone_bounds.append((low_i, high_i))
    # drop the big zones_df
    del zones_df

    # 5) find all CSVs
    files = sorted(f for f in os.listdir(base_dir) if f.lower().endswith('.csv'))

    total_time = 0
    max_session = 0
    overall_zone_time = {f'Zone {i}':0 for i in range(7)}
    file_summaries = []


    for fname in files:
        path = os.path.join(base_dir, fname)
    
        # 1) read with your skipping and header
        df = pd.read_csv(path, skiprows=2, header=0)
    
        # 2) normalize column names: strip whitespace and lowercase
        df.columns = df.columns.str.strip().str.lower()
    
        # 3) rename the HR column (in place)
        if 'hr (bpm)'.lower() in df.columns:
            df.rename(columns={'hr (bpm)'.lower(): 'hr'}, inplace=True)
        elif 'hr' in df.columns:
            # already good
            pass
        else:
            raise KeyError(f"{fname!r} has columns {df.columns.tolist()}, "
                           "but neither 'HR (bpm)' nor 'HR' was found.")
    
        # now you can safely do:
        hr = df['hr'].values
    
        # … build conds/labels and np.select as before …
        conds = []
        labels = []
    
        # Zone 0
        conds.append(hr < zone_bounds[0][0]); labels.append('Zone 0')
        # Zones 1–5
        for i,(low,high) in enumerate(zone_bounds, start=1):
            conds.append((hr>=low)&(hr<=high)); labels.append(f'Zone {i}')
        # Zone 6
        conds.append(hr > zone_bounds[-1][1]); labels.append('Zone 6')
    
        df['Zone'] = np.select(conds, labels, default='Unknown')


        # 7) tally times (1 s per row)
        per_zone = df['Zone'].value_counts().to_dict()
        for zname in overall_zone_time:
            per_zone.setdefault(zname, 0)
            overall_zone_time[zname] += per_zone[zname]

        file_time = len(df)
        total_time += file_time

        # 8) extract session number from filename
        m = re.search(r'_ses(\d+)', fname)
        ses_num = int(m.group(1)) if m else None
        if ses_num and ses_num > max_session:
            max_session = ses_num

        file_summaries.append({
            'file': fname,
            'session_number': ses_num,
            'total_time_sec': file_time,
            'time_per_zone': per_zone
        })

        # 9) free memory
        del df

    # 10) assemble JSONable dict
    result = {
        'subject': subject,
        'session_type': session_type,
        'stage': stage,
        'num_files': len(files),
        'max_session_number': max_session,
        'total_time_sec': total_time,
        'time_per_zone': overall_zone_time,
        'files': file_summaries
    }

    return result
            
    

In [7]:
zones = '/mnt/lss/Projects/BOOST/InterventionStudy/1-projectManagement/participants/ExerciseSessionMaterials/Intervention Materials/BOOST HR ranges.xlsx'
data = create_data_json(8005, session_type='sup', zones_path=zones)


In [8]:
# create heart rate zone bar graphs that are verticallt stacked rounded bars that where zone 6 is at the top and descends
# 2) Extract times, in descending zone order
zone_times = data['time_per_zone']
zones = [f'Zone {i}' for i in range(6, -1, -1)]
times = [zone_times.get(z, 0) for z in zones]

# 3) Build DataFrame
df_plot = pd.DataFrame({
    'Zone': zones,
    'Time (s)': times
})

# 4) Define a color map for each zone
color_map = {
    'Zone 6': '#DC96A3',  # red
    'Zone 5': '#D09877',  # orange
    'Zone 4': '#8ABB88',  # green
    'Zone 3': '#7496D0',  # blue
    'Zone 2': '#9467bd',  # purple
    'Zone 1': '#F9E2AF',  # brown
    'Zone 0': '#7f7f7f',  # gray
}

# 5) Create horizontal bar chart with discrete colors
fig = px.bar(
    df_plot,
    x='Time (s)',
    y='Zone',
    orientation='h',
    color='Zone',
    color_discrete_map=color_map,
    text='Time (s)',
    title='Total Time per HR Zone (across all sessions)'
)

# 6) Keep Zone 6 on top, tidy margins
fig.update_layout(
    #yaxis={'categoryorder':'array', 'categoryarray':zones},
    margin=dict(l=100, r=20, t=50, b=20),
    showlegend=False  # hide duplicate legend entries
)


fig.update_traces(marker_line_width=1, marker_line_color='black')

fig.show()


In [13]:
zones_path = '/mnt/lss/Projects/BOOST/InterventionStudy/1-projectManagement/participants/ExerciseSessionMaterials/Intervention Materials/BOOST HR ranges.xlsx'
subject = 8005
zones_df = pd.read_excel(zones_path)
row = zones_df[zones_df['BOOST ID']==subject]
if row.empty:
    raise ValueError(f"No zone info for BOOST ID {subject}")
z = row.iloc[0]

# 4) create integer, closed intervals for Zones 1–5
#    then we’ll add Zone 0 (<Z1_min) and Zone 6 (>Z5_max)
raw = [
    (z['Zone 1 (55-60%)'], z['Unnamed: 6']),
    (z['Zone 2 (60-65%)'], z['Unnamed: 8']),
    (z['Zone 3 (65-70%)'], z['Unnamed: 10']),
    (z['Zone 4 (70-75%)'], z['Unnamed: 12']),
    (z['Zone 5 (75-80%)'], z['Unnamed: 14']),
]

# drop the big zones_df

In [14]:
zones = [{'zone': i+1, 'low': low, 'high': high} for i, (low, high) in enumerate(raw)]

In [16]:
zones

[{'zone': 1, 'low': np.float64(99.00000000000001), 'high': np.float64(108.0)},
 {'zone': 2, 'low': np.float64(109.0), 'high': np.float64(117.0)},
 {'zone': 3, 'low': np.float64(118.0), 'high': np.float64(125.99999999999999)},
 {'zone': 4, 'low': np.float64(126.99999999999999), 'high': np.float64(135.0)},
 {'zone': 5, 'low': np.float64(136.0), 'high': np.float64(144.0)}]

In [21]:
def fix_midpoint_snap_integer(zones):
    fixed = [zones[0].copy()]
    for i in range(1, len(zones)):
        prev = fixed[-1]
        curr = zones[i].copy()

        # Midpoint rounded to integer
        m = round((prev['high'] + curr['low']) / 2)

        # Stitch by snapping both ends to midpoint
        prev['high'] = m
        curr['low'] = m

        fixed.append(curr)
    # Final rounding to ensure integer 'low'/'high'
    for z in fixed:
        z['low'] = int(round(z['low']))
        z['high'] = int(round(z['high']))-1
    return fixed

zones = fix_midpoint_snap_integer(zones)
zones

[{'zone': 1, 'low': 99, 'high': 107},
 {'zone': 2, 'low': 108, 'high': 117},
 {'zone': 3, 'low': 118, 'high': 125},
 {'zone': 4, 'low': 126, 'high': 135},
 {'zone': 5, 'low': 136, 'high': 143}]

In [18]:
def fix_proportional(zones):
    fixed = [zones[0].copy()]
    for i in range(1, len(zones)):
        prev = fixed[-1]
        curr = zones[i].copy()
        delta = curr['low'] - prev['high']  # positive = gap, negative = overlap
        if delta != 0:
            w1 = prev['high'] - prev['low']
            w2 = curr['high'] - curr['low']
            adj1 = delta * (w1 / (w1 + w2))
            adj2 = delta * (w2 / (w1 + w2))
            prev['high'] += adj1
            curr['low'] -= adj2
        fixed.append(curr)
    return fixed

fix_proportional(zones)

[{'zone': 1,
  'low': np.float64(99.00000000000001),
  'high': np.float64(108.52941176470588)},
 {'zone': 2,
  'low': np.float64(108.52941176470588),
  'high': np.float64(117.51428571428572)},
 {'zone': 3,
  'low': np.float64(117.51428571428572),
  'high': np.float64(126.51473136915077)},
 {'zone': 4,
  'low': np.float64(126.51473136915077),
  'high': np.float64(135.51471825063078)},
 {'zone': 5, 'low': np.float64(135.51471825063078), 'high': np.float64(144.0)}]