In [1]:
print('test')

test


In [2]:
#pip install mycolorpy

In [3]:
#pip install colour

In [4]:
#pip install fitparse


In [9]:
from fitparse import FitFile
import pandas as pd
import matplotlib.pyplot as plt
import glob
import os
import numpy as np
import matplotlib.colors as mcolors
import matplotlib.cm as cm
import datetime
from mycolorpy import colorlist as mcp
import numpy as np
import re
import matplotlib.dates as mdates

cycle_starts = [pd.Timestamp("2024-11-23"), pd.Timestamp("2024-12-21"), pd.Timestamp("2025-01-18"), pd.Timestamp("2025-02-11"), pd.Timestamp("2025-03-10"),
               pd.Timestamp("2025-04-08"), pd.Timestamp("2025-05-03"),pd.Timestamp("2025-05-30"),
                pd.Timestamp("2025-06-25"),pd.Timestamp("2025-06-21"),
               ]



In [5]:
class FitDataProcessor:
    def __init__(self, file_path):
        self.file_path = file_path
        self.df, summary = self._read_fit_file_and_summarize()
        
        self._date = summary["start_time"].strftime('%Y-%m-%d')
        if self._date == "2019-01-01":
            print("error in file, set to correct date")
            self._date = "2025-02-17"
        if self._date == "2019-01-04":
            print("error in file, set to correct date")
            self._date = "2025-06-17"
        if self._date == "2019-01-03":
            self._date = "2025-06-18"
            
        
        
        print(self._date)
        self._distance = summary["distance_km"] * 1000 if summary["distance_km"] else None
        self._average_heartrate = summary["avg_heart_rate"]
        self._max_heartrate = summary["max_heart_rate"]
        self._pause = summary["pause_sec"]
        self._elevation = summary["elevation_gain_m"]
        self.compute_cycle_day()
        self.compute_vo2()
        
    @property
    def date(self):
        return self._date

    @property
    def cycle_day(self):
        return self._cycle_day

    @property
    def average_heartrate(self):
        return self._average_heartrate

    @property
    def max_heartrate(self):
        return self._max_heartrate

    @property
    def time(self):
        return (self.df["timestamp"].max() - self.pause) / 60

    @property
    def pace(self):
        minuten = (self.df["timestamp"].max() - self.pause) / 60  
        avg_pace = minuten / (self.distance / 1000)
        return avg_pace

    @property
    def distance(self):
        return self._distance

    @property
    def pause(self):
        return self._pause
    
    def set_pause(self, value):
        self._pause = value

    @property
    def elevation_gain(self):
        return self._elevation

    def compute_cycle_day(self):
        date = pd.Timestamp(self.date)
        cycle_start = max([d for d in cycle_starts if d <= date])
        self._cycle_day = (date - cycle_start).days

    def _read_fit_file_and_summarize(self):
        from fitparse import FitFile
        import numpy as np
        import scipy.ndimage

        fitfile = FitFile(self.file_path)
        records = []

        for record in fitfile.get_messages("record"):
            values = {d.name: d.value for d in record}
            records.append(values)

        records = [r for r in records if "timestamp" in r]
        if len(records) < 2:
            print("Nicht genug Daten.")
            return pd.DataFrame(), {}

        # Build DataFrame for time-based access
        df = pd.DataFrame(records)
        df = df.dropna(subset=["timestamp", "distance", "heart_rate"])
        df["timestamp"] = (pd.to_datetime(df["timestamp"]) - pd.to_datetime(df["timestamp"].iloc[0])).dt.total_seconds()

        timestamps = [r["timestamp"] for r in records]
        heart_rates = [r.get("heart_rate") for r in records if "heart_rate" in r]
        cadences = [r.get("cadence") for r in records if "cadence" in r]
        distances = [r.get("distance") for r in records if "distance" in r]
        altitudes = [r.get("altitude") for r in records if "altitude" in r]

        # Pause-Erkennung
        pause_seconds = 0
        for i in range(1, len(records)):
            r0 = records[i - 1]
            r1 = records[i]
            dt = (r1["timestamp"] - r0["timestamp"]).total_seconds()
            speed = r0.get("speed")
            if speed is None:
                speed = 0.0
            d0 = r0.get("distance")
            d1 = r1.get("distance")
            if d0 is None or d1 is None:
                distance_delta = 0.0
            else:
                distance_delta = d1 - d0

            if speed < 0.3 or (dt > 5 and distance_delta < 1.0):
                pause_seconds += dt

        # Höhenmeter
        elevation_gain_m = None
        if altitudes:
            smoothed_alt = scipy.ndimage.gaussian_filter1d(altitudes, sigma=5)
            diffs = np.diff(smoothed_alt)
            elevation_gain_m = float(np.sum(diffs[diffs > 2]))

        duration_sec = (timestamps[-1] - timestamps[0]).total_seconds()
        distance_km = distances[-1] / 1000 if distances else None

        return df, {
            "start_time": records[0]["timestamp"],
            "duration_sec": duration_sec,
            "pause_sec": pause_seconds,
            "avg_heart_rate": int(np.mean(heart_rates)) if heart_rates else None,
            "max_heart_rate": int(np.max(heart_rates)) if heart_rates else None,
            "avg_cadence": int(np.mean(cadences)) if cadences else None,
            "distance_km": round(distance_km, 2) if distance_km else None,
            "elevation_gain_m": round(elevation_gain_m, 1) if elevation_gain_m else None,
        }

    def pace_to_speed(self, pace_min_per_km):
        return 1000 / pace_min_per_km

    def estimate_vo2(self, speed_m_per_min, incline=0.0):
        return 0.2 * speed_m_per_min + 0.9 * speed_m_per_min * incline + 3.5

    
    def estimate_vo2max(self, vo2, hr, hr_rest=60, hr_max=200):
        hr_reserve = hr_max - hr_rest
        rel_hr = (hr - hr_rest) / hr_reserve
        return vo2 / rel_hr if rel_hr > 0 else None

    def compute_vo2(self, hr_rest=60, hr_max=200):
        speed = self.pace_to_speed(self.pace)
        vo2 = self.estimate_vo2(speed)
        vo2max = self.estimate_vo2max(vo2, self.average_heartrate, hr_rest, hr_max)
        self.vo2max = vo2max


In [7]:
class CollatedFitDataProcessors:
    def __init__(self, folder_path):
        self.folder_path = folder_path
        self.sorted_files, self.sorted_dates = self._get_fit_file_names_and_dates()
        self.processors = [FitDataProcessor(file) for file in self.sorted_files]
    
    def _get_fit_file_names_and_dates(self):
        """Returns sorted lists of FIT file names and their corresponding dates."""
        files = glob.glob(os.path.join(self.folder_path, "Running*.fit"))
        
        file_dates =  [re.search("([0-9]{4}\-[0-9]{2}\-[0-9]{2})", fileName).group(1) for fileName in files]
        
        # Sort files by date
        sorted_files_dates = sorted(zip(files, file_dates), key=lambda x: x[1])
        sorted_files, sorted_dates = zip(*sorted_files_dates) if sorted_files_dates else ([], [])
        
        return list(sorted_files), np.array(sorted_dates)
    
    def _format_pace(self, pace_in_minutes):
        """Convert pace from minutes per kilometer to 'min' 'sec' format."""
        minutes = int(pace_in_minutes)
        seconds = int((pace_in_minutes - minutes) * 60)
        return f"{minutes}' {seconds}\""

    def get_color_for_distance(self):
        c_short = 'tab:orange'
        c_long = 'tab:green'
        c_5 = 'tab:red'
        c_sonst = 'tab:blue'
        colors, markers = [], []
        for p in self.processors:
            c = c_sonst
            if p.distance > 10000:
                c = c_long
            if p.distance < 2500:
                c = c_short
            if p.distance < 5100 and p.distance > 4900:
                c = c_5
            colors.append(c)
            if '2024-12' in p.date:
                markers.append('x')
            if '2025-01' in p.date:
                markers.append('o')
            if '2025-02' in p.date:
                markers.append('+')
            if '2025-03' in p.date:
                markers.append('*')
            if '2025-04' in p.date:
                markers.append('s')
            if '2025-05' in p.date:
                markers.append('x')
        return colors, markers
    
    def plot_heart_rate_vs_pace(self, interval=1000, fit = True,
                               style = 'default'):
        """Plots all heart rate vs pace data from different files with a gradient color scale based on time."""
        if not self.processors:
            print("No data available for plotting.")
            return
        if style == 'default':
            colors=mcp.gen_color(cmap="spring_r",n = len(self.sorted_dates))
            markers = ['x','o','+','s','^','>', '*'] * 10
        else:
            colors, markers = self.get_color_for_distance()
        plt.figure(figsize=(8, 5))
        
        i = 0 
        paces, heartrates = [], []
        for processor in (self.processors):
            paces.append(processor.pace)
            heartrates.append(processor._average_heartrate)
            formatted_pace = self._format_pace(processor.pace)
            label_text = f"{processor.date} ({formatted_pace}, {(processor.distance/1000):.3} km)"
            plt.errorbar(processor.pace, processor._average_heartrate, 
                                  color=colors[i],  label=label_text, 
                         elinewidth = 0.2, markersize = markersize[markers[i]],
                        marker = markers[i])
            i+=1
        
        plt.xlabel("Pace (min/km)")
        plt.ylabel("Heart Rate (bpm)")
        plt.title("Heart Rate as a Function of Pace (Color by Time)")
        plt.grid(True)
        plt.legend(loc="upper left", bbox_to_anchor=(1, 1), ncol = 2, fontsize="small",)
        
        
        if fit: 
            #m, b = np.polyfit(paces, heartrates, 2)
            pace_range = np.linspace(min(paces), max(paces), 1000)
            #fit_line = m * pace_range + b
            z = np.polyfit(paces, heartrates, 3)
            p = np.poly1d(z)
            _ = plt.plot(pace_range, p(pace_range), '-')
            fit_line  = p(pace_range)
            #plt.plot(pace_range, fit_line, color="grey",
            #         linestyle="--", alpha= 0.3)#, label=f"Fit: HR = {m:.2f} * Pace + {b:.2f}")
            distances = []
            for p_x, p_y in zip(paces, heartrates):
                ddx = (pace_range - p_x)
                ddy = (fit_line - p_y)
                dd = np.min(np.sqrt(ddx**2 + ddy**2))
                
                sign = -1
                pace_soll = pace_range[np.argmin(( pace_range- p_x)**2)]
                heartrate_soll =  p(pace_soll)# m * pace_soll + b
                if heartrate_soll > p_y:
                    sign = +1 
                distances.append(dd *sign )
            self.distances = distances
        plt.show()

    def plot_distances_vs_cycle_day(self):
        
        for pro, dist in zip(self.processors, self.distances):
            plt.scatter(pro.cycle_day, dist, color= 'tab:blue')
            #plt.scatter(pro.pace(), dist, color= 'tab:blue')
        plt.scatter(self.processors[-1].cycle_day, self.distances[-1], marker = 'x')
        plt.ylabel("Deviation from Fit [arb. unit]")
        plt.xlabel("Cycle Day")
        plt.gca().set_ylim(np.min(self.distances) -2, 
                           np.max(self.distances) + 2)
        yy = plt.gca().get_ylim()
        
        if True:
            plt.fill_betweenx(y = (yy[0],yy[1]), x1 = 0, x2 = 4,  color = 'tab:red', alpha = 0.2)
            plt.text(-1, yy[1]- 1 , 'Menstruation', color = 'tab:red')
            plt.fill_betweenx(y = (yy[0],yy[1]), x1 = 4, x2 = 13,  color = 'tab:orange', alpha = 0.2)
            plt.text(6.6, yy[1]- 1 , 'Follicular', color = 'tab:orange')
            plt.fill_betweenx(y = (yy[0],yy[1]), x1 = 13, x2 = 15,  color = 'tab:green', alpha = 0.2)
            plt.text(12, yy[1]- 1 , 'Ovulation', color = 'tab:green')
            plt.fill_betweenx(y = (yy[0],yy[1]), x1 = 15, x2 = 28,  color = 'tab:purple', alpha = 0.2)
            plt.text(20, yy[1]- 1 , 'Luteal', color = 'tab:purple')
            
        plt.plot([0,28], [0,0], color = 'grey', alpha = 0.4)
        plt.text(25, 2, 'better',  color = 'grey',)       
        plt.text(25, -2, 'worse',  color = 'grey',)
    
    def select_by_date(self, date):
        for processor in self.processors:
            if processor.date == date:
                return processor
        

In [10]:
# Example usage:
collated_processors = CollatedFitDataProcessors("data/")

2024-12-19
2024-12-19
2025-01-02
2025-01-04
2025-01-14
2025-01-20
2025-01-21
2025-01-23
2025-01-26
2025-01-28
2025-02-03
2025-02-04
2025-02-05
2025-02-08
2025-02-08
2025-02-10
error in file, set to correct date
2025-02-17
2025-02-18
2025-02-19
2025-02-21
2025-02-23
2025-02-24
2025-02-28
2025-03-03
2025-03-06
2025-03-07
2025-03-10
2025-03-11
2025-03-12
2025-03-15
2025-03-20
2025-03-22
2025-03-24
2025-03-26
2025-03-27
2025-03-28
2025-04-01
2025-04-03
2025-04-08
2025-04-10
2025-04-13
2025-04-15
2025-04-17
2025-04-21
2025-04-22
2025-04-24
2025-04-24
2025-04-25
2025-04-28
2025-04-30
2025-05-05
2025-05-08
2025-05-12
2025-05-17
2025-05-18
2025-05-20
2025-05-25
error in file, set to correct date
2025-02-17
2025-06-06
error in file, set to correct date
2025-02-17
2025-06-18
2025-06-22
2025-06-28
2025-07-03


In [11]:

pr = collated_processors.select_by_date('2025-07-03').set_pause(1*60 + 30 )
pr = collated_processors.select_by_date('2025-06-28').set_pause(1*60 + 19 )
#pr = collated_processors.select_by_date('2025-06-26').set_pause(5*60 + 17 )
pr = collated_processors.select_by_date('2025-06-22').set_pause(3*60 + 4 )
#pr = collated_processors.select_by_date('2025-06-17').set_pause(3*60 + 40 )
pr = collated_processors.select_by_date('2025-06-06').set_pause(9*60 + 18 )
#pr = collated_processors.select_by_date('2025-06-04').set_pause(2*60 + 30 )
pr = collated_processors.select_by_date('2025-05-20').set_pause(13*60 + 4 )
pr = collated_processors.select_by_date('2025-05-18').set_pause(9*60   )
pr = collated_processors.select_by_date('2025-05-17').set_pause(6*60   )
pr = collated_processors.select_by_date('2025-05-12').set_pause(0*60 + 38 )
pr = collated_processors.select_by_date('2025-05-08').set_pause(0*60 + 39 )
pr = collated_processors.select_by_date('2025-05-05').set_pause(0*60 + 41 )
pr = collated_processors.select_by_date('2025-04-30').set_pause(8*60 + 46 )
pr = collated_processors.select_by_date('2025-04-28').set_pause(1*60 + 57 )
pr = collated_processors.select_by_date('2025-04-25').set_pause(4*60 + 0 )
pr = collated_processors.select_by_date('2025-04-22').set_pause(1*60 + 45 )
pr = collated_processors.select_by_date('2025-04-21').set_pause(2*60 + 31 )
pr = collated_processors.select_by_date('2025-04-17').set_pause(1*60 + 56 )
pr = collated_processors.select_by_date('2025-04-15').set_pause(1*60 + 31 )
pr = collated_processors.select_by_date('2025-04-13').set_pause(2*60 + 0 )
pr = collated_processors.select_by_date('2025-04-10').set_pause(2*60 + 3 )
pr = collated_processors.select_by_date('2025-04-08').set_pause(1*60 + 36 )
pr = collated_processors.select_by_date('2025-04-03').set_pause(2*60 + 8 )
pr = collated_processors.select_by_date('2025-04-01').set_pause(5*60)
pr = collated_processors.select_by_date('2025-03-28').set_pause(110)
pr = collated_processors.select_by_date('2025-04-03').set_pause(128)
pr = collated_processors.select_by_date('2025-03-27').set_pause(120)
pr = collated_processors.select_by_date('2025-03-26').set_pause(115)
pr = collated_processors.select_by_date('2025-03-26').set_pause(33)
pr = collated_processors.select_by_date('2025-03-24').set_pause(2*60 + 8)
pr = collated_processors.select_by_date('2025-03-22').set_pause(37)
pr = collated_processors.select_by_date('2025-03-20').set_pause(16*60 + 46)
pr = collated_processors.select_by_date('2025-03-15').set_pause(2*60 + 46)
pr = collated_processors.select_by_date('2025-03-12').set_pause(4*60 + 40)
#pr = collated_processors.select_by_date('2025-03-11').set_pause(-(20*60))
pr = collated_processors.select_by_date('2025-03-10').set_pause(120)
pr = collated_processors.select_by_date('2025-03-07').set_pause(146)
pr = collated_processors.select_by_date('2025-03-03').set_pause(11* 60)
pr = collated_processors.select_by_date('2025-02-28').set_pause(15* 60)
pr = collated_processors.select_by_date('2025-02-24').set_pause(90)
pr = collated_processors.select_by_date('2025-02-23').set_pause(8*60 + 33)
pr = collated_processors.select_by_date('2025-02-21').set_pause(15*60 + 25)
pr = collated_processors.select_by_date('2025-02-19').set_pause(2*60 + 17)
pr = collated_processors.select_by_date('2025-02-18').set_pause(1*60 + 12)
pr = collated_processors.select_by_date('2025-02-17').set_pause(8*60 + 53)
pr = collated_processors.select_by_date('2025-02-17').set_pause(8*60 + 53)
pr = collated_processors.select_by_date('2025-02-10').set_pause(2*60 + 21)
pr = collated_processors.select_by_date('2025-02-08').set_pause(1*60 + 12)
pr = collated_processors.select_by_date('2025-02-05').set_pause(1*60 + 2)


In [12]:
import os
import yaml
from pathlib import Path

def save_processors_as_yaml(processors, output_folder="data_compact", overwrite=True):
    os.makedirs(output_folder, exist_ok=True)

    for p in processors:
        date_str = str(p.date)
        filepath = Path(output_folder) / f"{date_str}.yaml"

        run_entry = {
            "date": date_str,
            "distance_km": float(p.distance),
            "time": str(p.time * 60),
            "pace": str(p.pace),
            "heartrate_avg": int(p.average_heartrate),
            "max_heartrate": int(p.max_heartrate),
            "cycle_day": int(p.cycle_day),
            "vo2max": int(p.vo2max),
        }

        # Logik für Overwrite oder Anhängen
        if filepath.exists() and not overwrite:
            with open(filepath, "r") as f:
                day_data = yaml.safe_load(f) or {}
            runs = day_data.get("runs", [])
            runs.append(run_entry)
        else:
            # Datei wird überschrieben (entweder weil sie nicht existiert oder overwrite=True)
            runs = [run_entry]

        # Speichern
        data = {
            "date": date_str,
            "runs": runs
        }

        with open(filepath, "w") as f:
            yaml.dump(data, f, sort_keys=False)

        print(f"{filepath} gespeichert ({len(runs)} Läufe).")
save_processors_as_yaml(collated_processors.processors)


data_compact/2024-12-19.yaml gespeichert (1 Läufe).
data_compact/2024-12-19.yaml gespeichert (1 Läufe).
data_compact/2025-01-02.yaml gespeichert (1 Läufe).
data_compact/2025-01-04.yaml gespeichert (1 Läufe).
data_compact/2025-01-14.yaml gespeichert (1 Läufe).
data_compact/2025-01-20.yaml gespeichert (1 Läufe).
data_compact/2025-01-21.yaml gespeichert (1 Läufe).
data_compact/2025-01-23.yaml gespeichert (1 Läufe).
data_compact/2025-01-26.yaml gespeichert (1 Läufe).
data_compact/2025-01-28.yaml gespeichert (1 Läufe).
data_compact/2025-02-03.yaml gespeichert (1 Läufe).
data_compact/2025-02-04.yaml gespeichert (1 Läufe).
data_compact/2025-02-05.yaml gespeichert (1 Läufe).
data_compact/2025-02-08.yaml gespeichert (1 Läufe).
data_compact/2025-02-08.yaml gespeichert (1 Läufe).
data_compact/2025-02-10.yaml gespeichert (1 Läufe).
data_compact/2025-02-17.yaml gespeichert (1 Läufe).
data_compact/2025-02-18.yaml gespeichert (1 Läufe).
data_compact/2025-02-19.yaml gespeichert (1 Läufe).
data_compact