In [2]:
import pandas as pd
import numpy as np
from scipy.signal import welch
from scipy.signal import find_peaks
from scipy.integrate import trapz


# HRV

In [3]:
# Load RR intervals from the file
rr_intervals = pd.read_csv('PCS27_CTRL_RRIntervals.csv')  # Replace with your file name
rr_intervals = rr_intervals[' rr'].values  # Assuming the column is named 'RR_intervals'

# Calculate SDNN
sdnn = np.std(rr_intervals)

# Calculate RMSSD
differences = np.diff(rr_intervals)
rmssd = np.sqrt(np.mean(differences ** 2))

# Calculate Heart Rate
heart_rate = 60000 / np.mean(rr_intervals)  # Assuming RR intervals are in milliseconds

# Calculate LF and HF
time = np.cumsum(rr_intervals) / 1000.0  # Convert to seconds
fs = 4.0  # Sampling frequency (Hz)
f, psd = welch(rr_intervals, fs=fs, nperseg=1024)
lf_band = (0.04, 0.15)  # Low-frequency band
hf_band = (0.15, 0.4)   # High-frequency band
lf_power = np.trapz(psd[(f >= lf_band[0]) & (f <= lf_band[1])], f[(f >= lf_band[0]) & (f <= lf_band[1])])
hf_power = np.trapz(psd[(f >= hf_band[0]) & (f <= hf_band[1])], f[(f >= hf_band[0]) & (f <= hf_band[1])])
total_power = lf_power + hf_power
lf_percent = (lf_power / total_power) * 100
hf_percent = (hf_power / total_power) * 100
lf_hf_ratio = lf_power / hf_power

print(f"SDNN: {sdnn:.2f} ms")
print(f"RMSSD: {rmssd:.2f} ms")
print(f"Heart Rate: {heart_rate:.2f} bpm")
print(f"LF%: {lf_percent:.2f}%")
print(f"HF%: {hf_percent:.2f}%")
print(f"LF/HF Ratio: {lf_hf_ratio:.2f}")

SDNN: 40.03 ms
RMSSD: 53.40 ms
Heart Rate: 74.29 bpm
LF%: 32.68%
HF%: 67.32%
LF/HF Ratio: 0.49


  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,


# CPET

## Anaerobic Threshold

In [4]:

# Import CPET data for anaerobic threshold analysis
df_cpet = pd.read_csv("PCS01_V1_CPET.csv", header=0)

# Remove any spaces in the names of the columns
df_cpet.columns = df_cpet.columns.str.replace(' ', '')

# Display the first few rows
print(df_cpet.head())


# Assuming df_cpet has columns: 'VO2', 'VCO2', 'VE', 'VE/VO2', 'VE/VCO2'
# calculate the first derivative of VE/VO2 and VE/VCO2 to find the inflection points.

# Calculate the derivative of VE/VO2 and VE/VCO2
df_cpet['dVE_VO2'] = np.gradient(df_cpet['VE/VO2'])
df_cpet['dVE_VCO2'] = np.gradient(df_cpet['VE/VCO2'])

# Find peaks in the derivative which correspond to the anaerobic threshold
peaks_VO2 = find_peaks(df_cpet['dVE_VO2'])[0]
peaks_VCO2 = find_peaks(df_cpet['dVE_VCO2'])[0]

# Assuming the anaerobic threshold is at the first peak (can be adjusted based on the data)
at_VO2 = df_cpet['VO2'].iloc[peaks_VO2[0]] if len(peaks_VO2) > 0 else None

print(f"Anaerobic Threshold (VO2): {at_VO2}")


          TIME       VO2       VO2/kg      METS         VCO2         VE  \
0   ----------       NaN          NaN       NaN          NaN        NaN   
1  1.027833343  0.221697  2.917063951  0.833447  0.171730533   6.180395   
2   2.00733304  0.326049  4.290122509  1.225749  0.239537001   8.347985   
3  3.044999599  0.506384  6.662941456  1.903698  0.331292003  10.755207   
4   4.02566576  0.567372  7.465418816  2.132977   0.36837855  11.916273   

           RER  WorkR        Vt       FEO2     FECO2   HR      VE.1  \
0          NaN    NaN       NaN        NaN       NaN  NaN       NaN   
1  0.774618685    0.0  0.529368  16.817022  3.392320  0.0  5.123333   
2  0.734664977    0.0  0.511053  16.490217  3.501923  0.0  6.920189   
3  0.654231369    0.0  0.744021  15.671541  3.756616  0.0  8.915692   
4  0.649271846    0.0  0.730368  15.618137  3.770022  0.0  9.878176   

      VE/VO2    VE/VCO2  
0        NaN        NaN  
1  27.877684  35.988911  
2  25.603445  34.850506  
3  21.239250  32.4

# V2

In [19]:
import pandas as pd
import numpy as np
from scipy.signal import find_peaks
from sklearn.linear_model import LinearRegression

# Import and clean data
df_cpet = pd.read_csv("PCS01_V1_CPET.csv", header=0)
df_cpet.columns = df_cpet.columns.str.replace(' ', '')
print(df_cpet.head())

# Dictionary to store results
results = {}

def safe_max(series):
    """Handle max calculation with NaN/inf values"""
    clean_series = pd.to_numeric(series, errors='coerce').replace([np.inf, -np.inf], np.nan)
    return clean_series.max()

# Find row with peak VO2 for parameter calculations
if 'VO2' in df_cpet.columns:
    vo2_peak_abs = safe_max(df_cpet['VO2'])
    peak_vo2_row = df_cpet[df_cpet['VO2'] == vo2_peak_abs].iloc[0] if not pd.isna(vo2_peak_abs) else None
else:
    peak_vo2_row = None

# Anaerobic Threshold Calculation (2 decimal places)
try:
    if all(col in df_cpet.columns for col in ['VE/VO2', 'VE/VCO2', 'VO2']):
        df_cpet['dVE_VO2'] = np.gradient(df_cpet['VE/VO2'])
        df_cpet['dVE_VCO2'] = np.gradient(df_cpet['VE/VCO2'])
        
        peaks_VO2 = find_peaks(df_cpet['dVE_VO2'])[0]
        at_VO2 = df_cpet['VO2'].iloc[peaks_VO2[0]] if len(peaks_VO2) > 0 else None
        results['Anaerobic Threshold (VO2)'] = f"{at_VO2:.2f} mL/min" if at_VO2 else "Could not determine"
    else:
        missing = [col for col in ['VE/VO2', 'VE/VCO2', 'VO2'] if col not in df_cpet.columns]
        results['Anaerobic Threshold (VO2)'] = f"Missing columns: {', '.join(missing)}"
except Exception as e:
    results['Anaerobic Threshold (VO2)'] = f"Calculation error: {str(e)}"

# VO2 Calculations (2 decimal places)
if 'VO2' in df_cpet.columns:
    results['VO2 Peak (absolute)'] = f"{vo2_peak_abs:.2f} mL/min" if not pd.isna(vo2_peak_abs) else "Invalid VO2 data"
    
    if 'VO2/kg' in df_cpet.columns:
        vo2_peak_rel = peak_vo2_row['VO2/kg'] if peak_vo2_row is not None else safe_max(df_cpet['VO2/kg'])
        results['VO2 Peak (relative)'] = f"{vo2_peak_rel:.2f} mL/kg/min" if not pd.isna(vo2_peak_rel) else "Invalid VO2/kg data"
    else:
        results['VO2 Peak (relative)'] = "VO2/kg column missing"
else:
    results['VO2 Peak (absolute)'] = "VO2 column missing"
    results['VO2 Peak (relative)'] = "VO2 column missing"

# Parameter calculations at peak VO2 (all to 2 decimal places)
parameter_map = {
    'Peak HR': ('HR', 'bpm'),
    'Peak VE': ('VE', 'L/min'),
    'Peak VE/VO2': ('VE/VO2', ''),
    'Peak O2 Pulse': ('O2_Pulse', 'mL/beat'),
    'Peak RER': ('RER', ''),
    'Peak Respiratory Rate': ('RR', 'breaths/min'),
    'Peak PetCO2': ('PetCO2', 'mmHg')
}

for param, (col, unit) in parameter_map.items():
    if col in df_cpet.columns:
        if peak_vo2_row is not None:
            value = peak_vo2_row[col]
            results[param] = f"{value:.2f} {unit}" if unit else f"{value:.2f}"
        else:
            # Fallback to max value if peak VO2 row not available
            value = safe_max(df_cpet[col]) if 'Peak' in param else df_cpet[col].iloc[-1]
            results[param] = f"{value:.2f} {unit}" if not pd.isna(value) else f"Invalid {col} data"
    else:
        results[param] = f"{col} column missing"

# Calculate O2_Pulse if not in original data but we have VO2 and HR
if 'O2_Pulse' not in df_cpet.columns and all(col in df_cpet.columns for col in ['VO2', 'HR']):
    df_cpet['O2_Pulse'] = np.where(df_cpet['HR'] > 0, df_cpet['VO2']/df_cpet['HR'], np.nan)

# Print formatted results
print("\nCPET Analysis Results:")
max_len = max(len(k) for k in results.keys())
for param in sorted(results.keys()):
    print(f"{param.ljust(max_len)} : {results[param]}")

       TIME       VO2    VO2/kg      METS      VCO2         VE       RER  \
0  1.027833  0.221697  2.917064  0.833447  0.171731   6.180395  0.774619   
1  2.007333  0.326049  4.290123  1.225749  0.239537   8.347985  0.734665   
2  3.045000  0.506384  6.662941  1.903698  0.331292  10.755207  0.654231   
3  4.025666  0.567372  7.465419  2.132977  0.368379  11.916273  0.649272   
4  5.001165  0.575461  7.571852  2.163386  0.383137  12.328949  0.665792   

   WorkR        Vt       FEO2     FECO2   HR       VE.1     VE/VO2    VE/VCO2  
0    0.0  0.529368  16.817022  3.392320  0.0   5.123333  27.877684  35.988911  
1    0.0  0.511053  16.490217  3.501923  0.0   6.920189  25.603445  34.850506  
2    0.0  0.744021  15.671541  3.756616  0.0   8.915692  21.239250  32.464432  
3    0.0  0.730368  15.618137  3.770022  0.0   9.878176  21.002581  32.347900  
4    0.0  0.707464  15.703459  3.789552  0.0  10.220269  21.424484  32.178925  

CPET Analysis Results:
Anaerobic Threshold (VO2) : 0.58 mL/min

In [17]:
import pandas as pd
import numpy as np
from scipy.signal import find_peaks
from sklearn.linear_model import LinearRegression

def process_cpet_report(file_path):
    try:
        # Read the Excel file without headers first
        df_raw = pd.read_excel(file_path, header=None)
        
        # Find the start of the data table (look for 'TIME' in first column)
        start_row = None
        for i in range(len(df_raw)):
            if df_raw.iloc[i, 0] == 'TIME':
                start_row = i
                break
                
        if start_row is None:
            raise ValueError("Could not find 'TIME' in first column to identify data start")
        
        # Get the three header rows
        header_row1 = df_raw.iloc[start_row].values
        header_row2 = df_raw.iloc[start_row+1].values
        header_row3 = df_raw.iloc[start_row+2].values
        
        # Combine headers into single row
        combined_headers = []
        for i in range(len(header_row1)):
            # Combine the three header parts
            col_name = f"{str(header_row1[i]).strip()}"
            if pd.notna(header_row2[i]):
                col_name += f" {str(header_row2[i]).strip()}"
            if pd.notna(header_row3[i]):
                col_name += f" {str(header_row3[i]).strip()}"
            combined_headers.append(col_name.strip())
        
        # Read the data with combined headers
        df = pd.read_excel(file_path, header=start_row+2)  # Skip the three header rows
        df.columns = [col.split()[0] for col in combined_headers]  # Use just the main name
        
        # Clean column names
        df.columns = [col.strip() for col in df.columns]
        
        # Convert all columns to numeric, coerce errors to NaN
        for col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        
        # Drop rows with all NaN values (footer/header rows)
        df = df.dropna(how='all')
        
        # Try to extract VE/VCO2 slope from footer
        ve_vco2_slope = None
        for i in range(len(df_raw)-5, len(df_raw)):  # Check last 5 rows
            row = df_raw.iloc[i].values
            for val in row:
                if isinstance(val, str) and ('VE/VCO2' in val or 'slope' in val):
                    try:
                        # Extract numeric value from string
                        parts = val.split()
                        for part in parts:
                            try:
                                ve_vco2_slope = float(part)
                                break
                            except ValueError:
                                continue
                    except:
                        pass
        
        return df, ve_vco2_slope
        
    except Exception as e:
        raise ValueError(f"Error processing Excel file: {str(e)}")

def analyze_cpet_data(df, ve_vco2_slope=None):
    results = {}
    
    # Find row with peak VO2
    if 'VO2' in df.columns:
        vo2_peak_abs = df['VO2'].max()
        peak_vo2_row = df[df['VO2'] == vo2_peak_abs].iloc[0] if not pd.isna(vo2_peak_abs) else None
    else:
        peak_vo2_row = None
    
    # Anaerobic Threshold Calculation
    try:
        if all(col in df.columns for col in ['VE/VO2', 'VE/VCO2', 'VO2']):
            df['dVE_VO2'] = np.gradient(df['VE/VO2'])
            df['dVE_VCO2'] = np.gradient(df['VE/VCO2'])
            
            peaks_VO2 = find_peaks(df['dVE_VO2'])[0]
            at_VO2 = df['VO2'].iloc[peaks_VO2[0]] if len(peaks_VO2) > 0 else None
            results['Anaerobic Threshold (VO2)'] = f"{at_VO2:.2f} mL/min" if at_VO2 else "Could not determine"
        else:
            missing = [col for col in ['VE/VO2', 'VE/VCO2', 'VO2'] if col not in df.columns]
            results['Anaerobic Threshold (VO2)'] = f"Missing columns: {', '.join(missing)}"
    except Exception as e:
        results['Anaerobic Threshold (VO2)'] = f"Calculation error: {str(e)}"
    
    # VO2 Calculations
    if 'VO2' in df.columns:
        results['VO2 Peak (absolute)'] = f"{vo2_peak_abs:.2f} mL/min"
        
        if 'VO2/kg' in df.columns:
            vo2_peak_rel = peak_vo2_row['VO2/kg'] if peak_vo2_row is not None else df['VO2/kg'].max()
            results['VO2 Peak (relative)'] = f"{vo2_peak_rel:.2f} mL/kg/min"
        else:
            results['VO2 Peak (relative)'] = "VO2/kg column missing"
    else:
        results['VO2 Peak (absolute)'] = "VO2 column missing"
        results['VO2 Peak (relative)'] = "VO2 column missing"
    
    # VE/VCO2 Calculations
    if ve_vco2_slope is not None:
        results['VE/VCO2 slope'] = f"{ve_vco2_slope:.2f} (from report footer)"
    elif all(col in df.columns for col in ['VE', 'VCO2']):
        try:
            # Calculate VE/VCO2 slope using linear regression
            X = df['VCO2'].values.reshape(-1, 1)
            y = df['VE'].values
            model = LinearRegression().fit(X, y)
            ve_vco2_slope_calc = model.coef_[0]
            results['VE/VCO2 slope'] = f"{ve_vco2_slope_calc:.2f} (calculated)"
        except:
            results['VE/VCO2 slope'] = "Calculation error"
    else:
        results['VE/VCO2 slope'] = "Missing VE or VCO2 columns"
    
    # Parameter calculations at peak VO2
    parameter_map = {
        'Peak O2 Pulse': ('O2_Pulse', 'mL/beat'),
        'Peak RER': ('RER', ''),
        'Peak Respiratory Rate': ('RR', 'breaths/min'),
        'PetCO2 at peak': ('PetCO2', 'mmHg')
    }
    
    for param, (col, unit) in parameter_map.items():
        if col in df.columns:
            if peak_vo2_row is not None:
                value = peak_vo2_row[col]
                results[param] = f"{value:.2f} {unit}" if unit else f"{value:.2f}"
            else:
                value = df[col].max()
                results[param] = f"{value:.2f} {unit}" if not pd.isna(value) else f"Invalid {col} data"
        else:
            results[param] = f"{col} column missing"
    
    # Calculate O2_Pulse if not in original data but we have VO2 and HR
    if 'O2_Pulse' not in df.columns and all(col in df.columns for col in ['VO2', 'HR']):
        df['O2_Pulse'] = np.where(df['HR'] > 0, df['VO2']/df['HR'], np.nan)
    
    return results

# Example usage:
file_path = "PCS01_V1_CPET.xls"
try:
    df_cpet, ve_vco2_slope = process_cpet_report(file_path)
    results = analyze_cpet_data(df_cpet, ve_vco2_slope)
    
    # Print results
    print("\nCPET Analysis Results:")
    max_len = max(len(k) for k in results.keys())
    for param in sorted(results.keys()):
        print(f"{param.ljust(max_len)} : {results[param]}")
        
    # Optionally save the cleaned data
    df_cpet.to_csv("cleaned_cpet_data.csv", index=False)
    print("\nCleaned data saved to cleaned_cpet_data.csv")
    
except Exception as e:
    print(f"Error processing file: {str(e)}")

Error processing file: Error processing Excel file: Excel file format cannot be determined, you must specify an engine manually.


# FMD

In [15]:
# Import the FMD dataset as df_fmd

filename = "PCS04_V2_FMD.csv"

df_fmd2 = pd.read_csv(filename, engine='python')


df_fmd = pd.read_csv(filename, skiprows=7, skipfooter=2, engine='python')

df_fmd.head()

Unnamed: 0,Time [min:sec],Mean Diameter [mm],Positive Shear Rate [sec-1],Negative Shear Rate [sec-1],Positive Velocity [cm/sec],Negative Velocity [cm/sec],Unnamed: 6,Time [ms],Instant Diameter [mm],Mean Diameter [mm].1,Positive Velocity [cm/sec].1,Negative Velocity [cm/sec].1
0,00:00,4.27,308.9,-61.9,32.97,-6.61,,0,4.273,4.27,32.97,-6.61
1,00:01,4.27,284.2,-68.8,30.34,-7.34,,33,4.271,4.27,32.88,-6.64
2,00:02,4.269,268.7,-68.9,28.67,-7.35,,66,4.278,4.27,32.82,-6.65
3,00:03,4.269,267.8,-70.4,28.58,-7.51,,100,4.279,4.271,32.78,-6.67
4,00:04,4.268,262.1,-70.2,27.96,-7.49,,133,4.275,4.271,32.7,-6.7


In [16]:
# Convert 'Time [min:sec]' to total seconds
df_fmd[['Minutes', 'Seconds']] = df_fmd['Time [min:sec]'].str.split(':', expand=True).astype(float)
df_fmd['Time_seconds'] = df_fmd['Minutes'] * 60 + df_fmd['Seconds']
df_fmd.drop(columns=['Minutes', 'Seconds'], inplace=True)

# Remove rows with missing values in key columns
df_fmd_clean = df_fmd.dropna(subset=['Mean Diameter [mm]', 'Positive Shear Rate [sec-1]', 
                                      'Negative Shear Rate [sec-1]', 'Positive Velocity [cm/sec]', 
                                      'Negative Velocity [cm/sec]'])

# Define baseline and hyperemia periods based on time
baseline_period = df_fmd_clean[df_fmd_clean['Time_seconds'] < 60]  # First 60 sec
hyperemia_period = df_fmd_clean[(df_fmd_clean['Time_seconds'] >= 420) & (df_fmd_clean['Time_seconds'] < 480)]  # 7 to 8 min

# Diameter Baseline
diameter_baseline = baseline_period['Mean Diameter [mm]'].mean() / 10  # Convert mm to cm

# Diameter Maximum
df_fmd['Mean Diameter [mm]'] = pd.to_numeric(df_fmd['Mean Diameter [mm]'], errors='coerce')
diameter_max = hyperemia_period['Mean Diameter [mm]'].max() / 10  # Convert mm to cm

# FMD Percent
fmd_percent = ((diameter_max - diameter_baseline) / diameter_baseline) * 100

# Shear Rate
shear_rate_baseline = df_fmd2['Positive Shear Rate Baseline [sec-1]'][0]
shear_rate_maximum = df_fmd2['Positive Shear Rate Maximum [sec-1]'][0]
shear_rate_areatomaximum = df_fmd2['Positive Shear Rate Area to Maximum []'][0]

# Hyperemia Shear Rate Max AUC
shear_rate_auc = trapz(4 * hyperemia_period['Positive Velocity [cm/sec]'] / hyperemia_period['Mean Diameter [mm]'])

# Mean Velocity
velocity_baseline = df_fmd2['Positive Velocity Baseline [cm/sec]'][0]
velocity_maximum = df_fmd2['Positive Velocity Maximum [cm/sec]'][0]
velocity_mean = df_fmd['Positive Velocity [cm/sec]'].mean()

# Flow Rate
flow_rate = (3.1416 * ((df_fmd['Mean Diameter [mm]'] / 10) / 2) ** 2 * df_fmd['Positive Velocity [cm/sec]'] * 60).mean()

# FMD Normalized
fmd_normalized = fmd_percent / shear_rate_auc

print(f"Diameter Baseline (cm): {diameter_baseline:.3f}")
print(f"Diameter Maximum (cm): {diameter_max:.3f}")
print(f"FMD (%): {fmd_percent:.2f}")
print(f"Shear Rate Baseline (s⁻¹): {shear_rate_baseline:.2f}")
print(f"Shear Rate Maximum (s⁻¹): {shear_rate_maximum:.2f}")
print(f"Shear Rate Area to Maximum (s⁻¹): {shear_rate_areatomaximum:.2f}")
print(f"Hyperemia Shear Rate Max (AUC): {shear_rate_auc:.2f}")
print(f"Baseline Velocity (cm/s): {velocity_baseline:.2f}")
print(f"Maximum Velocity (cm/s): {velocity_maximum:.2f}")
print(f"Mean Velocity (cm/s): {velocity_mean:.2f}")
print(f"Flow Rate (ml/min): {flow_rate:.2f}")
print(f"FMD Normalized: {fmd_normalized:.5f}")


Diameter Baseline (cm): 0.427
Diameter Maximum (cm): 0.475
FMD (%): 11.32
Shear Rate Baseline (s⁻¹): 325.15
Shear Rate Maximum (s⁻¹): 894.22
Shear Rate Area to Maximum (s⁻¹): 19562.50
Hyperemia Shear Rate Max (AUC): 2197.76
Baseline Velocity (cm/s): 34.70
Maximum Velocity (cm/s): 97.44
Mean Velocity (cm/s): 29.97
Flow Rate (ml/min): 282.98
FMD Normalized: 0.00515


  shear_rate_auc = trapz(4 * hyperemia_period['Positive Velocity [cm/sec]'] / hyperemia_period['Mean Diameter [mm]'])


[]
