### Feature extraction with pyPPG

*Date:* 2025-06-19 <br> 
*Author:* salil apte  <br> 
*Version:* 1.0  <br> 
*Filename:* `02-feature-extraction-pyppg.ipynb`

This notebook covers the initial steps in exploring feature extraction with the [pyPPG](https://pyppg.readthedocs.io) package: extracting fiducial points, visualizing them, and extracting biomarkers.

In [None]:
import pandas as pd
import numpy as np
from pyPPG import PPG, Fiducials, Biomarkers
import pyPPG.preproc as PP
import pyPPG.fiducials as FP
import pyPPG.biomarkers as BM
import pyPPG.ppg_sqi as SQI
from pyPPG.datahandling import plot_fiducials, save_data
import pandas as pd

In [None]:
# Read the raw training CSV into the DataFrame used by the following cells
df = pd.read_csv(
    filepath_or_buffer=r"E:\repos\vital-sign-estimation\data\raw\train.csv",
)

In [None]:
# Take the first 3000 columns of row 0 as the raw PPG trace
ppg_signal = df.iloc[0, :3000].values

# Configure pyPPG preprocessing: filter cut-offs (fL, fH), filter order and smoothing windows
signal = PP.Preprocess(fL=0.5, fH=12, order=4, sm_wins={'ppg': 50, 'vpg': 10, 'apg': 10, 'jpg': 10})
# NOTE(review): `signal_200hz` is not defined anywhere in the visible notebook, so this line
# raises NameError when the cells are run top to bottom — confirm which input was intended.
# The results are also stored as attributes on the Preprocess object itself.
signal.ppg, signal.vpg, signal.apg, signal.jpg = signal.get_signals(s=signal_200hz)

In [None]:
from dotmap import DotMap
def load_data_array(input_sig: np.ndarray, fs = np.nan, start_sig = 0, end_sig = -1, use_tk=True, print_flag=True):
    """
    Wrap an in-memory PPG signal in a DotMap holding the slice to be analysed.

    :param input_sig: array containing the PPG signal
    :type input_sig: ndarray
    :param fs: the sampling frequency of the PPG in Hz; a missing (None/NaN)
        or non-positive value falls back to 125 Hz
    :type fs: int
    :param start_sig: the first sample of the signal to be analysed
    :type start_sig: int
    :param end_sig: the last sample of the signal to be analysed (used as the
        exclusive slice bound); when it is not greater than ``start_sig``
        (e.g. the default -1) the signal is taken up to its end
    :type end_sig: int
    :param use_tk: accepted but not used by this function
    :type use_tk: bool
    :param print_flag: a bool for print message
    :type print_flag: bool

    :return: s: DotMap describing the PPG signal:

        * s.start_sig: the first sample of the signal to be analysed
        * s.end_sig: the last sample of the signal to be analysed
        * s.v: the selected samples, ``input_sig[start_sig:end_sig]``
        * s.fs: the sampling frequency of the PPG in Hz
        * s.name: name of the record (always ``"default"``)

    :raises ValueError: if ``input_sig`` cannot be measured or sliced
    """

    # NaN compares False against every number, so `fs <= 0` alone would let
    # the NaN default through and leave s.fs undefined for later steps.
    if fs is None or np.isnan(fs) or fs <= 0:
        fs = 125
        if print_flag:
            print('The default sampling frequency is 125 Hz for .txt.')

    s = DotMap()
    s.start_sig = start_sig

    try:
        # Fall back to the full length when no valid end index was given.
        last = end_sig if start_sig < end_sig else len(input_sig)
        s.end_sig = last
        s.v = input_sig[start_sig:last]
    except (TypeError, IndexError, KeyError) as err:
        # Raise a real exception type; raising a plain string is itself a TypeError.
        raise ValueError('There is no valid PPG signal!') from err

    s.fs = fs
    s.name = "default"

    return s

In [None]:
# Take the first 3000 columns of row 0 as the raw PPG trace
ppg_signal = df.iloc[0, :3000].values
# Use the array-based loader defined in this notebook: `load_data` is not imported
# until a later cell, so calling it here fails with NameError.
s = load_data_array(input_sig=ppg_signal, fs=100, start_sig=500, end_sig=2500, use_tk=True, print_flag=True)

In [None]:
# Preprocessing options stored on the signal structure
s.filtering = True   # filter the PPG signal (True) or leave it unfiltered (False)
s.fL = 0.5000001     # lower cutoff frequency in Hz
s.fH = 12            # upper cutoff frequency in Hz
s.order = 4          # order of the filter
# Smoothing windows, in milliseconds, for the PPG and its first, second and third derivatives
s.sm_wins = {
    'ppg': 50,
    'vpg': 10,
    'apg': 10,
    'jpg': 10,
}

In [None]:
# Build the preprocessing object from the settings stored on `s`
prep = PP.Preprocess(
    fL=s.fL,
    fH=s.fH,
    order=s.order,
    sm_wins=s.sm_wins,
)
# Store the four returned signals (PPG and its three derivatives) back on `s`
(s.ppg, s.vpg, s.apg, s.jpg) = prep.get_signals(s=s)

In [None]:
import matplotlib.pyplot as plt

# Four stacked panels that share the time axis but keep independent y-scales
fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4, ncols=1, sharex=True, sharey=False)

# Time vector in seconds: sample index divided by the sampling frequency
t = np.arange(0, len(s.ppg)) / s.fs

# Panel 1: filtered PPG signal
ax1.plot(t, s.ppg)
ax1.set_xlabel('')
ax1.set_ylabel('PPG')

# Panel 2: first derivative
ax2.plot(t, s.vpg)
ax2.set_xlabel('')
ax2.set_ylabel('PPG\'')

# Panel 3: second derivative
ax3.plot(t, s.apg)
ax3.set_xlabel('')
ax3.set_ylabel('PPG\'\'')

# Panel 4: third derivative; only the bottom panel carries the time label
ax4.plot(t, s.jpg)
ax4.set_xlabel('Time (s)')
ax4.set_ylabel('PPG\'\'\'')

# Render the figure
plt.show()

In [None]:
# Initialise the correction for fiducial points
# Names of the fiducial points for which correction is switched on
corr_on = ['on', 'dn', 'dp', 'v', 'w', 'f']
# Build a one-row table (row label 0) holding True in every column named in corr_on
correction=pd.DataFrame()
correction.loc[0, corr_on] = True
s.correction=correction

# Create a PPG class
# The name `s` is rebound to the PPG object here; every later cell uses that object
s = PPG(s)

In [None]:
# Build the fiducial-point extractor for the PPG object
fpex = FP.FpCollection(s=s)
# Run the extraction
fiducials = fpex.get_fiducials(s=s)
# Wrap the extracted points in pyPPG's Fiducials class
fp = Fiducials(fp=fiducials)

In [None]:
# NOTE(review): these imports repeat the ones in the first cell, except for `load_data`,
# which is first imported here.
from pyPPG import PPG, Fiducials, Biomarkers
from pyPPG.datahandling import load_data, plot_fiducials, save_data
import pyPPG.preproc as PP
import pyPPG.fiducials as FP
import pyPPG.biomarkers as BM
import pyPPG.ppg_sqi as SQI
# Create a fiducials class
# (wraps the `fiducials` result computed in the previous cell)
fp = Fiducials(fp=fiducials)

# Folder passed to plot_fiducials as its third argument
savingfolder = r"E:\repos\vital-sign-estimation\reports\figures"

# Plot fiducial points
plot_fiducials(s, fp, savingfolder, legend_fontsize=12)

In [None]:
# Extract the fiducial points (same three calls as the earlier fiducials cell)
fpex = FP.FpCollection(s=s)
fiducials = fpex.get_fiducials(s=s)
fp = Fiducials(fp=fiducials)

# Set up the biomarker extractor from the PPG object and its fiducial points
bmex = BM.BmCollection(s=s, fp=fp)

# Run the extraction; it returns definitions, values and statistics
bm_defs, bm_vals, bm_stats = bmex.get_biomarkers()

# Print each statistics entry under its key
tmp_keys = bm_stats.keys()
print('Statistics of the biomarkers:')
for i in tmp_keys:
    print(i, '\n', bm_stats[i])

# Bundle the three results in pyPPG's Biomarkers class
bm = Biomarkers(
    bm_defs=bm_defs,
    bm_vals=bm_vals,
    bm_stats=bm_stats,
)

In [None]:
# Call get_bm() on the Biomarkers object and keep the result in `x` for inspection
x = bm.get_bm()

In [None]:
# Print the whole biomarker statistics structure returned by get_biomarkers()
print(bm_stats)

In [None]:
# Mean PPG signal quality index, scaled by 100 and rounded to two decimals
ppgSQI = round(
    np.mean(SQI.get_ppgSQI(ppg=s.ppg, fs=s.fs, annotation=fp.sp)) * 100,
    2,
)
print('Mean PPG SQI: ', ppgSQI, '%')