In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from reader import samples_reader

In [2]:
''' 
This file contains a sample dataset that includes timestamp and pixel arrays.
The pixel array has a dimension of (1145, 5, 5, 3),
which represents (number of measurements, number of pixels in x axis,  number of pixels in y axis, rgb),
with cells containing the r/g/b value.

Files from SID OptiBP measurements can be downloaded from optibp-indo.appspot.com in Cloud Storage,
Accessible from OptiBP-Indo project under sid-indonesia.org Google Cloud Platform.
'''

# Path to the zipped sample recording, relative to the notebook's working directory.
filename = r"./sample.zip"
# Parse the archive with the project-local reader; later cells read the
# `timestamps`, `pixels` and `recording_start` attributes of the result.
samples = samples_reader.read_file(filename)

In [3]:
# Build one column per time series: the timestamps first, then one column for
# every (x, y, colour channel) position of the pixel array.
# NOTE(review): assumes samples.pixels is a 4-D NumPy-compatible array laid out
# as (measurement, x, y, channel), as described in the loading cell - confirm.
pixels = np.asarray(samples.pixels)

# A separate name is used for the intermediate dict so that `valBin` only ever
# refers to the final DataFrame.
columns = {'t': samples.timestamps.tolist()}

# np.ndindex walks (x, y, c) with the last index varying fastest, i.e. the same
# order as three nested loops; c = 0 is red, c = 1 is green, c = 2 is blue.
for x, y, c in np.ndindex(*pixels.shape[1:]):
    # Slice the whole time axis at once instead of indexing frame by frame, and
    # add the key in place instead of rebuilding the dict with `|` every pass.
    # .tolist() hands pandas plain Python scalars, so the column dtype is still
    # inferred from scalar values rather than taken from the array dtype.
    columns[f'{x}_{y}_{c}'] = pixels[:, x, y, c].tolist()

# Convert the dictionary into a pandas DataFrame (columns keep insertion order)
valBin = pd.DataFrame.from_dict(columns)

# Compute the difference between each row and the previous row, adding a prefix 'd_' to each column name
# (the timestamp column is differenced too and becomes 'd_t'; the first row is NaN)
difBin = valBin.diff().add_prefix('d_')

# Optional export of both tables to Excel:
# valBin.to_excel(r'waveform sample.xlsx')
# difBin.to_excel(r'waveform sample delta.xlsx')

In [4]:
# Explanation from the OptiBP developer team

'''
The color represent the wavelength that is being detected by the red, blue and green sensors on the CMOS array of the camera.

The pixels are either 25 specifically selected pixels across the CMOS array, or more likely the averages of pixels in 25 ‘zones’,
for each image frame captured by the camera each 0.033 second (i.e., 30 frames per second).
Your determination is correct that 0_0_0 means the pixel or area at x=0 and y=0 for the red CMOS channel.

The pixel sampling is being done because each finger placement may be a bit different, so the average signal across the image may be more reliable for the analysis.

Hemoglobin, which is the main molecule being detected by the camera as blood pulses through the finger, has a maximum absorption in the green and red ranges,
so those channels will tend to decrease when blood volume expands at the skin surface with each pulse (please see attached paper figure 1).
There is likely not much useful information in the blue channel, and it is probably why the script plots 2_2_1 as that is one of the green channels.

The time, e.g., 7 seconds, to start assessing blood pressure is based on some stability in the images,
and so when there is some minimal delta across all 25 pixels or areas and for a certain amount of time,
the image frames are then initiated into the real time algorithm for blood pressure.
'''

'\nThe color represent the wavelength that is being detected by the red, blue and green sensors on the CMOS array of the camera.\n\nThe pixels are either 25 specifically selected pixels across the CMOS array, or more likely the averages of pixels in 25 ‘zones’,\nfor each image frame captured by the camera each 0.033 second (i.e., 30 frames per second).\nYour determination is correct that 0_0_0 means the pixel or area at x=0 and y=0 for the red CMOS channel.\n\nThe pixel sampling is being done because each finger placement may be a bit different, so the average signal across the image may be more reliable for the analysis.\n\nHemoglobin, which is the main molecule being detected by the camera as blood pulses through the finger, has a maximum absorption in the green and red ranges,\nso those channels will tend to decrease when blood volume expands at the skin surface with each pulse (please see attached paper figure 1).\nThere is likely not much useful information in the blue channel, an

In [5]:
# Column analysed for pulse peaks: the frame-to-frame delta of pixel (x=2, y=2), channel 1 (green)
peakColumn = 'd_2_2_1'

# Select the rows from difBin where the corresponding timestamp in valBin is greater than or equal to samples.recording_start.
# reset_index() keeps the original row labels in an 'index' column so peaks can be mapped back to valBin later.
isRecording = valBin['t'] >= samples.recording_start
sampleSelect = difBin.loc[isRecording, peakColumn].reset_index()

# Flag peaks in the delta column: the current delta is positive and the next one is negative,
# i.e. the underlying signal stops rising and starts falling.
# shift(-1) lines every row up with its successor; the last row has no successor (NaN),
# so it compares as False and the result is a full-length boolean Series.
delta = sampleSelect[peakColumn]
sampleIsPeak = ((delta > 0) & (delta.shift(-1) < 0)).rename('is_peak')

# Concatenate the is_peak Series to sampleSelect DataFrame along the columns
sampleSelect = pd.concat([sampleSelect, sampleIsPeak], axis=1)

# Find the timestamps in valBin corresponding to the rows flagged as peaks
# Subtract samples.recording_start to get the peak times relative to the start of recording
peakRows = sampleSelect.loc[sampleSelect['is_peak'], 'index'].to_list()
samplePeaks = (valBin['t'].loc[peakRows] - samples.recording_start).reset_index(drop=True)

# Calculate the differences between consecutive peaks; the resulting column is named 'd_t'
sampleDelta = samplePeaks.diff().to_frame().add_prefix('d_')

# Concatenate the peak times and their differences into a single DataFrame
samplePeaks = pd.concat([samplePeaks, sampleDelta], axis=1)

# Calculate the upper and lower boundaries for classifying peak intervals
# Upper boundary: mean of the peak differences + 1 standard deviation
# Lower boundary: mean of the peak differences - 1 standard deviation
intervalMean = samplePeaks['d_t'].mean()
intervalStd = samplePeaks['d_t'].std()
upperBoundary = intervalMean + 1 * intervalStd
lowerBoundary = intervalMean - 1 * intervalStd

# Classify each peak interval as 'over', 'under', or 'within' based on the boundaries
# (the first row has no previous peak, so its NaN interval fails both comparisons and is labelled 'within')
samplePeaks['d_t_class'] = np.where(samplePeaks['d_t'] > upperBoundary, 'over',
                                    np.where(samplePeaks['d_t'] < lowerBoundary, 'under', 'within'))

In [6]:
# See the attached image for the definitions of each letter class

# Letter assigned to each (class of the current interval, class of the next interval) pair
pairToLetter = {
    ('over', 'over'): 'A',
    ('over', 'under'): 'B',
    ('over', 'within'): 'C',
    ('under', 'under'): 'D',
    ('under', 'over'): 'E',
    ('under', 'within'): 'F',
    ('within', 'within'): 'G',
    ('within', 'over'): 'H',
    ('within', 'under'): 'I',
}

intervalClass = samplePeaks['d_t_class']
firstIdx = samplePeaks.index.min()
lastIdx = samplePeaks.index.max()

# One entry per peak: 'not used' for the first and last peak, a letter for every peak in between
pikBin = []
for idx in samplePeaks.index:
    if idx == firstIdx or idx == lastIdx:
        pikBin.append('not used')
        continue
    # Look up the letter for this interval and the one that follows it;
    # a pair outside the table adds nothing, exactly like an if/elif chain with no else
    letter = pairToLetter.get((intervalClass[idx], intervalClass[idx + 1]))
    if letter is not None:
        pikBin.append(letter)

# Add the classification to the samplePeaks DataFrame
samplePeaks['peak_class'] = pikBin

# Join the letters into a single string, leaving out the 'not used' placeholders
pikStr = ''.join(code for code in pikBin if code != 'not used')

print(pikStr)

CGGIFGGIFGGGIFGIFIDFGIFIFHACGIFGHACIFG
