# Test Your Algorithm

## Instructions
1. From the **Pulse Rate Algorithm** Notebook you can do one of the following:
   - Copy the entire **Code** section into the Code block below.
   - Download the notebook as a Python (`.py`) file and copy its code into the Code block below.
2. In the bottom right, click the <span style="color:blue">Test Run</span> button. 

### Didn't Pass
If your code didn't pass the test, go back to the previous Concept or to your local setup and continue iterating on your algorithm and try to bring your training error down before testing again.

### Pass
If your code passes the test, complete the following steps! You **must** include a screenshot of your code and of the test being **Passed**. The image below shows what the starter filler code looks like when the test is run; your result should look similar. A passed test will show a green outline in the notebook plus a box with **Test passed:**, and in the Results bar at the bottom the progress bar will be at 100% with a checkmark and **All cells passed**.
![Example](example.png)

1. Take a screenshot of your code passing the test and make sure it is in `.png` format. If it is not a `.png` image, you will have to edit the Markdown to render the image after Step 3. See the example image above for what `passed.png` should look like.
2. Upload the screenshot to the same folder or directory as this jupyter notebook.
3. Rename the screenshot to `passed.png` and it should show up below.
![Passed](passed.png)
4. Download this jupyter notebook as a `.pdf` file. 
5. Continue to Part 2 of the Project. 

In [None]:
# replace the code below with your pulse rate algorithm
import glob

import numpy as np
import scipy as sp
import scipy.signal
import scipy.io
import matplotlib.pyplot as plt

# Constants for bandpass filtering
# Lower and upper pulse-rate limits in beats per minute. They are divided by 60
# wherever they are used as frequency cut-offs: the default pass band of
# BandpassFilter and the spectrum masking in ProcessSignal.
LOW_BPM = 40
HIGH_BPM = 220

def LoadTroikaDataset():
    """
    Locate the signal and reference .mat files of the troika training set.

    Review the README in ./datasets/troika/ to understand the organization of the .mat files.

    Returns:
        data_fls: Sorted paths of the DATA_*.mat files (signal data)
        ref_fls: Sorted paths of the REF_*.mat files (reference data)
        Both lists are sorted by name, so ref_fls[5] is intended to be the
            reference for data_fls[5], etc. This pairing relies on the DATA_ and
            REF_ file names sorting in the same order.
    """
    training_dir = "./datasets/troika/training_data"
    signal_paths, reference_paths = (
        sorted(glob.glob(training_dir + pattern))
        for pattern in ("/DATA_*.mat", "/REF_*.mat")
    )
    return signal_paths, reference_paths

def LoadTroikaDataFile(data_fl):
    """
    Read a troika data file and return the signal rows used by the algorithm.

    Usage:
        data_fls, ref_fls = LoadTroikaDataset()
        ppg, accx, accy, accz = LoadTroikaDataFile(data_fls[0])

    Args:
        data_fl: (str) filepath to a troika .mat file.

    Returns:
        The rows of the file's 'sig' matrix from index 2 onward. Callers unpack
        these as the ppg, accx, accy, accz signals, which assumes 'sig' has
        exactly six rows.
    """
    mat_contents = scipy.io.loadmat(data_fl)
    signal_matrix = mat_contents['sig']
    # The first two rows are not used by the pulse rate algorithm.
    return signal_matrix[2:]

def AggregateErrorMetric(pr_errors, confidence_est):
    """
    Summarize pulse rate errors as the MAE at 90% availability.

    Estimates whose confidence falls below the 10th percentile of all
    confidences are discarded; the mean absolute error of the rest is returned.

    Args:
        pr_errors: a numpy array of errors between pulse rate estimates and corresponding
            reference heart rates.
        confidence_est: a numpy array of confidence estimates for each pulse rate
            error.

    Returns:
        the MAE at 90% availability
    """
    # A higher confidence marks a more trustworthy estimate, so cutting at the
    # 10th percentile keeps the most trusted 90% of the estimates.
    confidence_cutoff = np.percentile(confidence_est, 10)
    is_retained = confidence_est >= confidence_cutoff
    retained_errors = pr_errors[is_retained]

    # Mean absolute error over the retained estimates
    return np.mean(np.absolute(retained_errors))

def Evaluate():
    """
    Top-level evaluation function.

    Runs the pulse rate algorithm on every trial of the Troika dataset, pools the
    per-window errors and confidences, and returns an aggregate error metric.

    Returns:
        Pulse rate error on the Troika dataset. See AggregateErrorMetric.
    """
    # Retrieve dataset files
    data_fls, ref_fls = LoadTroikaDataset()

    # Run the pulse rate algorithm on each trial in the dataset
    per_trial = [
        RunPulseRateAlgorithm(data_fl, ref_fl)
        for data_fl, ref_fl in zip(data_fls, ref_fls)
    ]

    # Pool the results of all trials, then compute the aggregate error metric
    all_errors = np.hstack([trial_errors for trial_errors, _ in per_trial])
    all_confidences = np.hstack([trial_confidence for _, trial_confidence in per_trial])
    return AggregateErrorMetric(all_errors, all_confidences)

def BandpassFilter(sig, freq_filter=(LOW_BPM / 60, HIGH_BPM / 60), fs=125):
    """Apply a 3rd-order Butterworth bandpass filter to a signal.

    The filter is run with scipy.signal.filtfilt, i.e. once forward and once
    backward over the signal.

    Args:
        sig: (np.array) The input signal
        freq_filter: (tuple) The pass band as (low, high) cut-off frequencies,
            in the same units as <fs>. Frequency components outside the two
            elements of the tuple are attenuated.
        fs: (number) The sampling rate of <sig>

    Returns:
        (np.array) The filtered signal
    """
    numerator, denominator = scipy.signal.butter(
        3, freq_filter, btype='bandpass', fs=fs
    )
    filtered = scipy.signal.filtfilt(numerator, denominator, sig)
    return filtered


def ProcessSignal(sig, fs):
    """
    Compute the magnitude spectrum of a signal, zeroed outside the pulse rate band.

    Bins at or below LOW_BPM/60 and at or above HIGH_BPM/60 are set to zero.

    Args:
        sig: (np.array) The input signal
        fs: (number) The sampling rate of <sig>

    Returns:
        (np.array) Magnitudes of the real FFT of the signal, zero outside the band
        (np.array) The frequency of each FFT bin
    """
    magnitudes = np.abs(np.fft.rfft(sig))
    freqs = np.fft.rfftfreq(len(sig), 1/fs)

    low_cutoff = LOW_BPM/60.0
    high_cutoff = HIGH_BPM/60.0
    outside_band = (freqs <= low_cutoff) | (freqs >= high_cutoff)
    magnitudes[outside_band] = 0.0

    return magnitudes, freqs

def _DominantFrequencies(window, fs, count=5):
    """Return (magnitudes, freqs, top) where top holds the <count> strongest in-band frequencies, strongest first."""
    magnitudes, freqs = ProcessSignal(window, fs)
    strongest_first = np.argsort(magnitudes)[::-1]
    return magnitudes, freqs, freqs[strongest_first[:count]]


def _SelectPeakFrequency(ppg_freqs, acc_freqs, threshold=0.1):
    """Choose the heart rate frequency from the dominant ppg and acc frequencies (both strongest first)."""
    # The strongest ppg peak is trusted when it lies clearly above the
    # strongest accelerometer (motion) peak.
    if ppg_freqs[0] - acc_freqs[0] > threshold:
        return ppg_freqs[0]
    # Otherwise prefer the 2nd, then the 3rd strongest ppg peak, provided it
    # does not coincide with one of the dominant accelerometer frequencies.
    for candidate in ppg_freqs[1:3]:
        if candidate not in acc_freqs:
            return candidate
    # NOTE(review): the original description said the fallback is the dominant
    # ppg frequency, but the code uses the dominant accelerometer frequency.
    # Behaviour is kept as-is -- confirm which is intended.
    return acc_freqs[0]


def _SpectralConfidence(magnitudes, freqs, peak_freq, half_width):
    """Return the share of total spectral magnitude lying within <half_width> of <peak_freq>."""
    near_peak = (freqs > peak_freq - half_width) & (freqs < peak_freq + half_width)
    total = np.sum(magnitudes)
    if total == 0:
        # No in-band energy at all: report zero confidence instead of NaN.
        return 0.0
    return np.sum(magnitudes[near_peak]) / total


def RunPulseRateAlgorithm(data_fl, ref_fl):
    """
    Algorithm to estimate heart rate and return an error and confidence value for each estimate. Error is with respect to reference heart rate.
    The algorithm:
    (1) Processes the ppg signal through a bandpass filter
    (2) Aggregates the 3 channel accelerometer signal into a single acc signal and processes it through the bandpass filter
    (3) For each 8-second sliding window with a 6-second overlap between successive windows:
        (i) Takes the fourier transforms of the ppg and acc signals and sorts the frequencies in descending order of the FFT magnitudes.
        This gives us the 5 most dominant frequencies for the ppg and accelerometer signals.
        (ii) If the dominant ppg frequency is greater than the dominant accelerometer frequency by more than 0.1,
        take the dominant ppg frequency as the heart rate estimate/second.
        (iii) Else if the second most dominant ppg frequency is not in the top 5 dominant acc frequencies,
        take this ppg frequency as the heart rate estimate/second.
        (iv) Else if the third most dominant ppg frequency is not in the top 5 dominant acc frequencies,
        take this ppg frequency as the heart rate estimate/second.
        (v) If none of the above conditions are true, take the most dominant accelerometer frequency as the heart rate estimate.
        (vi) Compute the confidence by totalling the fft magnitudes for frequencies within 5 BPM of the above estimate and
        dividing it by the sum of the entire magnitude of frequencies for the ppg signal.
    (4) Trims the estimates to the number of reference values and computes the absolute error (in BPM)
        between each heart rate estimate and the reference value for the same window.

    Usage:
        errors, confidence = RunPulseRateAlgorithm(data_fl, ref_fl)

    Args:
        data_fl: (str) filepath to a troika .mat file containing ppg and acc signals.
        ref_fl: (str) filepath to the reference or "ground truth" heart rate corresponding to the data_fl

    Returns:
        Two lists, (errors, confidence), of equal length.
        Each entry represents an absolute error and a confidence value for an 8-second signal window
        with 6-second overlap between successive windows.
    """
    fs = 125
    window_length = fs * 8   # 8 second window
    window_shift = fs * 2   # a new estimate every 2 seconds
    confidence_half_width = 5/60   # half-width (5 BPM) of the band around the estimate used for confidence

    # Load signals
    ppg, accx, accy, accz = LoadTroikaDataFile(data_fl)
    ppg = BandpassFilter(ppg, fs=fs)

    # Aggregate the accelerometer channels into one magnitude signal. The mean
    # of the y-channel is removed first (original rationale: it measures gravity).
    acc = np.sqrt(accx**2 + (accy - np.mean(accy))**2 + accz**2)
    acc = BandpassFilter(acc, fs=fs)

    ref_pks = sp.io.loadmat(ref_fl)['BPM0']

    pks = []
    confidence = []
    # The "+ 1" keeps the final full window, which ends exactly at the last sample.
    for start in range(0, len(ppg) - window_length + 1, window_shift):
        ppg_window = ppg[start:start + window_length]
        acc_window = acc[start:start + window_length]

        fft_ppg, freqs_ppg, top_ppg = _DominantFrequencies(ppg_window, fs)
        _, _, top_acc = _DominantFrequencies(acc_window, fs)

        peak_freq = _SelectPeakFrequency(top_ppg, top_acc)
        pks.append(peak_freq)
        confidence.append(
            _SpectralConfidence(fft_ppg, freqs_ppg, peak_freq, confidence_half_width)
        )

    # Trim the estimates and confidences to the size of the reference array
    ref_fl_len = len(ref_pks)
    pks = pks[:ref_fl_len]
    confidence = confidence[:ref_fl_len]

    # Convert cycles/sec to cycles/min or bpm
    pks_bpm = [peak * 60 for peak in pks]

    # Absolute error per window. Each reference entry is indexed with [0],
    # which assumes 'BPM0' is stored as a column of one-element rows.
    errors = [np.abs(estimate - reference)[0]
              for estimate, reference in zip(pks_bpm, ref_pks)]

    # Return per-estimate absolute errors and confidences as a 2-tuple of lists.
    return errors, confidence