## Parameters for all Patients 

In [None]:
from __future__ import division, print_function

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from collections import OrderedDict
from scipy.signal import savgol_filter
from scipy.interpolate import InterpolatedUnivariateSpline

Massage the spreadsheet data into a dictionary of patients keyed by integer participant number. Each entry is a dataframe of LA and LV volumes indexed by time from trigger in milliseconds.

There is also a dataframe of RR and frame intervals indexed on participant number.

In [None]:
# Input workbooks, resolved relative to the notebook's working directory.
volumes = 'FullVolumes.xlsx'
rrintervals = 'RRIntervals.xlsx'

df_vols = pd.read_excel(volumes)
df_rrintervals = pd.read_excel(rrintervals).reset_index(drop=True)
# Assumes the RR workbook holds exactly one data row (the RR interval per
# participant); assigning a one-element index fails loudly otherwise.
df_rrintervals.index = ['RR_ms']

# Volume columns are named '<chamber>-<3-digit participant number>'.
df_la = df_vols.filter(regex='LA-[0-9]{3,3}$')
df_lv = df_vols.filter(regex='LV-[0-9]{3,3}$')

# map index to integers
df_times = df_rrintervals.filter(regex='LV-[0-9]{3,3}$').T
df_times['PN'] = [int(i[-3:]) for i in df_times.index]
df_times = df_times.set_index('PN')

# Number of frames, taken from the non-null count of the first LA column.
# .iloc makes the positional lookup explicit (the Series is labelled by column name).
nrows = df_la.count(axis=0).iloc[0]
df_times['Frame_interval_ms'] = df_times['RR_ms'] / nrows
patients = {}
for la_col in df_la.columns:
    # Reuse the textual 3-digit suffix to find the matching LV column, so
    # zero-padded participant numbers (e.g. '078') are not mangled by an
    # int -> '%d' round trip.
    suffix = la_col[-3:]
    patno = int(suffix)
    frame_interval = df_times.loc[patno, 'Frame_interval_ms']
    patients[patno] = pd.concat([
        pd.Series(np.arange(nrows) * frame_interval, name='Time'),
        df_lv['LV-' + suffix],
        df_la[la_col],
    ], axis=1).set_index('Time')
    patients[patno].columns = ('LV', 'LA')
    # Ad-hoc attribute used only as a display label; pandas does not
    # guarantee that it survives copies or other DataFrame operations.
    patients[patno].pat_name = 'Patient %d' % patno

Parameters of Interest. Work on numpy arrays of volumes without the frame rate, so all time values are indices, i.e. in 'frames' rather than milliseconds.

In [None]:
def lv_es_index(volumes):
    """Return the index of LV end systole, taken as the minimum-volume sample.

    When the minimum value occurs more than once, the first occurrence wins.
    """
    es_idx = np.argmin(volumes)
    return es_idx

def lv_split_at_es(volumes):
    """Split an LV volume trace at end systole.

    Returns a pair ``(before, from_es)``: the samples preceding the
    end-systolic index, and the samples from that index onwards (the
    end-systolic sample itself is the first element of the second piece).
    """
    cut = lv_es_index(volumes)
    before = volumes[:cut]
    from_es = volumes[cut:]
    return before, from_es

def lv_vol_at_es(volumes):
    """Return the LV volume at end systole (the sample at ``lv_es_index``)."""
    es_idx = lv_es_index(volumes)
    return volumes[es_idx]

def lv_vol_at_ss(volumes):
    """Return the LV volume at the first sample of the trace."""
    first_sample = volumes[0]
    return first_sample

def lv_vol_at_ed(volumes):
    """Return the LV volume at the last sample of the trace."""
    last_sample = volumes[-1]
    return last_sample

def idx_nearest(array, value, after=0):
    """Return the index of the element closest to ``value``.

    Only elements at positions ``after`` and later are considered; the
    returned index is relative to the start of ``array``. Ties resolve to
    the earliest position. ``array`` must support element-wise subtraction
    (e.g. a numpy array).
    """
    tail = array[after:]
    distances = np.abs(tail - value)
    return distances.argmin() + after

def lv_idx_for_refill80_full(volumes, use_vss=True):
    """Return the post-ES index where the LV is closest to 80% refilled.

    "Full" is the first-sample volume when ``use_vss`` is true, otherwise the
    mean of the first- and last-sample volumes. "Empty" is the end-systolic
    volume. The target is the level 80% of the way from empty to full, and
    the search only considers samples from end systole onwards.

    The nearest sample is returned even when the trace never actually
    reaches the target level.
    """
    start_vol = lv_vol_at_ss(volumes)
    empty_level = lv_vol_at_es(volumes)
    end_vol = lv_vol_at_ed(volumes)

    if use_vss:
        full_level = start_vol
    else:
        full_level = (start_vol + end_vol) / 2

    target = 0.2 * empty_level + 0.8 * full_level
    es_idx = lv_es_index(volumes)
    return idx_nearest(volumes, value=target, after=es_idx)

def lv_idx_max_systolic_down_slope(volumes, slopes):
    """Return the index of the steepest volume decrease before LV end systole.

    ``slopes`` is the derivative of ``volumes`` on the same sample grid.
    Only samples strictly before the end-systolic index are examined, so
    argmax raises ValueError when end systole is at index 0.
    """
    es_idx = lv_es_index(volumes)
    # Negate so that the most negative slope becomes the maximum.
    emptying_rates = -slopes[:es_idx]
    return emptying_rates.argmax()

def lv_idx_max_recovery_slopes(volumes, slopes):
    """Return the indices of peak early and peak late LV filling rate.

    The period after end systole is divided at the 80%-refill index:
    "early" is end systole up to (not including) that index, "late" is that
    index to the end of the trace. Within each window the position of the
    largest slope is returned, expressed as an index into the full arrays.

    argmax raises ValueError if the early window is empty, i.e. when the
    80%-refill index coincides with end systole.
    """
    es_idx = lv_es_index(volumes)
    # Boundary between the early and late windows.
    boundary = lv_idx_for_refill80_full(volumes)

    early_window = slopes[es_idx:boundary]
    late_window = slopes[boundary:]

    # Offsets convert window-relative positions back to full-array indices.
    return early_window.argmax() + es_idx, late_window.argmax() + boundary

def la_es_index(volumes):
    """Return the index of LA end systole, taken as the maximum-volume sample.

    When the maximum value occurs more than once, the first occurrence wins.
    """
    es_idx = np.argmax(volumes)
    return es_idx

def la_split_at_es(volumes):
    """Split an LA volume trace at end systole.

    Returns a pair ``(before, from_es)``: the samples preceding the
    end-systolic index, and the samples from that index onwards (the
    end-systolic sample itself is the first element of the second piece).
    """
    cut = la_es_index(volumes)
    before = volumes[:cut]
    from_es = volumes[cut:]
    return before, from_es

def la_vol_at_es(volumes, es=None):
    """Return the LA volume at end systole.

    This function was previously defined twice in a row; the second
    definition silently replaced the first and made ``es`` mandatory. The
    two are merged here so both call styles work.

    Parameters
    ----------
    volumes : sequence
        LA volume samples.
    es : int, optional
        Index of end systole. When omitted it is located with
        ``la_es_index`` (the maximum-volume sample).

    Returns
    -------
    The element of ``volumes`` at the end-systolic index.
    """
    if es is None:
        es = la_es_index(volumes)
    return volumes[es]

def la_vol_at_ss(volumes):
    """Return the LA volume at the first sample of the trace."""
    first_sample = volumes[0]
    return first_sample

def la_vol_at_ed(volumes):
    """Return the LA volume at the last sample of the trace."""
    last_sample = volumes[-1]
    return last_sample

def la_idx_max_systolic_up_slope(volumes, slopes):
    """Return the index of the steepest volume increase before LA end systole.

    ``slopes`` is the derivative of ``volumes`` on the same sample grid.
    Only samples strictly before the end-systolic (maximum-volume) index
    are examined, so argmax raises ValueError when that index is 0.
    """
    es_idx = la_es_index(volumes)
    filling_rates = slopes[:es_idx]
    return filling_rates.argmax()

def la_idx_max_emptying_slopes(volumes, slopes):
    """Return the indices of peak early and peak late LA emptying rate.

    The samples from end systole to the end of the trace are cut in half
    (integer division): "early" is the first half, "late" the second.
    Within each half the position of the most negative slope is returned,
    expressed as an index into the full arrays.

    argmax raises ValueError if the early half is empty, i.e. when end
    systole falls on the last sample.
    """
    es_idx = la_es_index(volumes)

    # Halfway point of the post-ES samples separates early from late.
    samples_after_es = len(volumes) - es_idx
    midpoint = es_idx + samples_after_es // 2

    # Negate so that the steepest decrease becomes the maximum.
    early_emptying_rates = -slopes[es_idx:midpoint]
    late_emptying_rates = -slopes[midpoint:]

    return (early_emptying_rates.argmax() + es_idx,
            late_emptying_rates.argmax() + midpoint)


In [None]:
def lv_params(patient, region='LV'):
    """Derive LV timing, peak-rate and volume parameters from one volume curve.

    The raw volumes are smoothed with a Savitzky-Golay filter, and a first
    derivative is estimated with a second (wider window, higher order)
    Savitzky-Golay filter. Both are then resampled through interpolating
    splines onto an evenly spaced grid with three times as many points as
    the input, and the landmarks are located on the resampled curves.

    Parameters
    ----------
    patient : DataFrame-like
        Must have a column named ``region`` and a time index. Per the
        notebook the index is time from trigger in ms; volumes are
        presumably in ml (TODO confirm against the source spreadsheet).
    region : str
        Name of the column holding the LV volumes.

    Returns
    -------
    OrderedDict
        ``LV_time_end_systole`` and ``LV_time_peak_ejection`` are measured
        from the start of the trace; the filling and fill80 times are
        measured from end systole. ``*_ml_sec`` rates are the derivative
        scaled by 1000 (per-ms to per-second for a ms time index), with
        ejection sign-flipped so it is reported as a positive rate.
        ``LV_ratio_fill80`` is the fill80 time as a fraction of the interval
        from end systole to the last frame.
    """
    volume_series = patient[region]
    time = np.asarray(volume_series.index)
    volume = np.asarray(volume_series.values)

    # NOTE(review): this is the timestamp of the last frame. If frames are
    # spaced RR/n apart starting at 0 it is one frame short of the full RR
    # interval -- confirm this is the intended end of diastole.
    r_to_r = time[-1]
    dt_msecs = np.mean(np.diff(time))

    sg_volume = savgol_filter(x=volume, window_length=5, polyorder=2, mode='interp')
    # one order higher for the derivative sounds right .. increase window to match
    sg_derivative = savgol_filter(x=volume, window_length=7, polyorder=3, deriv=1,
                                  mode='interp', delta=dt_msecs)

    spline = InterpolatedUnivariateSpline(time, sg_volume)
    spline_deriv = InterpolatedUnivariateSpline(time, sg_derivative)

    # Resample at 3x the original density so landmarks are located with
    # finer time resolution than the acquisition frame interval.
    interpolated_time = np.linspace(0, time[-1], len(time)*3)
    interpolated_savgol = spline(interpolated_time)
    interpolated_deriv = spline_deriv(interpolated_time)

    min_vol_index = lv_es_index(interpolated_savgol)
    min_vol_time = interpolated_time[min_vol_index]

    eighty_percent_idx = lv_idx_for_refill80_full(interpolated_savgol)
    eighty_percent_time = interpolated_time[eighty_percent_idx]

    max_emptying_index = lv_idx_max_systolic_down_slope(interpolated_savgol, interpolated_deriv)
    max_emptying_time = interpolated_time[max_emptying_index]
    max_emptying_vol = interpolated_savgol[max_emptying_index]
    max_emptying_slope = interpolated_deriv[max_emptying_index]

    max_early_filling_index, max_late_filling_index = lv_idx_max_recovery_slopes(
        interpolated_savgol, interpolated_deriv)
    max_early_filling_time = interpolated_time[max_early_filling_index]
    max_late_filling_time = interpolated_time[max_late_filling_index]

    max_early_filling_vol = interpolated_savgol[max_early_filling_index]
    max_late_filling_vol = interpolated_savgol[max_late_filling_index]

    max_early_filling_slope = interpolated_deriv[max_early_filling_index]
    max_late_filling_slope = interpolated_deriv[max_late_filling_index]

    # Interval from end systole to the last frame.
    diastolic_time = r_to_r - min_vol_time
    fill80_time = eighty_percent_time - min_vol_time

    return OrderedDict([
        ('LV_time_end_systole', min_vol_time),
        ('LV_peak_ejection_ml_sec', (-1000 * max_emptying_slope)),
        ('LV_time_peak_ejection', max_emptying_time),
        ('LV_vol_peak_ejection', max_emptying_vol),
        ('LV_early_peak_filling_ml_sec', (1000 * max_early_filling_slope)),
        ('LV_time_early_peak_filling', (max_early_filling_time - min_vol_time)),
        ('LV_vol_early_peak_filling', max_early_filling_vol),
        ('LV_late_peak_filling_ml_sec', (1000 * max_late_filling_slope)),
        ('LV_time_late_peak_filling', (max_late_filling_time - min_vol_time)),
        ('LV_vol_late_peak_filling', max_late_filling_vol),
        ('LV_time_fill80', fill80_time),
        ('LV_ratio_fill80', fill80_time / diastolic_time)
    ])


In [None]:
# Spot-check: extract the LV parameters for a single participant (number 178).
params178 = lv_params(patients[178])

In [None]:
def la_params(patient, region='LA'):
    """Derive LA timing, peak-rate and volume parameters from one volume curve.

    The raw volumes are smoothed with a Savitzky-Golay filter, and a first
    derivative is estimated with a second (wider window, higher order)
    Savitzky-Golay filter. Both are then resampled through interpolating
    splines onto an evenly spaced grid with three times as many points as
    the input, and the landmarks are located on the resampled curves.

    Parameters
    ----------
    patient : DataFrame-like
        Must have a column named ``region`` and a time index. Per the
        notebook the index is time from trigger in ms; volumes are
        presumably in ml (TODO confirm against the source spreadsheet).
    region : str
        Name of the column holding the LA volumes.

    Returns
    -------
    OrderedDict
        ``LA_time_end_systole`` and ``LA_time_peak_filling`` are measured
        from the start of the trace; the emptying times are measured from
        end systole (the maximum-volume sample). ``*_ml_sec`` rates are the
        derivative scaled by 1000 (per-ms to per-second for a ms time
        index), with emptying sign-flipped so it is reported as a positive
        rate.
    """
    volume_series = patient[region]
    time = np.asarray(volume_series.index)
    volume = np.asarray(volume_series.values)
    dt_msecs = np.mean(np.diff(time))

    sg_volume = savgol_filter(x=volume, window_length=5, polyorder=2, mode='interp')
    # one order higher for the derivative sounds right .. increase window to match
    sg_derivative = savgol_filter(x=volume, window_length=7, polyorder=3, deriv=1,
                                  mode='interp', delta=dt_msecs)

    spline = InterpolatedUnivariateSpline(time, sg_volume)
    spline_deriv = InterpolatedUnivariateSpline(time, sg_derivative)

    # Resample at 3x the original density so landmarks are located with
    # finer time resolution than the acquisition frame interval.
    interpolated_time = np.linspace(0, time[-1], len(time)*3)
    interpolated_savgol = spline(interpolated_time)
    interpolated_deriv = spline_deriv(interpolated_time)

    max_vol_index = la_es_index(interpolated_savgol)
    max_vol_time = interpolated_time[max_vol_index]

    max_filling_index = la_idx_max_systolic_up_slope(interpolated_savgol, interpolated_deriv)
    max_filling_time = interpolated_time[max_filling_index]
    max_filling_vol = interpolated_savgol[max_filling_index]
    max_filling_slope = interpolated_deriv[max_filling_index]

    max_early_emptying_index, max_late_emptying_index = la_idx_max_emptying_slopes(
        interpolated_savgol, interpolated_deriv)

    max_early_emptying_time = interpolated_time[max_early_emptying_index]
    max_early_emptying_vol = interpolated_savgol[max_early_emptying_index]
    max_early_emptying_slope = interpolated_deriv[max_early_emptying_index]

    max_late_emptying_time = interpolated_time[max_late_emptying_index]
    max_late_emptying_vol = interpolated_savgol[max_late_emptying_index]
    max_late_emptying_slope = interpolated_deriv[max_late_emptying_index]

    return OrderedDict([
        ('LA_time_end_systole', max_vol_time),
        ('LA_peak_filling_ml_sec', (1000 * max_filling_slope)),
        ('LA_time_peak_filling', max_filling_time),
        ('LA_vol_peak_filling', max_filling_vol),
        ('LA_early_peak_emptying_ml_sec', (-1000 * max_early_emptying_slope)),
        ('LA_time_early_peak_emptying', (max_early_emptying_time - max_vol_time)),
        ('LA_vol_early_peak_emptying', max_early_emptying_vol),
        ('LA_late_peak_emptying_ml_sec', (-1000 * max_late_emptying_slope)),
        ('LA_time_late_peak_emptying', (max_late_emptying_time - max_vol_time)),
        ('LA_vol_late_peak_emptying', max_late_emptying_vol),
    ])

In [None]:
# Merge the LA parameters for participant 178 into the existing params178 dictionary.
params178.update(la_params(patients[178]))

In [None]:
def all_params(patient):
    """Return the LV and LA parameters for one patient as a single mapping.

    The LV entries come first, followed by the LA entries, in the order
    produced by ``lv_params`` and ``la_params``.
    """
    combined = lv_params(patient)
    la_part = la_params(patient)
    combined.update(la_part)
    return combined

# Run the combined LV + LA extraction for every participant, keyed by participant number.
results = {patno: all_params(frame) for patno, frame in patients.items()}

In [None]:
# One row per participant, one column per extracted parameter.
df_results = pd.DataFrame(results).transpose()
# Lead with the RR interval, aligned on participant number.
df_results.insert(loc=0, column='RR_ms', value=df_times['RR_ms'])
# Show the first 20 participants with parameters as rows so the wide table stays readable.
df_results.head(20).transpose()

We need some quality control - some values are dubious here: eg 182, 192, 209 ...

 - 182
   - LV
     - fails to recover to 80%
     - misses early filling as the later slope is steeper
   - LA
     - misidentifies ES as the second peak, which is higher
     - final slope down too steep as seems just at final point (edge behaviour of SG?)

- Need something better to distinguish early and late phases ...
  - could get multiple peaks with some sort of peak finder and choose two highest
  - will need some sort of robust peak finder ....

In [None]:
# Quality control: participants whose 80%-refill point falls in the last 5% of the
# interval from end systole to the final frame, i.e. the LV barely reaches 80%.
df_results.loc[df_results['LV_ratio_fill80'] > 0.95]