# To run prediction on 3 averaged beats

Each stored averaged beat covers slightly more samples than the subject's average beat length, so it is trimmed before being repeated.

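1. Find the average beat length for each subject.
2. Trim the averaged beat around its peak, keeping 1/3 of the average beat length before the peak and 2/3 after it.
3. Concatenate 3 (or n) copies of the trimmed beat.
4. Fill the remainder of the array with a constant offset value taken per subject and channel.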

In [None]:
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import sys
sys.path.append("../")

from evaluate_script import predict

# The cells below read ../data/mult_average_beats_preds.csv, which is written by the
# cells after the "END" heading; run those first if the file does not exist yet.

In [None]:
# Load the saved predictions table (the same path is written by the `to_csv`
# cell at the end of this notebook) and preview the first rows.
df_out = pd.read_csv("../data/mult_average_beats_preds.csv")
df_out.head()

In [None]:
# Correlation between the original prediction and the 3-averaged-beats prediction,
# rounded to 4 decimals.
df_out['nn_predicted_age'].corr(df_out['3_avg_beats_pred']).round(4)

In [None]:
# Correlation between the original prediction and the 5-averaged-beats prediction,
# rounded to 4 decimals.
df_out['nn_predicted_age'].corr(df_out['5_avg_beats_pred']).round(4)

In [None]:
# Correlation between the original prediction and the 8-averaged-beats prediction,
# rounded to 4 decimals.
df_out['nn_predicted_age'].corr(df_out['8_avg_beats_pred']).round(4)

In [None]:
# Original prediction (x) against the 3-averaged-beats prediction (y).
n_beats = 3
ax = plt.gca()
ax.scatter(
    df_out['nn_predicted_age'],
    df_out[f'{n_beats}_avg_beats_pred'],
    alpha=0.2,
)
ax.set_xlabel("Original Prediction")
ax.set_ylabel(f"{n_beats} averaged beats Prediction")
plt.show()

In [None]:
# Original prediction (x) against the 5-averaged-beats prediction (y).
n_beats = 5
ax = plt.gca()
ax.scatter(
    df_out['nn_predicted_age'],
    df_out[f'{n_beats}_avg_beats_pred'],
    alpha=0.2,
)
ax.set_xlabel("Original Prediction")
ax.set_ylabel(f"{n_beats} averaged beats Prediction")
plt.show()

In [None]:
# Original prediction (x) against the 8-averaged-beats prediction (y).
n_beats = 8
ax = plt.gca()
ax.scatter(
    df_out['nn_predicted_age'],
    df_out[f'{n_beats}_avg_beats_pred'],
    alpha=0.2,
)
ax.set_xlabel("Original Prediction")
ax.set_ylabel(f"{n_beats} averaged beats Prediction")
plt.show()

In [None]:
# Compare the 3-beat and 5-beat predictions against each other.
x, y = 3, 5
ax = plt.gca()
ax.scatter(
    df_out[f'{x}_avg_beats_pred'],
    df_out[f'{y}_avg_beats_pred'],
    alpha=0.2,
)
ax.set_xlabel(f"{x} beats prediction")
ax.set_ylabel(f"{y} beats prediction")
plt.show()

In [None]:
# Compare the 5-beat and 8-beat predictions against each other.
x, y = 5, 8
ax = plt.gca()
ax.scatter(
    df_out[f'{x}_avg_beats_pred'],
    df_out[f'{y}_avg_beats_pred'],
    alpha=0.2,
)
ax.set_xlabel(f"{x} beats prediction")
ax.set_ylabel(f"{y} beats prediction")
plt.show()


In [None]:
# Compare the 3-beat and 8-beat predictions against each other.
x, y = 3, 8
ax = plt.gca()
ax.scatter(
    df_out[f'{x}_avg_beats_pred'],
    df_out[f'{y}_avg_beats_pred'],
    alpha=0.2,
)
ax.set_xlabel(f"{x} beats prediction")
ax.set_ylabel(f"{y} beats prediction")
plt.show()


# END

Run the following code to generate the dataframe used above

In [None]:
# Exam table; the columns exam_id, age and nn_predicted_age are merged into
# df_out further down.
df = pd.read_csv('../data/exams.csv')
df.head()

### Stop
Before running this cell, run the script `python one_beat.py`

In [None]:
# Averaged-beat metadata; the columns exam_id, data_arr_idx and channel_used
# are used by the cells below.
df_meta = pd.read_csv("../data/average_beat_metadata.csv")
df_meta.head()

In [None]:
# Lookup table: data_arr_idx -> channel_used, built pairwise from df_meta.
channel_dict = dict(zip(df_meta['data_arr_idx'], df_meta['channel_used']))

# spot-check a single entry
channel_dict[4]

In [None]:
# Load the per-beat summary table and discard the 'Unnamed: 0' column when the
# CSV carries one.
df_peaks = pd.read_csv("../data/beats_summary_frame.csv")
col = 'Unnamed: 0'
df_peaks = df_peaks.drop(columns=col, errors='ignore')
df_peaks.head(1).T

In [None]:
# For each subject, take the 'peaks' entry of its selected channel and parse the
# bracketed, whitespace-separated text into a list of ints.
peaks_dict = {}
for idx, chan in channel_dict.items():
    mask = (df_peaks['subject'] == idx) & (df_peaks['channel'] == chan)
    raw = df_peaks.loc[mask, 'peaks'].values[0]
    tokens = raw.replace("[", "").replace("]", "").split()
    peaks_dict[idx] = [int(tok) for tok in tokens]

In [None]:
# Calculate average beat length for each subject

In [None]:
# Average beat length per subject: mean gap between consecutive peaks,
# truncated to an int.
avg_beat_dict = {}
for subject, peaks in peaks_dict.items():
    avg_beat_dict[subject] = int(np.diff(peaks).mean())

In [None]:
# One row per subject with its average beat length, plus the number of samples
# kept before the peak (33% of the beat length, truncated to int).
df_beats = (
    pd.Series(avg_beat_dict, name='average_beat_len')
    .rename_axis('subject')
    .reset_index()
)
df_beats['before_peak'] = (df_beats['average_beat_len'] * 0.33).astype(int)


In [None]:
# Display the per-subject beat lengths.
df_beats

In [None]:
# Attach age and the original model prediction to every averaged-beat record,
# matching on exam_id and keeping all metadata rows (left join).
df_out = pd.merge(
    df_meta[['exam_id', 'data_arr_idx', 'channel_used']],
    df[['exam_id', 'age', 'nn_predicted_age']],
    on='exam_id',
    how='left',
)
# the source frames are no longer needed; drop them to save memory
del df, df_meta, df_peaks

Trim the averaged beat and repeat it n times

In [None]:
def concat_n_averaged_beats(data_array, n_beats, df_beats, peak_at=2048):
    """
    Replace every subject's averaged beat with ``n_beats`` back-to-back copies of it.

    For each subject the beat is trimmed to ``average_beat_len`` samples around the
    peak (``before_peak`` samples ahead of it, the remainder after it), repeated
    ``n_beats`` times, centred on the peak position, and the rest of the sample
    axis is padded with the trimmed beat's first sample (left) and last sample
    (right).

    data_array: input array of a single averaged beat per channel, per subject,
        indexed as [subject, sample, channel]. It is modified IN PLACE, so reload
        it before calling again with a different ``n_beats``.
    n_beats: how many averaged beats to concatenate
    df_beats: metadata for the subjects, with average_beat_len, and where to start
        the before_peak. Row ``i`` (by position) must describe ``data_array[i]``.
    peak_at: sample index of the beat's peak inside ``data_array``

    Returns the same (mutated) ``data_array``.

    Raises ValueError when a subject's beat window or the repeated beats do not
    fit inside the sample axis, and IndexError when ``df_beats`` has fewer rows
    than ``data_array`` has subjects.
    """
    n_samples = data_array.shape[1]

    # Read both columns once instead of doing a slow ``iloc`` row lookup per subject.
    beat_lens = df_beats['average_beat_len'].values
    befores = df_beats['before_peak'].values

    for i in range(len(data_array)):
        avg_beat_len = int(beat_lens[i])
        before = int(befores[i])

        # Trimmed beat window: ``before`` samples ahead of the peak, the rest after.
        start = peak_at - before
        stop = start + avg_beat_len
        if not 0 <= start < stop <= n_samples:
            raise ValueError(
                f"subject {i}: beat window [{start}, {stop}) does not fit in "
                f"{n_samples} samples"
            )

        # Copy, because the slice is a view of the rows that are overwritten below.
        beat = data_array[i, start:stop, :].copy()
        # Repeat along the sample axis for all channels at once.
        repeated = np.tile(beat, (n_beats, 1))

        # Centre the repeated beats on the peak position.
        centre = len(repeated) // 2
        lo = peak_at - centre
        hi = lo + len(repeated)
        if lo < 0 or hi > n_samples:
            raise ValueError(
                f"subject {i}: {n_beats} beats of {avg_beat_len} samples do not "
                f"fit in {n_samples} samples around peak {peak_at}"
            )

        data_array[i, lo:hi, :] = repeated
        # Pad with the beat's own edge samples. The right-hand value is the last
        # sample of the trimmed beat; reading one index past it would pick up a
        # sample outside the beat.
        data_array[i, :lo, :] = beat[0]
        data_array[i, hi:, :] = beat[-1]
    return data_array

In [None]:
# Build the 3-beat input from a freshly loaded array (concat_n_averaged_beats
# writes into the array it is given) and store the model output in df_out.
n_beats = 3
data_array = concat_n_averaged_beats(
    np.load("../data/one_beat_array.npy"),
    n_beats=n_beats,
    df_beats=df_beats,
)

# predictions in the dataframe
df_out.loc[:, f'{n_beats}_avg_beats_pred'] = predict(data_array)

In [None]:
# Trace of the repeated beat for subject 0, channel 10.
i, chan = 0, 10
ax = plt.gca()
ax.set_title(f"Plotting {n_beats} times averaged beat appended")
ax.plot(data_array[i, :, chan])
plt.show()

In [None]:
# Trace of the repeated beat for subject 23, channel 3.
i, chan = 23, 3
ax = plt.gca()
ax.set_title(f"Plotting {n_beats} times averaged beat appended")
ax.plot(data_array[i, :, chan])
plt.show()

In [None]:
# Original prediction against the prediction for the current n_beats.
ax = plt.gca()
ax.scatter(
    df_out['nn_predicted_age'],
    df_out[f'{n_beats}_avg_beats_pred'],
    alpha=0.2,
)
ax.set_xlabel("Original Prediction")
ax.set_ylabel(f"{n_beats} averaged beats Prediction")
plt.show()

In [None]:
# Build the 5-beat input from a freshly loaded array (concat_n_averaged_beats
# writes into the array it is given) and store the model output in df_out.
n_beats = 5
data_array = concat_n_averaged_beats(
    np.load("../data/one_beat_array.npy"),
    n_beats=n_beats,
    df_beats=df_beats,
)

# predictions in the dataframe
df_out.loc[:, f'{n_beats}_avg_beats_pred'] = predict(data_array)

In [None]:
# Trace of the repeated beat for subject 23, channel 3.
i, chan = 23, 3
ax = plt.gca()
ax.set_title(f"Plotting {n_beats} times averaged beat appended")
ax.plot(data_array[i, :, chan])
plt.show()

In [None]:
# Original prediction against the prediction for the current n_beats.
ax = plt.gca()
ax.scatter(
    df_out['nn_predicted_age'],
    df_out[f'{n_beats}_avg_beats_pred'],
    alpha=0.2,
)
ax.set_xlabel("Original Prediction")
ax.set_ylabel(f"{n_beats} averaged beats Prediction")
plt.show()

In [None]:
# 3-beat prediction against 5-beat prediction.
ax = plt.gca()
ax.scatter(
    df_out['3_avg_beats_pred'],
    df_out['5_avg_beats_pred'],
    alpha=0.2,
)
ax.set_xlabel("3 beats prediction")
ax.set_ylabel("5 beats prediction")
plt.show()


In [None]:
# Build the 8-beat input from a freshly loaded array (concat_n_averaged_beats
# writes into the array it is given), store the model output in df_out, then
# plot it against the original prediction.
n_beats = 8
data_array = concat_n_averaged_beats(
    np.load("../data/one_beat_array.npy"),
    n_beats=n_beats,
    df_beats=df_beats,
)

# predictions in the dataframe
df_out.loc[:, f'{n_beats}_avg_beats_pred'] = predict(data_array)

ax = plt.gca()
ax.scatter(
    df_out['nn_predicted_age'],
    df_out[f'{n_beats}_avg_beats_pred'],
    alpha=0.2,
)
ax.set_xlabel("Original Prediction")
ax.set_ylabel(f"{n_beats} averaged beats Prediction")
plt.show()

In [None]:
# 5-beat prediction against 8-beat prediction.
ax = plt.gca()
ax.scatter(
    df_out['5_avg_beats_pred'],
    df_out['8_avg_beats_pred'],
    alpha=0.2,
)
ax.set_xlabel("5 beats prediction")
ax.set_ylabel("8 beats prediction")
plt.show()


In [None]:
# 3-beat prediction against 8-beat prediction.
ax = plt.gca()
ax.scatter(
    df_out['3_avg_beats_pred'],
    df_out['8_avg_beats_pred'],
    alpha=0.2,
)
ax.set_xlabel("3 beats prediction")
ax.set_ylabel("8 beats prediction")
plt.show()


In [None]:
# Preview the table with the new prediction columns.
df_out.head()

In [None]:
# Save the predictions; this is the file read back near the top of the notebook.
df_out.to_csv("../data/mult_average_beats_preds.csv", index=False)