# Libraries

In [None]:
import joblib

import pandas 			 as pd
import numpy 			 as np
import matplotlib.pyplot as plt

import scipy.signal 	 as signal
import scipy.stats 		 as stats

from sklearn.metrics import mean_squared_error

# ECG Definitions

ECG file headers

In [None]:
# Column names given to the header-less ECG text files when they are read.
# The first name, "sample", labels the column later used as the index; the
# remaining twelve names label the ECG leads, in file order.
ecgHeaders = [
	"sample",
	"LI", "LII", "LIII",
	"aVR", "aVL", "aVF",
	"V1", "V2", "V3", "V4", "V5", "V6",
]

Loading ECGs labeled as acceptable

In [None]:
# Read the identifiers of the ECG records labeled as acceptable (one per line).
with open("../../data/labels/RECORDS-acceptable") as file:
	# Iterate over the file directly instead of materializing readlines().
	# Surrounding whitespace/newlines are stripped and blank lines are
	# skipped, so an empty identifier is never turned into a file path later.
	ecgsDataset = [
		ecgName.strip()
		for ecgName in file
		if ecgName.strip()
	]

print("Dataset len =", len(ecgsDataset))

ECG preprocessing function

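- Zero-phase high-pass Butterworth filter (order 2, applied with `filtfilt`) with cutoff frequency $f_c = 1$ Hz
- Amplitude scaling of the filtered signal by a factor of $2.5$
- Truncation of $N = 500$ samples from each end of the record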

In [None]:
# Design parameters of the high-pass Butterworth filter.
order = 2    # filter order
fc    = 1    # cutoff frequency, in the same units as fs (Hz)
fs    = 500  # sampling frequency handed to the filter design

# Transfer-function coefficients: b is the numerator, a the denominator.
b, a = signal.butter(order, fc, btype = "highpass", fs = fs)

print("Numerator: ",   b)
print("Denominator: ", a)

In [None]:
def preprocessing(ecgDf, scale = 2.5, crop = 500, coeffs = None):
	"""
	Filter, rescale and trim every column of an ECG DataFrame.

	Each column is passed through `signal.filtfilt` (forward-backward
	filtering), multiplied by `scale`, and then `crop` rows are removed
	from both the beginning and the end of the frame.

	Parameters
	----------
	ecgDf : pandas.DataFrame
		One column per signal, one row per sample.
	scale : float, default 2.5
		Factor applied to the filtered signal.
		NOTE(review): presumably a unit/gain conversion -- confirm its origin.
	crop : int, default 500
		Number of rows dropped at each end of the filtered frame.
	coeffs : tuple (b, a), optional
		Filter numerator/denominator coefficients. When omitted, the
		notebook-level `b` and `a` are used.

	Returns
	-------
	pandas.DataFrame
		Filtered, scaled and trimmed frame; keeps the columns and the
		surviving index labels of `ecgDf`.

	Raises
	------
	ValueError
		If `crop` is negative.
	"""
	if crop < 0:
		raise ValueError("crop must be a non-negative number of samples")

	# Default to the coefficients designed at notebook level.
	numerator, denominator = (b, a) if coeffs is None else coeffs

	ecgFiltered = ecgDf.apply(
		lambda serie: scale * signal.filtfilt(numerator, denominator, serie),
		axis = 0
	)

	# An explicit stop index is used because iloc[crop: -crop] would return
	# an empty frame for crop = 0.
	ecgCropped = ecgFiltered.iloc[crop: len(ecgFiltered) - crop]

	return ecgCropped

# Linear Regression Definitions

Features Vector

In [None]:
# Leads used as inputs (feature columns) of the regression model.
ecgFeatures = ["LI", "aVF", "V2"]

Target Vector

In [None]:
# Leads the model reconstructs (target columns): every lead in the ECG files
# that is not one of the input features.
ecgTarget = [
	"LII", "LIII",
	"aVR", "aVL",
	"V1", "V3", "V4", "V5", "V6",
]

Loading Linear Regression Model and Scaler

In [None]:
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model artifacts that come from a trusted source.
linearRegression = joblib.load(filename = "../../models/v0/reglin-v0.pkl")

# Show the fitted coefficients and intercept, one after the other.
print(linearRegression.coef_, linearRegression.intercept_, sep = "\n")

In [None]:
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load scaler artifacts that come from a trusted source.
scaler = joblib.load(filename = "../../models/v0/reglin-v0.scaler.pkl")

# Show the per-feature mean and variance stored in the scaler.
print(scaler.mean_, scaler.var_, sep = "\n")

# Graphical analysis of reconstruction performance

Calculating the Pearson correlation and the Mean Squared Error (MSE) of each reconstructed lead, for every ECG

In [None]:
# One dict of per-lead metrics is collected for every ECG and the result
# tables are built once after the loop. Growing a DataFrame with pd.concat
# inside the loop is quadratic, and concatenating onto an empty frame is
# deprecated in pandas and can leave the columns with object dtype.
corrRecords = []
mseRecords  = []

for ecgID in ecgsDataset:

	# Load and clean ECG file

	ecg = pd.read_csv(
		filepath_or_buffer = f"../../data/set-a/{ecgID}.txt",
		names              = ecgHeaders,
		index_col          = 0
	)

	ecgClean = preprocessing(ecg)


	# Extract features and expected vector

	features = ecgClean[ecgFeatures].values
	expected = ecgClean[ecgTarget]


	# Reconstructing the derivations

	predicted = linearRegression.predict(
		X = scaler.transform(
			X = features
		)
	)
	predicted = pd.DataFrame(
		data    = predicted,
		columns = ecgTarget,
	)


	# Calculating MSE and CORR for each lead

	mseRow  = {}
	corrRow = {}

	for derivation in ecgTarget:

		# `expected` keeps the sample index of the file while `predicted`
		# has a fresh 0..n-1 index, so both are compared position by
		# position as plain arrays.
		expectedLead  = expected[derivation].to_numpy()
		predictedLead = predicted[derivation].to_numpy()

		mseRow[derivation] = mean_squared_error(
			y_true = expectedLead,
			y_pred = predictedLead
		)

		corrRow[derivation] = stats.pearsonr(
			x = expectedLead,
			y = predictedLead
		).statistic


	# Storing the metrics

	mseRecords.append(mseRow)
	corrRecords.append(corrRow)


# One row per ECG (in dataset order), one float column per target lead.
correlations = pd.DataFrame(corrRecords, columns = ecgTarget, dtype = float)
mseScores    = pd.DataFrame(mseRecords,  columns = ecgTarget, dtype = float)


Description of statistical measures

In [None]:
# Discard the ECGs (rows) that have a NaN correlation in any lead, then
# print the descriptive statistics of each lead.
correlations = correlations.dropna(axis = "index", how = "any")
print(correlations.describe())

In [None]:
# Discard the ECGs (rows) that have a NaN MSE in any lead, then print the
# descriptive statistics of each lead.
mseScores = mseScores.dropna(axis = "index", how = "any")
print(mseScores.describe())

The comparative plot function

In [None]:
def methodComparativePlot(df, derivation, method):
	"""
	Show a two-panel figure for one column of a metrics table.

	The left panel is a scatter of the column's values against the table
	index; the right panel is a 50-bin histogram whose counts are divided
	by the number of rows. Both panels mark the column mean (rounded to 3
	decimals) with a dashed red line.

	Parameters
	----------
	df : pandas.DataFrame
		Table of metric values containing the column `derivation`.
	derivation : str
		Column of `df` to plot; also used in the titles.
	method : str
		Name of the metric, used in the titles and axis labels.
	"""
	values    = df[derivation]
	meanValue = np.round(np.mean(values), 3)
	meanLabel = f"Média = {meanValue}"
	pairLabel = f"{method}($ {derivation} $, $ {derivation}_{{rec}} $)"

	fig, (scatterAx, histAx) = plt.subplots(nrows = 1, ncols = 2, figsize = (12, 6))

	# Left panel: metric value of each row against its index.
	scatterAx.set_title(pairLabel)
	scatterAx.set_xlabel("n")
	scatterAx.set_ylabel(f"{method}")
	scatterAx.scatter(df.index, values)
	scatterAx.axhline(meanValue, color = 'r', linestyle = '--', label = meanLabel)
	scatterAx.legend()

	# Right panel: relative-frequency histogram of the metric.
	histAx.set_title(f"Histograma - {pairLabel}")
	histAx.set_xlabel(f"{method}")
	histAx.set_ylabel("Frequência")
	counts, bins = np.histogram(values, 50)
	histAx.stairs(counts / len(values), bins, fill = True)
	histAx.axvline(meanValue, color = 'r', linestyle = '--', label = meanLabel)
	histAx.legend()

	plt.show()

	plt.close()

## Results

In [None]:
# One comparative figure of the MSE for each reconstructed lead.
for derivation in ecgTarget:
	methodComparativePlot(df = mseScores, derivation = derivation, method = "MSE")

In [None]:
# One comparative figure of the correlation for each reconstructed lead.
for derivation in ecgTarget:
	methodComparativePlot(df = correlations, derivation = derivation, method = "CORR")