# Libraries

In [None]:
import pandas 			 as pd
import numpy 			 as np
import matplotlib.pyplot as plt

import scipy.signal as signal

from sklearn.preprocessing   import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics 		 import root_mean_squared_error
from sklearn.linear_model 	 import LinearRegression

import joblib


# ECG Definitions

ECG file headers

In [None]:
# Column names assigned to the ECG text files when they are read:
# the sample column (used as index) followed by the 12 leads
ecgHeaders = [
	"sample",
	"LI", "LII", "LIII",
	"aVR", "aVL", "aVF",
	"V1", "V2", "V3", "V4", "V5", "V6",
]

ECG plot definitions

In [None]:
# Leads in the order the 3 x 4 plot grid is filled (row by row)
ecgPlotHeaders = [
	"LI",   "aVR", "V1", "V4",
	"LII",  "aVL", "V2", "V5",
	"LIII", "aVF", "V3", "V6",
]

# Line color used for each lead
ecgPlotColors = dict(
	LI   = "seagreen",
	aVR  = "black",
	V1   = "gold",
	V4   = "orangered",
	LII  = "cornflowerblue",
	aVL  = "seagreen",
	V2   = "gold",
	V5   = "crimson",
	LIII = "cornflowerblue",
	aVF  = "cornflowerblue",
	V3   = "orangered",
	V6   = "crimson",
)

Loading ECGs labeled as acceptable

In [None]:
# Record names (one per line) of the ECGs labeled as acceptable.
# Blank lines are skipped so that an empty line in the file does not
# turn into an empty record name.
with open("../../data/labels/RECORDS-acceptable") as file:
	ecgsDataset = [
		ecgName.strip()
		for ecgName in file
		if ecgName.strip()
	]

print("Dataset len =", len(ecgsDataset))

Split dataset

In [None]:
# Fixed seed so that the train/validation partition is the same on every run
splitSeed = 42

# 70 % of the records for training, 30 % for validation
trainEcgs, validationEcgs = train_test_split(
	ecgsDataset,
	train_size   = .70,
	test_size    = .30,
	random_state = splitSeed
)

print("Train dataset len =",      len(trainEcgs))
print("Validation dataset len =", len(validationEcgs))

# Linear Regression Definitions

Features Vector

In [None]:
# Inputs of the regression; "Z" is a derived column, Z = -0.5 * V2
ecgFeatures = ["LII", "Z", "V6"]

Target Vector

In [None]:
# Leads predicted by the regression
ecgTarget = [
	"LI", "LIII",
	"aVR", "aVL", "aVF",
	"V1", "V3", "V4", "V5",
]

# Preprocessing Dataset

ECG preprocessing function

- High-pass Butterworth filter with $f_c = 1$ Hz
- Truncation of the first and last $N = 500$ samples

In [None]:
# Filter order, cutoff frequency and sampling frequency handed to signal.butter
order, fc, fs = 2, 1, 500

# b: numerator coefficients, a: denominator coefficients of the high-pass filter
b, a = signal.butter(order, fc, btype = "highpass", fs = fs)

print("Numerator: ",   b)
print("Denominator: ", a)

In [None]:
def preprocessing(ecgDf, numerator = None, denominator = None, truncation = 500):
	"""
	Filter every column of an ECG frame, trim both ends and add the "Z" column.

	Parameters
	----------
	ecgDf : pandas.DataFrame
		One column per lead; it must contain a "V2" column.
	numerator, denominator : array-like, optional
		Filter coefficients passed to signal.filtfilt. When omitted, the
		module-level `b` and `a` are used.
	truncation : int, optional
		Number of samples removed from each end after filtering (default 500).

	Returns
	-------
	pandas.DataFrame
		A new frame with the filtered, trimmed columns plus "Z" = -0.5 * "V2".
		The input frame is not modified.
	"""
	if numerator is None:
		numerator = b
	if denominator is None:
		denominator = a

	ecgFiltered = ecgDf.apply(
		lambda column: signal.filtfilt(numerator, denominator, column),
		axis = 0
	)

	# The stop position is computed explicitly because iloc[n:-n] is empty for n = 0.
	# copy() makes the trimmed frame independent, so adding "Z" below is not a
	# chained assignment on a slice.
	stop     = len(ecgFiltered) - truncation
	ecgClean = ecgFiltered.iloc[truncation: stop].copy()

	ecgClean["Z"] = -0.5 * ecgClean["V2"]

	return ecgClean

Load and preprocess the dataset

In [None]:
# One (samples x leads) array per recording, stacked once after the loop.
# Extending a list with an array would store every sample row as its own object.
xTrainParts = []
yTrainParts = []

for trainEcgFile in trainEcgs:
	ecg = pd.read_csv(
		filepath_or_buffer = f"../../data/set-a/{trainEcgFile}.txt",
		names = ecgHeaders,
		index_col = 0
	)

	ecgClean = preprocessing(ecg)

	xTrain = ecgClean[ecgFeatures]
	yTrain = ecgClean[ecgTarget]

	xTrainParts.append(xTrain.values)
	yTrainParts.append(yTrain.values)

# Rows of all training recordings, in file order
xTrainAll = np.concatenate(xTrainParts, axis = 0)
yTrainAll = np.concatenate(yTrainParts, axis = 0)

In [None]:
# One (samples x leads) array per recording, stacked once after the loop.
# Extending a list with an array would store every sample row as its own object.
xValidationParts = []
yValidationParts = []

for validationEcgFile in validationEcgs:
	ecg = pd.read_csv(
		filepath_or_buffer = f"../../data/set-a/{validationEcgFile}.txt",
		names = ecgHeaders,
		index_col = 0
	)

	ecgClean = preprocessing(ecg)

	xValidation = ecgClean[ecgFeatures]
	yValidation = ecgClean[ecgTarget]

	xValidationParts.append(xValidation.values)
	yValidationParts.append(yValidation.values)

# Rows of all validation recordings, in file order
xValidationAll = np.concatenate(xValidationParts, axis = 0)
yValidationAll = np.concatenate(yValidationParts, axis = 0)

Normalization

In [None]:
# Feature scaler, with the library defaults spelled out
scaler = StandardScaler(copy = True, with_mean = True, with_std = True)

In [None]:
# The scaling statistics come from the training features only;
# the validation features are transformed with those same statistics
scaler.fit(xTrainAll)

xTrainScaled      = scaler.transform(xTrainAll)
xValidationScaled = scaler.transform(xValidationAll)

# Training

In [None]:
# Multi-output linear regression: scaled features -> target leads
linearModel = LinearRegression(fit_intercept = True)

In [None]:
# Fit on the scaled training features against the training target leads
linearModel.fit(X = xTrainScaled, y = yTrainAll)

In [None]:
# Fitted parameters rounded to 3 decimals for display
coefficientsRounded = np.round(linearModel.coef_, 3)
interceptRounded    = np.round(linearModel.intercept_, 3)

print("Coefficients: ")
print(coefficientsRounded)
print(interceptRounded)

# Validation

In [None]:
yPredicted = linearModel.predict(xValidationScaled)

# "raw_values" returns one RMSE per output column instead of a single average
rmsePerLead = root_mean_squared_error(
	yValidationAll,
	yPredicted,
	multioutput = "raw_values"
)
rmseValidation = np.round(rmsePerLead, 3).tolist()

# Single-row table: one column per target lead
rmseScores = pd.DataFrame([rmseValidation], columns = ecgTarget)

print(rmseScores)

# Plotting

In [None]:
def plotECG(ecg, headers, colors):
	"""
	Draw each lead of an ECG in its own panel of a 3 x 4 grid.

	Parameters
	----------
	ecg : object indexable by lead name (e.g. a DataFrame); ecg[header] is plotted.
	headers : iterable of lead names, in the order the panels are filled (row by row).
	colors : mapping from lead name to the line color of that lead.

	The figure is shown and then closed; nothing is returned.
	"""
	fig, grid = plt.subplots(3, 4, sharex = True, figsize = (16, 9))

	fig.suptitle("ECG 12-Lead")
	fig.supxlabel("Sample")
	fig.supylabel("Dpp")

	panels = grid.flatten()

	for position, lead in enumerate(headers):
		panel = panels[position]
		panel.plot(ecg[lead], color = colors[lead])
		panel.set_title(f"{lead}")

	plt.tight_layout(pad = 1.5)
	plt.show()
	plt.close()

In [None]:
def comparativeFullEcgPlot(ecgOring, ecgRec, headers):
	"""
	Overlay an original ECG and its reconstruction, lead by lead, on a 3 x 4 grid.

	Each panel title shows the correlation and the RMSE between the two
	versions of that lead, both rounded to 3 decimals.

	Parameters
	----------
	ecgOring : DataFrame with the original leads (drawn in blue).
	ecgRec : DataFrame with the reconstructed leads (drawn in red).
	headers : iterable of lead names present in both frames, in the order
		the panels are filled (row by row).

	The figure is shown and then closed; nothing is returned.
	"""
	figure, axes = plt.subplots(
		nrows   = 3,
		ncols   = 4,
		sharex  = True,
		figsize = (16, 9)
	)

	figure.suptitle("Comparison: ECG 12-Lead")
	figure.supxlabel("Sample")
	figure.supylabel("Dpp")

	axes = axes.flatten()

	for idx, header in enumerate(headers):
		original      = ecgOring[header]
		reconstructed = ecgRec[header]

		corr = np.round(original.corr(reconstructed), 3)
		rmse = np.round(root_mean_squared_error(original, reconstructed), 3)

		axes[idx].plot(
			original,
			color = "blue",
			alpha = 0.75,
			label = "Original"
		)
		axes[idx].plot(
			reconstructed,
			color = "red",
			alpha = 0.75,
			label = "Reconstructed"
		)

		axes[idx].set_title(f"{header} CORR = {corr} RMSE = {rmse}")

	# Every panel uses the same two colors, so one figure-level legend is enough
	legendHandles, legendLabels = axes[0].get_legend_handles_labels()
	if legendHandles:
		figure.legend(legendHandles, legendLabels, loc = "upper right")

	plt.tight_layout(pad = 1.5)

	plt.show()

	# Close this specific figure rather than whichever one is current
	plt.close(figure)

Loading an ECG sample for reconstruction

In [None]:
# Recording used in the reconstruction cells below.
# The path is a plain string: it has no placeholders, so no f-string is needed.
ecgSampleDf = pd.read_csv(
	filepath_or_buffer = "../../samples/1002867.txt",
	names              = ecgHeaders,
	index_col          = 0
)

In [None]:
# NOTE: this rebinds ecgSampleDf, so re-running the cell filters and truncates
# the already preprocessed signal again; reload the sample before re-running.
ecgSampleDf = preprocessing(ecgDf = ecgSampleDf)

Trying to reconstruct the ECG

In [None]:
xFeaturesSampleEcgDf = ecgSampleDf[ecgFeatures]
yTargetSampleEcg     = ecgSampleDf[ecgTarget].values

# Scale the sample with the already fitted scaler, then predict the target leads
xFeaturesSampleScaled    = scaler.transform(X = xFeaturesSampleEcgDf.values)
ecgSampleTargetPredicted = linearModel.predict(X = xFeaturesSampleScaled)

In [None]:
# Wrap the predictions in a frame that shares the sample's index,
# with one column per target lead
ecgSampleTargetPredictedDf = pd.DataFrame(
	ecgSampleTargetPredicted,
	index   = ecgSampleDf.index,
	columns = ecgTarget
)

In [None]:
# Reconstructed ECG: the feature leads side by side with the predicted leads
reconstructedParts = [xFeaturesSampleEcgDf, ecgSampleTargetPredictedDf]
ecgSampleRecDf     = pd.concat(reconstructedParts, axis = "columns")


In [None]:
# V2 is copied from the preprocessed sample: it is not a predicted lead
# (the model only sees it through the derived feature Z)
ecgSampleRecDf = ecgSampleRecDf.assign(V2 = ecgSampleDf["V2"])

Comparison between original and reconstructed ECG

In [None]:
# Full-length comparison: original sample vs. its reconstruction
comparativeFullEcgPlot(ecgSampleDf, ecgSampleRecDf, ecgPlotHeaders)

Viewing a strip of the reconstruction

In [None]:
# Window of rows used to zoom into the reconstruction
viewportStart, viewportStop = 1000, 1500
viewport = slice(viewportStart, viewportStop)

In [None]:
# Same comparison restricted to the viewport rows (selected by position)
comparativeFullEcgPlot(
	ecgOring = ecgSampleDf.iloc[viewport],
	ecgRec   = ecgSampleRecDf.iloc[viewport],
	headers  = ecgPlotHeaders
)

# Save Model

In [None]:
# Persist the fitted regressor and the scaler used to prepare its inputs
joblib.dump(linearModel, "../../models/t1/reglin-t1-v0.pkl")

joblib.dump(scaler, "../../models/t1/reglin-t1-v0.scaler.pkl")