# Regression Analysis to Improve Performance

## Essential Libraries

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import cv2

from sklearn.linear_model import LinearRegression

## ECG Metadata

In [None]:
# Folder that holds one "<ecgId>.txt" file per recording.
sampleFolders = "../ecgs/set-a"

# Column names applied to every recording file when it is loaded with
# pd.read_csv: "sample" first, followed by the twelve lead columns.
ecgHeaders = [
	"sample",
	"LI",
	"LII",
	"LIII",
	"aVR",
	"aVF",
	"aVL",
	"V1",
	"V2",
	"V3",
	"V4",
	"V5",
	"V6",
]

# Read the recording IDs listed in the RECORDS-acceptable label file, one per
# line. Blank lines are skipped so they never become an empty ID, and the
# loop variable no longer shadows the builtin `id`.
with open("../ecgs/labels/RECORDS-acceptable") as file:
	acceptableEcgsFiles = [
		line.strip()
		for line in file
		if line.strip()
	]

# Quick sanity check of what was loaded.
print(acceptableEcgsFiles[:5])


## Correlation Matrix images

In [None]:
# Render one Pearson-correlation heatmap per recording and save it as
# ./correlations/<ecgId>.png.

# savefig does not create missing directories, so make sure the target exists.
os.makedirs("./correlations", exist_ok = True)

for ecgId in acceptableEcgsFiles:
	ecgDf = pd.read_csv(
		filepath_or_buffer = f"{sampleFolders}/{ecgId}.txt",
		header = None,
		index_col = 0,
		names = ecgHeaders
	)

	correlations = ecgDf.corr(method = "pearson")

	figure, axes = plt.subplots()

	image = axes.imshow(
		correlations,
		aspect = "auto"
	)

	axes.set_title("Correlação entre as derivações")

	# One tick per row/column of the matrix, derived from the matrix itself
	# instead of a hard-coded 12 so ticks and labels can never disagree.
	axes.set_xticks(
		np.arange(correlations.shape[1]),
		labels = correlations.columns
	)
	axes.set_yticks(
		np.arange(correlations.shape[0]),
		labels = correlations.index
	)

	# Write the rounded coefficient inside every cell.
	for row in range(correlations.shape[0]):
		for column in range(correlations.shape[1]):
			correlation = np.round(
				correlations.iloc[row, column],
				1
			)

			# Axes.text takes (x, y): x is the column position and y is the
			# row position of the cell drawn by imshow.
			axes.text(
				column, row,
				correlation,
				ha = "center",
				va = "center",
				# Values that round to zero are highlighted in red.
				color = "red" if correlation == 0 else "w"
			)

	figure.colorbar(image, ax = axes)
	figure.savefig(f"./correlations/{ecgId}.png")

	# Close the figure so open figures do not accumulate across recordings.
	plt.close(figure)

	del ecgDf


## Correlation Matrix Video

In [None]:
# Assemble the saved correlation heatmaps into a 1 fps slideshow video
# (correlations.avi).

# os.listdir returns entries in arbitrary order, so the names are sorted to
# get a reproducible frame sequence. Only PNG files are kept: any other entry
# in the folder would make cv2.imread return None.
correlationsImages = sorted(
    name
    for name in os.listdir("./correlations")
    if name.lower().endswith(".png")
)

if not correlationsImages:
    raise FileNotFoundError("No PNG images found in ./correlations")

# The first image defines the frame size of the whole video.
frame = cv2.imread(f"./correlations/{correlationsImages[0]}")
height, width, _ = frame.shape

video = cv2.VideoWriter(
    filename = "correlations.avi",
    fourcc = cv2.VideoWriter_fourcc(*"XVID"),
    fps = 1,
    frameSize = (width, height)
)

# Release the writer even if reading or writing a frame fails, so the output
# file is always finalised. No window is opened by this cell, so there is
# nothing for cv2.destroyAllWindows() to close.
try:
    for image in correlationsImages:
        video.write(cv2.imread(f"./correlations/{image}"))
finally:
    video.release()

# ffmpeg -i correlations.avi -c:v libx264 -crf 23 correlations.mp4

## RegTables Images

In [None]:
# Names of the columns to be reconstructed: every header except "sample" and
# the three leads (LI, LII, V2) that the regressions below use as inputs.
recHeaders = set(ecgHeaders).difference({"sample", "LI", "LII", "V2"})
print(recHeaders)

In [None]:
# For every recording, fit one linear regression per lead in recHeaders
# (inputs: LI, LII and V2) and save the fitted coefficients and intercept as
# a table image in ./regtables/<ecgId>.png.
regression = LinearRegression()

# savefig does not create missing directories, so make sure the target exists.
os.makedirs("./regtables", exist_ok = True)

for ecgId in acceptableEcgsFiles:
	ecgDf = pd.read_csv(
		filepath_or_buffer = f"{sampleFolders}/{ecgId}.txt",
		header = None,
		index_col = 0,
		names = ecgHeaders
	)

	# The input columns and the [100: 500] fitting window are the same for
	# every lead, so the slice is taken once per recording.
	fitInputs = ecgDf[["LI", "LII", "V2"]][100: 500]

	# Rows are collected in a plain list and turned into a DataFrame once,
	# instead of enlarging a DataFrame row by row.
	regressionRows = []

	for recLead in recHeaders:
		regression.fit(
			X = fitInputs,
			y = ecgDf[recLead][100: 500]
		)

		regressionRows.append([
			recLead,
			regression.coef_[0],
			regression.coef_[1],
			regression.coef_[2],
			regression.intercept_
		])

		# Only the coefficients are needed for the table, so the fitted model
		# is not used to predict or overwrite the lead in ecgDf here.

	regressionDf = pd.DataFrame(
		regressionRows,
		columns = ["lead", "LI", "LII", "V2", "Intercept"],
	).set_index("lead")

	figure, axes = plt.subplots()

	# The figure only hosts the table, so the axes frame and ticks are hidden.
	axes.axis("off")

	figure.suptitle("Tabelas das regressões usadas para cada derivação")

	figure.subplots_adjust(top = .5)

	plt.table(
		cellText = np.round(regressionDf.values, 5),
		colLabels = regressionDf.columns,
		rowLabels = regressionDf.index,
		loc = "center",
	)

	plt.tight_layout(pad = 1.5)

	plt.savefig(f"./regtables/{ecgId}.png")

	# Close the figure so open figures do not accumulate across recordings.
	plt.close()

	del regressionDf
	del ecgDf

## RegTables Video

In [None]:
# Assemble the saved regression-table images into a 1 fps slideshow video
# (regtables.avi).

# os.listdir returns entries in arbitrary order, so the names are sorted to
# get a reproducible frame sequence. Only PNG files are kept: any other entry
# in the folder would make cv2.imread return None.
regTableImages = sorted(
    name
    for name in os.listdir("./regtables")
    if name.lower().endswith(".png")
)

if not regTableImages:
    raise FileNotFoundError("No PNG images found in ./regtables")

# The first image defines the frame size of the whole video.
frame = cv2.imread(f"./regtables/{regTableImages[0]}")
height, width, _ = frame.shape

video = cv2.VideoWriter(
    filename = "regtables.avi",
    fourcc = cv2.VideoWriter_fourcc(*"XVID"),
    fps = 1,
    frameSize = (width, height)
)

# Release the writer even if reading or writing a frame fails, so the output
# file is always finalised. No window is opened by this cell, so there is
# nothing for cv2.destroyAllWindows() to close.
try:
    for image in regTableImages:
        video.write(cv2.imread(f"./regtables/{image}"))
finally:
    video.release()

# ffmpeg -i regtables.avi -c:v libx264 -crf 23 regtables.mp4

## ECG regression coefficients dataframe

In [None]:
# Build a single DataFrame with the regression coefficients of every
# (recording, lead) pair: one row per lead in recHeaders per recording, with
# LI, LII and V2 as the regression inputs.
regression = LinearRegression()

# Rows are accumulated in a plain list and converted once at the end.
# Enlarging a DataFrame with .loc[len(df)] on every iteration copies data on
# each append, which adds up over all recordings and leads.
regressionRows = []

for ecgId in acceptableEcgsFiles:
	ecgDf = pd.read_csv(
		filepath_or_buffer = f"{sampleFolders}/{ecgId}.txt",
		header = None,
		index_col = 0,
		names = ecgHeaders
	)

	# The input columns and the [100: 500] fitting window are the same for
	# every lead, so the slice is taken once per recording.
	fitInputs = ecgDf[["LI", "LII", "V2"]][100: 500]

	for recLead in recHeaders:
		regression.fit(
			X = fitInputs,
			y = ecgDf[recLead][100: 500]
		)

		regressionRows.append([
			ecgId,
			recLead,
			regression.coef_[0],
			regression.coef_[1],
			regression.coef_[2],
			regression.intercept_
		])

		# Only the coefficients are collected, so the fitted model is not
		# used to predict or overwrite the lead in ecgDf here.

	del ecgDf

regressionDf = pd.DataFrame(
	regressionRows,
	columns = ["ecgId", "lead", "LI", "LII", "V2", "Intercept"],
)

In [None]:
# Summary statistics of the collected regression coefficients, shown with
# three decimal places.
coefficientSummary = regressionDf.describe()
print(coefficientSummary.round(3))