In [63]:
import matplotlib.pyplot as plt
import pandas as pd
import os
from random import choice

### Pré-processamento dos ECG's

#### Labels

In [64]:
LABELS_FOLDER = "../dataset/labels"

ecgsAcceptable = []

with open(os.path.join(LABELS_FOLDER, "RECORDS-acceptable")) as file:
    ecgsAcceptable = file.readlines()

ecgsAcceptable = map(
    lambda ecg: ecg[:-1], 
    ecgsAcceptable
)
ecgsAcceptable = list(ecgsAcceptable)

In [65]:
print(f"{ len(ecgsAcceptable) / 10 }% is acceptable")

77.3% is acceptable


#### ECG Summary

In [66]:
DATASET_FOLDER = "../dataset/set-a"

ecgs = os.listdir(DATASET_FOLDER) 
ecgs = filter(lambda ecg: ".txt" in ecg, ecgs)
ecgs = list(ecgs)

columns = [
    "amostra",
    "lead I", 
    "lead II", 
    "lead III", 
    "aVR", 
    "aVL",
    "aVF", 
    "V1",
    "V2",
    "V3",
    "V4",
    "V5",
    "V6"
]

ecgSummary = pd.DataFrame(
    columns = [
        "id", 
        "mean", 
        "median", 
        "std", 
        "variance", 
        "skew", 
        "kurtosis", 
        "snr", 
        "max", 
        "min", 
        "isAcceptable"
    ]
)

targetDerivation = "lead I"

for ecg in ecgs: 
    ecgDf = pd.read_csv(
        os.path.join(DATASET_FOLDER, ecg)
    )
    ecgDf.columns = columns

    id = ecg.split(".").pop(0)

    derivation = ecgDf[targetDerivation]

    mean = derivation.mean()
    median = derivation.median()
    std = derivation.std()
    variance = derivation.var()
    skew = derivation.skew()
    kurtosis = derivation.kurtosis()
    
    snr = 0
    if std != 0:
        snr = mean / std

    max = derivation.max()
    min = derivation.min()

    isAcceptable = "V" if id in ecgsAcceptable else "F"

    ecgSummary.loc[len(ecgSummary)] = [
        id, 
        mean, 
        median, 
        std, 
        variance, 
        skew, 
        kurtosis, 
        snr, 
        max, 
        min, 
        isAcceptable
    ]

In [67]:
print(ecgSummary.shape)
ecgSummary.sample(12)

(1000, 11)


Unnamed: 0,id,mean,median,std,variance,skew,kurtosis,snr,max,min,isAcceptable
134,1281719,1.212042,-5.0,18.319467,335.6029,1.568388,2.488239,0.066161,69,-44,V
506,1166425,1.164433,-5.0,27.054954,731.9706,4.882152,26.247094,0.04304,204,-30,V
106,2790899,0.404081,-8.0,27.412313,751.4349,2.103047,6.66933,0.014741,185,-51,V
851,2470929,2.293459,3.0,20.171357,406.8837,-0.100541,2.845743,0.113699,87,-60,V
551,1354464,0.776355,-2.0,11.438896,130.8483,3.436208,11.874804,0.06787,64,-10,V
777,2547535,-14956.60152,-15976.0,3904.5562,15245560.0,3.570277,10.756775,-3.830551,142,-15992,F
493,1480991,4.30166,-2.0,23.693779,561.3952,3.613066,15.365922,0.181552,162,-32,F
762,2622365,3.493299,0.0,16.120728,259.8779,1.558688,5.72359,0.216696,95,-42,V
717,2436645,0.325265,-11.0,30.21594,913.003,2.031577,3.900091,0.010765,150,-38,V
578,2530638,-19.391878,16.0,244.074306,59572.27,-0.276493,10.912571,-0.079451,1406,-1091,F


### Datasets para treino e validação

In [68]:
labelsGroups = ecgSummary.groupby("isAcceptable")

ecgAcceptable = labelsGroups.get_group("V")
ecgUnacceptable = labelsGroups.get_group("F")

validationLen = int(0.3 * len(ecgSummary))

ecgsValidation = ecgSummary.sample(validationLen)
ecgsValidation.head()

acceptableCount = ecgsValidation["isAcceptable"]\
    .loc[ecgsValidation["isAcceptable"] == "V"]\
    .count()

unacceptableCount = validationLen - acceptableCount

acceptablePercent = acceptableCount * 100 / validationLen
unacceptablePercent = unacceptableCount * 100 / validationLen

acceptablePercent = round(acceptablePercent, 2)
unacceptablePercent = round(unacceptablePercent, 2)

print(f"Acceptable: { acceptablePercent }%")
print(f"Unacceptable: { unacceptablePercent }%")


Acceptable: 80.33%
Unacceptable: 19.67%


### Modelos de Classificação

In [69]:
class PredictModel():
    def __init__(self, name: str):
        self.name = name

    def predict(self, ecg: pd.DataFrame):
        raise NotImplementedError()

class RandomModel(PredictModel):
    def __init__(self):
        super().__init__("Random")

    def predict(self, ecg: pd.DataFrame):
        return choice([ "V", "F" ])
    

### Avaliação de Modelos    

In [70]:
class Evaluator():
    def __init__(self, model: PredictModel):
        self.model = model
        self.results = pd.DataFrame(
            columns = [
                "id", 
                "predict", 
                "correct"
            ]
        )

        self.TP = 0 
        self.FN = 0
        self.FP = 0
        self.TN = 0

        self.accuracy = 0
        self.precision = 0
        self.sensitivity = 0
        self.f1Score = 0

    def getResults(self, ecgs: pd.DataFrame):
        self.results = pd.DataFrame(
            columns = [
                "id", 
                "predict", 
                "correct"
            ]
        )
        
        for _, ecg in ecgs.iterrows():
            labelPredicted = self.model.predict(ecg)
            
            self.results.loc[len(self.results)] = [
                ecg["id"],
                labelPredicted,
                ecg["isAcceptable"]
            ]


    def evaluate(self, ecgs: pd.DataFrame): 
        self.getResults(ecgs)

        self.TP = 0 
        self.FN = 0
        self.FP = 0
        self.TN = 0

        for _, result in self.results.iterrows():
            predict = result["predict"]
            correct = result["correct"]

            if correct == "V" and predict == "V":
                self.TP += 1

            if correct == "F" and predict == "V":
                self.FP += 1

            if correct == "V" and predict == "F":
                self.FN += 1

            if correct == "F" and predict == "F":
                self.TN += 1

        self.accuracy = (self.TP + self.TN) / (self.TP + self.TN + self.FP + self.FN)
        self.precision = self.TP / (self.TP + self.FP)
        self.sensitivity = self.TP / (self.TP + self.FN)
        self.f1Score = 2 * (self.precision * self.sensitivity) 
        self.f1Score /= (self.precision + self.sensitivity)

    def toString(self):
        accuracy = round(self.accuracy, 2)
        precision = round(self.precision, 2)
        sensitivity = round(self.sensitivity, 2)
        f1Score = round(self.f1Score, 2)

        out = f"--- { self.model.name} ---\n"
        out += f"Acurácia: { accuracy * 100 }%\n"
        out += f"Precisão: { precision * 100 }%\n"
        out += f"Sensibilidade: { sensitivity * 100 }%\n"
        out += f"F1-Score: { f1Score * 100 }%\n"

        return out


In [71]:
randomModel = RandomModel()

evaluator = Evaluator(randomModel)
evaluator.evaluate(ecgsValidation)

print(evaluator.toString())

--- Random ---
Acurácia: 47.0%
Precisão: 79.0%
Sensibilidade: 46.0%
F1-Score: 57.99999999999999%

