In [82]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
from random import choice

### Pré-processamento dos ECG's

#### Labels

In [83]:
LABELS_FOLDER = "../dataset/labels"

ecgsAcceptable = []

with open(os.path.join(LABELS_FOLDER, "RECORDS-acceptable")) as file:
    ecgsAcceptable = file.readlines()

ecgsAcceptable = map(
    lambda ecg: ecg[:-1], 
    ecgsAcceptable
)
ecgsAcceptable = list(ecgsAcceptable)

In [84]:
print(f"{ len(ecgsAcceptable) / 10 }% is acceptable")

77.3% is acceptable


#### ECG Summary

In [85]:
DATASET_FOLDER = "../dataset/set-a"

ecgs = os.listdir(DATASET_FOLDER) 
ecgs = filter(lambda ecg: ".txt" in ecg, ecgs)
ecgs = list(ecgs)

ecgColumns = [
    "amostra",
    "lead I", 
    "lead II", 
    "lead III", 
    "aVR", 
    "aVL",
    "aVF", 
    "V1",
    "V2",
    "V3",
    "V4",
    "V5",
    "V6"
]

ecgsLabels = pd.DataFrame(
    columns = [
        "id", 
        "isAcceptable"
    ]
)

for ecg in ecgs: 
    ecgId = ecg.split(".").pop(0)

    isAcceptable = "V" if ecgId in ecgsAcceptable else "F"

    ecgsLabels.loc[len(ecgsLabels)] = [
        ecgId, 
        isAcceptable
    ]

In [86]:
print(ecgsLabels.shape)
ecgsLabels.sample(12)

(1000, 2)


Unnamed: 0,id,isAcceptable
789,2679451,V
911,2868054,V
818,2855979,V
861,1073952,V
972,2681017,V
537,1780407,V
468,1050325,F
254,2116607,V
984,2760946,V
675,2129226,V


### Datasets para treino e validação

In [87]:
labelsGroups = ecgsLabels.groupby("isAcceptable")

ecgAcceptable = labelsGroups.get_group("V")
ecgUnacceptable = labelsGroups.get_group("F")

validationLen = int(0.3 * len(ecgsLabels))

ecgsValidation = ecgsLabels.sample(validationLen)
print(ecgsValidation.head())

acceptableCount = ecgsValidation["isAcceptable"]\
    .loc[ecgsValidation["isAcceptable"] == "V"]\
    .count()

unacceptableCount = validationLen - acceptableCount

acceptablePercent = acceptableCount * 100 / validationLen
unacceptablePercent = unacceptableCount * 100 / validationLen

acceptablePercent = round(acceptablePercent, 2)
unacceptablePercent = round(unacceptablePercent, 2)

print(f"Acceptable: { acceptablePercent }%")
print(f"Unacceptable: { unacceptablePercent }%")


          id isAcceptable
762  2622365            V
536  1035858            V
615  1812105            V
416  1882255            F
645  1085468            V
Acceptable: 77.0%
Unacceptable: 23.0%


### Modelos de Classificação

#### Flatline

In [88]:
class FlatlineModel():
    def __init__(self, threshold):
        self.name = "Flatline Model"
        
        self.threshold = threshold

    def predict(self, ecgId, derivation):
        ecgFilename = os.path.join(
            DATASET_FOLDER, 
            f"{ ecgId }.txt"
        )

        ecg = pd.read_csv(ecgFilename)
        ecg.columns = ecgColumns

        derivate = np.diff(ecg[derivation])
        derivateAbs = np.abs(derivate)
        derivateMean = np.mean(derivateAbs)

        if derivateMean <= self.threshold:
            return "F"
        return "V"


#### STD

In [89]:
class STDModel():
    def __init__(self, threshold):
        self.name = "STD Model"
        self.threshold = threshold

    def predict(self, ecgId, derivation):
        ecgFilename = os.path.join(
            DATASET_FOLDER, 
            f"{ ecgId }.txt"
        )

        ecg = pd.read_csv(ecgFilename)
        ecg.columns = ecgColumns

        noise = ecg[derivation].std()

        if noise >= self.threshold:
            return "F"
        return "V"


#### Compound

In [90]:
class CompoundModel():
    def __init__(self, flatlineThreshold, noiseThreshold):
        self.name = "Compound Model"

        self.flatlineModel = FlatlineModel(flatlineThreshold)
        self.STDModel = STDModel(noiseThreshold)

    def predict(self, ecgId):
        derivations = ecgColumns[1:]

        for derivation in derivations:
            flatlineLabel = self.flatlineModel.predict(
                ecgId, 
                derivation
            )

            if flatlineLabel == "F":
                return "F"
            
        derivationWithNoise = 0

        for derivation in derivations:
            stdLabel = self.STDModel.predict(
                ecgId, 
                derivation
            )

            if stdLabel == "F":
                derivationWithNoise += 1

            if derivationWithNoise >= 2:
                return "F"
            
        return "V"

### Avaliação de Modelos    

#### Avaliador

In [91]:
class Evaluator():
    def __init__(self, model):
        self.model = model
        self.results = pd.DataFrame(
            columns = [
                "id", 
                "predict", 
                "correct"
            ]
        )

        self.TP = 0 
        self.FN = 0
        self.FP = 0
        self.TN = 0

        self.accuracy = 0
        self.precision = 0
        self.sensitivity = 0
        self.f1Score = 0

    def getResults(self, ecgsValidation):
        self.results = pd.DataFrame(
            columns = [
                "id", 
                "predict", 
                "correct"
            ]
        )
        
        for _, ecgLabel in ecgsValidation.iterrows():
            labelPredicted = self.model.predict(ecgLabel["id"])
            
            self.results.loc[len(self.results)] = [
                ecgLabel["id"],
                labelPredicted,
                ecgLabel["isAcceptable"]
            ]


    def evaluate(self, ecgsValidation): 
        self.getResults(ecgsValidation)

        self.TP = 0 
        self.FN = 0
        self.FP = 0
        self.TN = 0

        for _, result in self.results.iterrows():
            predict = result["predict"]
            correct = result["correct"]

            if correct == "V" and predict == "V":
                self.TP += 1

            if correct == "F" and predict == "V":
                self.FP += 1

            if correct == "V" and predict == "F":
                self.FN += 1

            if correct == "F" and predict == "F":
                self.TN += 1

        self.accuracy = (self.TP + self.TN) / (self.TP + self.TN + self.FP + self.FN)
        self.precision = self.TP / (self.TP + self.FP)
        self.sensitivity = self.TP / (self.TP + self.FN)
        self.f1Score = 2 * (self.precision * self.sensitivity) 
        self.f1Score /= (self.precision + self.sensitivity)

    def toString(self):
        accuracy = round(self.accuracy, 2)
        precision = round(self.precision, 2)
        sensitivity = round(self.sensitivity, 2)
        f1Score = round(self.f1Score, 2)

        out = f"--- { self.model.name} ---\n"
        out += f"Acurácia: { accuracy * 100 }%\n"
        out += f"Precisão: { precision * 100 }%\n"
        out += f"Sensibilidade: { sensitivity * 100 }%\n"
        out += f"F1-Score: { f1Score * 100 }%\n"

        return out

#### Avaliação

In [96]:
compoundModel = CompoundModel(1, 50)

evaluator = Evaluator(compoundModel)
evaluator.evaluate(ecgsValidation)

print(evaluator.toString())


--- Compound Model ---
Acurácia: 69.0%
Precisão: 95.0%
Sensibilidade: 63.0%
F1-Score: 76.0%

