# Clasificador de señales ECG patológicas en Edge Impulse

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import glob
import requests
from scipy.io import loadmat
import re
import json
import time, hmac, hashlib
import requests

## Funciones de ayuda para subir datos a Edge Impulse

In [None]:
# Root folder of the dataset; its entries are listed with glob in the next cell.
path = "/kaggle/input/ecg-signals-1000-fragments/MLII"

In [None]:
# Entries found directly under `path`, in whatever order glob yields them.
# NOTE: recursive=True only changes matching for "**" segments.
dirs = glob.glob(f"{path}/*", recursive=True)
dirs

In [None]:
def get_name_dir(_dir_text):
    """Return the last word of ``_dir_text``, or None if it does not end in one.

    A word is a run of word characters (letters, digits, underscore) that sits
    at the very end of the string, e.g. ``'.../MLII/10 VT'`` gives ``'VT'``.
    """
    found = re.search(r'\b(\w+)\b$', _dir_text)
    return found.group(1) if found else None

# Quick check of get_name_dir: for this sample path the last word is 'VT'.
text = '/kaggle/input/ecg-signals-1000-fragments/MLII/10 VT'
get_name_dir(text)

In [None]:
def get_files_from_dir(_dir):
    """List the entries found directly inside ``_dir``.

    Returns the paths glob produces for the pattern ``<_dir>/*``; the result is
    not sorted here, so the order is whatever glob yields.
    """
    pattern = "/".join((_dir, "*"))
    # recursive=True only changes matching for "**" segments in the pattern.
    return glob.glob(pattern, recursive=True)

## Test de data

In [None]:
# Sanity check: load the first file of the first listed folder and plot it.
files_in_dir = get_files_from_dir(dirs[0])
mat = loadmat(files_in_dir[0])
#mat['val'], mat['val'].shape
#print(mat['val'][0].reshape(-1,1))
# The plotted samples are row 0 of the 'val' variable stored in the .mat file.
plt.plot(mat['val'][0])
plt.title("An example of signal from database")
plt.grid(ls=":")  # dotted grid lines
plt.show()

## Upload to Edge Impulse

In [None]:
def upload_ei(_name_label, _values, hmac_key, api_key, *, fs=360,
              device_name="ac:87:a3:0a:2d:1b", device_type="NANO33BLE",
              timeout=30):
    """Sign one signal with HMAC-SHA256 and POST it to the Edge Impulse ingestion API.

    Credentials must be supplied by the caller; never hard-code them in this
    file, not even inside comments.

    Args:
        _name_label: Sent as the ``x-file-name`` header of the request.
        _values: Samples to upload. A NumPy array (or any object with
            ``tolist()``) or a plain sequence; sent as the payload ``values``.
        hmac_key: HMAC key (str) used to sign the message.
        api_key: API key, sent as the ``x-api-key`` header.
        fs: Sampling frequency in Hz, used to derive ``interval_ms``.
        device_name: Value of the payload ``device_name`` field.
        device_type: Value of the payload ``device_type`` field.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        True when the server answered with HTTP 200, False otherwise.

    Raises:
        ValueError: If ``fs`` is not positive.
        requests.RequestException: On connection errors or timeouts.
    """
    if fs <= 0:
        raise ValueError("fs must be a positive sampling frequency in Hz")

    # Empty signature (all zeros). HS256 gives a 32-byte signature which is
    # hex-encoded, so the placeholder needs 64 characters.
    empty_signature = '0' * 64

    # Time between two consecutive samples, in milliseconds.
    interval_ms = (1 / fs) * 1000

    # Accept NumPy arrays as well as plain (possibly nested) sequences.
    values = _values.tolist() if hasattr(_values, "tolist") else list(_values)

    data = {
        "protected": {
            "ver": "v1",
            "alg": "HS256",
            "iat": time.time(),  # epoch time, seconds since 1970
        },
        "signature": empty_signature,
        "payload": {
            "device_name": device_name,
            "device_type": device_type,
            "interval_ms": interval_ms,
            "sensors": [
                {"name": "Volts", "units": "adu/mv"},
            ],
            "values": values,
        },
    }

    # The signature is computed over the JSON document that still carries the
    # all-zero placeholder, then written into the document before sending.
    encoded = json.dumps(data)
    signature = hmac.new(
        hmac_key.encode('utf-8'),
        msg=encoded.encode('utf-8'),
        digestmod=hashlib.sha256,
    ).hexdigest()
    data['signature'] = signature
    encoded = json.dumps(data)

    # A timeout keeps a stalled connection from blocking a bulk upload forever.
    res = requests.post(
        url='https://ingestion.edgeimpulse.com/api/training/data',
        data=encoded,
        headers={
            'Content-Type': 'application/json',
            'x-file-name': _name_label,
            'x-api-key': api_key,
        },
        timeout=timeout,
    )

    uploaded = res.status_code == 200
    if uploaded:
        print('Uploaded file to Edge Impulse', res.status_code, res.content)
    else:
        print('Failed to upload file to Edge Impulse', res.status_code, res.content)
    return uploaded


## Upload automatizado

In [None]:
files_in_dir = get_files_from_dir(dirs[0])
files_in_dir

In [None]:
values = loadmat(files_in_dir[0])['val'][0].reshape(-1,1)
values, values.shape

In [None]:
values.tolist()

In [None]:
# for i in dirs:
#     x_file_name = get_name_dir(i)
#     files_in_dir = get_files_from_dir(i)
    
#     for j in files_in_dir:
#         values = loadmat(j)['val'][0].reshape(-1,1)
#         print(values)
#         print(x_file_name)
#         upload_ei(x_file_name, values)

In [None]:
# Count the files inside each folder of `dirs`, labelled by the folder's last word.
aux_x_file_name = []
aux_values = []

for class_dir in dirs:
    files_in_dir = get_files_from_dir(class_dir)
    x_file_name = get_name_dir(class_dir)

    aux_values.append(len(files_in_dir))
    aux_x_file_name.append(x_file_name)

# Two parallel columns: class names and their file counts.
df_aux = {"name": aux_x_file_name, "value": aux_values}

In [None]:
# Horizontal bar chart of the file count per class; saved to disk, then shown.
counts_frame = pd.DataFrame(df_aux, index=aux_x_file_name)
ax = counts_frame.plot(kind="barh")
ax.set_title("Total classes from database")
ax.legend(["ECV"])
ax.grid(ls=":")
# Write each bar's value next to it.
for bars in ax.containers:
    ax.bar_label(bars)

plt.savefig("total_classes_from_database.png")
plt.show()

# Filtrar las clases que tienen más de 50 datos

In [None]:
# Keep only the rows for a hand-picked set of class names.
list_names = ["RBBBB", "NSR", "LBBBB"]
df_ecv = pd.DataFrame(df_aux, index=aux_x_file_name)
df_ecv.loc[df_ecv.index.isin(list_names)]

In [None]:
df_ecv.columns

In [None]:


# Bar chart restricted to the classes named in `list_names`.
selected = df_ecv.index.isin(list_names)
ax = df_ecv[selected].plot(kind="barh")
ax.set_title("Filtered classes from database")
ax.legend(["ECV"])
ax.grid(ls=":")
# Write each bar's value next to it.
for bars in ax.containers:
    ax.bar_label(bars)
plt.show()

In [None]:
# Folders whose recordings are loaded in the following cells (a plain list of
# paths, despite the `df_` prefix).
df_filter = [
    '/kaggle/input/ecg-signals-1000-fragments/MLII/8 Bigeminy',
    '/kaggle/input/ecg-signals-1000-fragments/MLII/14 LBBBB',
    '/kaggle/input/ecg-signals-1000-fragments/MLII/1 NSR',
    '/kaggle/input/ecg-signals-1000-fragments/MLII/7 PVC',
    '/kaggle/input/ecg-signals-1000-fragments/MLII/4 AFIB',
    '/kaggle/input/ecg-signals-1000-fragments/MLII/15 RBBBB',
    '/kaggle/input/ecg-signals-1000-fragments/MLII/2 APB',
]

df_filter

In [None]:
# Load every file of the selected folders, pairing each signal (row 0 of 'val',
# converted to a plain list) with the class name taken from its folder.
aux_x_file_name2 = []
aux_values2 = []

for class_dir in df_filter:
    files_in_dir = get_files_from_dir(class_dir)
    x_file_name = get_name_dir(class_dir)

    for mat_path in files_in_dir:
        values = loadmat(mat_path)['val'][0]

        aux_values2.append(values.tolist())
        aux_x_file_name2.append(x_file_name)

# "y" holds the class names, "x" the matching signals.
df_aux2 = {"y": aux_x_file_name2, "x": aux_values2}

In [None]:
np.unique(df_aux2["y"])

In [None]:
df_aux2_x = np.array(df_aux2["x"])
df_aux2_x

In [None]:
df3 = pd.DataFrame(df_aux2_x)
df3.head()

In [None]:
df3["y"] = df_aux2["y"]

In [None]:
df3.head()

In [None]:
df3["y"].value_counts(), df3["y"].unique()

In [None]:
# Integer code for each class name, numbered in the order listed.
rename = {
    label: code
    for code, label in enumerate(
        ['Bigeminy', 'LBBBB', 'NSR', 'PVC', 'AFIB', 'RBBBB', 'APB']
    )
}

In [None]:
# Reverse lookup: integer code -> class name.
rename_inverse = {code: label for label, code in rename.items()}
rename_inverse

In [None]:
# Replace the class names in 'y' with their integer codes from `rename`.
# NOTE: this cell is not idempotent. Running it twice maps the numeric codes
# again, and values that are not keys of `rename` become NaN under Series.map.
df3['y'] = df3['y'].map(rename)

In [None]:
df3.head()

In [None]:
df3["y"].value_counts(), df3["y"].unique()

In [None]:
df3["y"].value_counts().max(), df3["y"].value_counts().shape

In [None]:
print("Numero de muestras luego del resampleo por la técnica de over-sampling:",df3["y"].value_counts().max() * df3["y"].value_counts().shape[0])

In [None]:
df3.shape

In [None]:
# Split by position: every column except the last is a feature, and the last
# column is the target (this relies on 'y' having been appended to df3 last).
X = df3.iloc[:,:-1]
y = df3.iloc[:,-1]

In [None]:
from imblearn.over_sampling import SMOTE

# Oversample with SMOTE; the fixed random_state makes the resampling repeatable.
sm = SMOTE(random_state=42)
X_res, y_res = sm.fit_resample(X, y)

In [None]:
X_res.shape

In [None]:
X_res.head(), X_res.shape

In [None]:
y_res

In [None]:
y_res.value_counts()

In [None]:
# Rebuild a single frame from the resampled data, with the target as column 'y'.
df4 = pd.DataFrame(X_res)
df4["y"] = y_res
# Put rows of the same class next to each other and renumber the index from 0.
df4 = df4.sort_values('y')
df4 = df4.reset_index(drop = True)
df4.head()#, df4.shape

In [None]:
df4.iloc[:284,-1]

In [None]:
df4.columns  #rename_inverse

In [None]:
rename_inverse

In [None]:
df6 = df4.groupby("y")["y"].count()
df6.head()

In [None]:
df6.shape, type(df6), df6[[1,2,5]]

In [None]:
# Row counts per class in df4, restricted to class codes 1, 2 and 5.
per_class = df4.groupby("y")["y"].count()
ax = per_class[[1, 2, 5]].plot(kind="barh")
ax.set_title("Filtered classes from database")
ax.legend(["ECV"])
ax.grid(ls=":")
# Write each bar's value next to it.
for bars in ax.containers:
    ax.bar_label(bars)

# Show class names instead of the numeric codes on the y axis.
ax.set_yticklabels(["LBBBB","NSR","RBBBB"])
ax.set_xticks([0, 50, 100, 150, 200, 250, 300])
plt.show()

In [None]:
# Sample counts per class under each balancing strategy (values entered by hand).
dict_aux = {
    "oversampling": [283] * 3,
    "undersampling": [62] * 3,
    "imbalanced": [103, 283, 62],
}

# One row per class, one column per strategy.
df_b = pd.DataFrame(dict_aux, index=["LBBBB", "NSR", "RBBBB"])
df_b.head()

In [None]:
# Grouped horizontal bars comparing the three balancing strategies per class.
ax = df_b.plot(kind="barh")
ax.set_title("Classes after being OverSampled/UnderSampled")
ax.set_xlabel("Samples")
ax.legend(["Oversampling","Undersampling","Imbalanced"])
ax.grid(ls=":")
# Write each bar's value next to it.
for bars in ax.containers:
    ax.bar_label(bars)
ax.set_xticks([0, 50, 100, 150, 200, 250, 300])

plt.savefig("over_under_imb.png")
plt.show()

In [None]:
from tqdm.notebook import tqdm

In [None]:
# h_mac="<EDGE_IMPULSE_HMAC_KEY>"  # credential redacted: load it from a secret or environment variable
# key="<EDGE_IMPULSE_API_KEY>"  # credential redacted: load it from a secret or environment variable

# X_res = df4.iloc[:,:-1]
# y_res = df4.iloc[:,-1]

# for i in range(X_res.shape[0]):
#     data = X_res.iloc[i,:]
#     names = y_res.iloc[i]
#     #print(data, names)
#     upload_ei(rename_inverse[names], data.to_numpy().reshape(-1,1), h_mac, key)