<a href="https://colab.research.google.com/github/mateollorente/Producto/blob/master/superVector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tqdm import tqdm
from google.colab import userdata
import os, json


In [2]:
!git clone https://github.com/SantiagoBuffa/BeatAI.git
%cd BeatAI

Cloning into 'BeatAI'...
remote: Enumerating objects: 842, done.[K
remote: Counting objects: 100% (80/80), done.[K
remote: Compressing objects: 100% (62/62), done.[K
remote: Total 842 (delta 32), reused 38 (delta 11), pack-reused 762 (from 3)[K
Receiving objects: 100% (842/842), 97.11 MiB | 17.90 MiB/s, done.
Resolving deltas: 100% (562/562), done.
/content/BeatAI


In [3]:


kaggle_username = userdata.get("kaggle_username")
kaggle_key = userdata.get("kaggle_key")

os.makedirs(os.path.expanduser("~/.kaggle"), exist_ok=True)
with open(os.path.expanduser("~/.kaggle/kaggle.json"), "w") as f:
    json.dump({"username": kaggle_username, "key": kaggle_key}, f)

os.chmod(os.path.expanduser("~/.kaggle/kaggle.json"), 0o600)

!pip install kaggle --quiet
!kaggle datasets download -d evilspirit05/ecg-analysis -p ./data --unzip

Dataset URL: https://www.kaggle.com/datasets/evilspirit05/ecg-analysis
License(s): MIT
Downloading ecg-analysis.zip to ./data
 98% 806M/826M [00:05<00:00, 93.0MB/s]
100% 826M/826M [00:05<00:00, 169MB/s] 


In [31]:
def ecg_to_vector(path, num_rows=4, smooth=True):
    img = cv2.imread(path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                   cv2.THRESH_BINARY_INV, 35, 10)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 1))
    clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
    contours, _ = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) > 0:
        x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))
        clean = clean[y:y+h, x:x+w]

    height = clean.shape[0]
    row_height = height // num_rows
    signals = []
    for i in range(num_rows):
        row = clean[i*row_height:(i+1)*row_height, :]
        ys = []
        for col in range(row.shape[1]):
            pixels = np.where(row[:, col] > 0)[0]
            if len(pixels) > 0:
                ys.append(np.mean(pixels))
            else:
                ys.append(np.nan)
        ys = np.array(ys)
        nans = np.isnan(ys)
        if np.any(nans):
            ys[nans] = np.interp(np.flatnonzero(nans), np.flatnonzero(~nans), ys[~nans])
        ys = (ys - np.min(ys)) / (np.max(ys) - np.min(ys))
        signals.append(ys)
    vector = np.concatenate(signals)
    if smooth:
        vector = cv2.GaussianBlur(vector.reshape(-1, 1), (9, 1), 0).flatten()
    return vector



In [32]:

def resize_vector(v, target_len=4096):
    x_old = np.linspace(0, 1, len(v))
    x_new = np.linspace(0, 1, target_len)
    return np.interp(x_new, x_old, v)

In [33]:
def preprocess_dataset_1d(dataset_path, target_len=2048, num_rows=4):
    """
    Carga todas las imágenes del dataset y genera vectores 1D concatenados.
    Retorna: X_train, y_train, X_val, y_val, X_test, y_test
    """
    train_dir = os.path.join(dataset_path, 'train')
    test_dir = os.path.join(dataset_path, 'test')

    X_train, y_train = [], []
    X_test, y_test = [], []

    class_names = sorted(os.listdir(train_dir))
    print(f"📂 Clases detectadas: {class_names}")

    # --- TRAIN ---
    for label, cls in enumerate(class_names):
        cls_dir = os.path.join(train_dir, cls)
        for fname in tqdm(os.listdir(cls_dir), desc=f"Procesando {cls} (train)"):
            path = os.path.join(cls_dir, fname)
            if not fname.lower().endswith(('.jpg', '.png', '.jpeg')):
                continue
            vec = ecg_to_vector(path, num_rows=num_rows)
            vec = resize_vector(vec, target_len)
            X_train.append(vec)
            y_train.append(label)

    # --- TEST ---
    for label, cls in enumerate(class_names):
        cls_dir = os.path.join(test_dir, cls)
        for fname in tqdm(os.listdir(cls_dir), desc=f"Procesando {cls} (test)"):
            path = os.path.join(cls_dir, fname)
            if not fname.lower().endswith(('.jpg', '.png', '.jpeg')):
                continue
            vec = ecg_to_vector(path, num_rows=num_rows)
            vec = resize_vector(vec, target_len)
            X_test.append(vec)
            y_test.append(label)

    X_train = np.array(X_train)[..., np.newaxis]
    y_train = tf.keras.utils.to_categorical(y_train, num_classes=len(class_names))
    X_test = np.array(X_test)[..., np.newaxis]
    y_test = tf.keras.utils.to_categorical(y_test, num_classes=len(class_names))

    # --- SPLIT VALIDATION ---
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=0.2, random_state=42, stratify=y_train.argmax(axis=1)
    )

    print(f"✅ X_train: {X_train.shape}, X_val: {X_val.shape}, X_test: {X_test.shape}")
    return X_train, y_train, X_val, y_val, X_test, y_test

In [34]:
from tensorflow.keras import layers, models

def build_ecg_1d_model(input_length=2048, num_classes=4):
    model = models.Sequential([
        layers.Input(shape=(input_length, 1)),
        layers.Conv1D(32, 7, activation='relu', padding='same'),
        layers.MaxPooling1D(2),
        layers.Conv1D(64, 5, activation='relu', padding='same'),
        layers.MaxPooling1D(2),
        layers.Conv1D(128, 3, activation='relu', padding='same'),
        layers.GlobalAveragePooling1D(),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [35]:
dataset_path = "/content/BeatAI/data/ECG_DATA"

X_train, y_train, X_val, y_val, X_test, y_test = preprocess_dataset_1d(dataset_path)

model = build_ecg_1d_model(input_length=2048, num_classes=4)
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))


📂 Clases detectadas: ['ECG Images of Myocardial Infarction Patients (240x12=2880)', 'ECG Images of Patient that have History of MI (172x12=2064)', 'ECG Images of Patient that have abnormal heartbeat (233x12=2796)', 'Normal Person ECG Images (284x12=3408)']


Procesando ECG Images of Myocardial Infarction Patients (240x12=2880) (train): 100%|██████████| 956/956 [01:47<00:00,  8.88it/s]
Procesando ECG Images of Patient that have History of MI (172x12=2064) (train): 100%|██████████| 516/516 [00:58<00:00,  8.81it/s]
Procesando ECG Images of Patient that have abnormal heartbeat (233x12=2796) (train): 100%|██████████| 699/699 [01:16<00:00,  9.08it/s]
Procesando Normal Person ECG Images (284x12=3408) (train): 100%|██████████| 852/852 [01:34<00:00,  9.05it/s]
Procesando ECG Images of Myocardial Infarction Patients (240x12=2880) (test): 100%|██████████| 239/239 [00:26<00:00,  9.05it/s]
Procesando ECG Images of Patient that have History of MI (172x12=2064) (test): 100%|██████████| 172/172 [00:18<00:00,  9.13it/s]
Procesando ECG Images of Patient that have abnormal heartbeat (233x12=2796) (test): 100%|██████████| 233/233 [00:26<00:00,  8.75it/s]
Procesando Normal Person ECG Images (284x12=3408) (test): 100%|██████████| 284/284 [00:30<00:00,  9.45it/s

✅ X_train: (2418, 2048, 1), X_val: (605, 2048, 1), X_test: (928, 2048, 1)
Epoch 1/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 57ms/step - accuracy: 0.2651 - loss: 1.3749 - val_accuracy: 0.3157 - val_loss: 1.3589
Epoch 2/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.3053 - loss: 1.3659 - val_accuracy: 0.3157 - val_loss: 1.3569
Epoch 3/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.3121 - loss: 1.3598 - val_accuracy: 0.3157 - val_loss: 1.3570
Epoch 4/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.3041 - loss: 1.3588 - val_accuracy: 0.3157 - val_loss: 1.3432
Epoch 5/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.3395 - loss: 1.3375 - val_accuracy: 0.3636 - val_loss: 1.3170
Epoch 6/10
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.3387 - loss: 1.3170 - 

<keras.src.callbacks.history.History at 0x79cb088a27b0>

In [44]:
model.fit(X_train, y_train, epochs=40, batch_size=32, validation_data=(X_val, y_val))


Epoch 1/40
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.9657 - loss: 0.0947 - val_accuracy: 0.9752 - val_loss: 0.0770
Epoch 2/40
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9574 - loss: 0.1212 - val_accuracy: 0.9653 - val_loss: 0.0951
Epoch 3/40
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.9730 - loss: 0.0822 - val_accuracy: 0.8909 - val_loss: 0.2346
Epoch 4/40
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.9407 - loss: 0.1399 - val_accuracy: 0.9835 - val_loss: 0.0675
Epoch 5/40
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.9685 - loss: 0.0918 - val_accuracy: 0.9537 - val_loss: 0.1360
Epoch 6/40
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9666 - loss: 0.0886 - val_accuracy: 0.9752 - val_loss: 0.0838
Epoch 7/40
[1m76/76[0m [32m━━━━

<keras.src.callbacks.history.History at 0x79cb3c310e90>

In [None]:
"""
dataset_path = "./data/ECG_DATA/"
train_gen, val_gen, test_gen = preprocess_dataset(dataset_path)

batch_train = next(train_gen)
batch_val = next(val_gen)

plt.figure(figsize=(12, 8))

# Train (fila 1)
for i in range(6):
    plt.subplot(2, 6, i + 1)
    plt.imshow(batch_train[0][i].squeeze(), cmap='gray')
    plt.title(f"Train:{np.argmax(batch_train[1][i])}")
    plt.axis('off')

# Val (fila 2)
for i in range(6):
    plt.subplot(2, 6, i + 7)
    plt.imshow(batch_val[0][i].squeeze(), cmap='gray')
    plt.title(f"Val:{np.argmax(batch_val[1][i])}")
    plt.axis('off')

plt.tight_layout()
plt.show()
"""


In [None]:
def show_graphs(history):
    plt.figure(figsize=(10,4))
    plt.subplot(1,2,1)
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Loss')
    plt.legend()
    plt.subplot(1,2,2)
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Accuracy')
    plt.legend()
    plt.show()

show_graphs(history)

In [46]:
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"📊 Test Accuracy: {test_acc:.4f}")
print(f"📉 Test Loss: {test_loss:.4f}")


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9523 - loss: 0.1143
📊 Test Accuracy: 0.9666
📉 Test Loss: 0.0998


In [None]:
"""

from datetime import datetime
import pytz
!git pull

# Guardar dentro de la carpeta correcta
os.makedirs("models", exist_ok=True)
model.save("models/ecg_modelV3.h5")
github_user = userdata.get("github_user")
github_token = userdata.get("GITHUB_TOKEN")
github_mail = userdata.get("github_mail")
!git config --global user.name "{github_user}"
!git config --global user.email "{github_mail}"
repo_url = f"https://{github_user}:{github_token}@github.com/SantiagoBuffa/BeatAI.git"
!git remote set-url origin $repo_url

time_zone = pytz.timezone("America/Argentina/Buenos_Aires")
right_now = datetime.now(time_zone)
date_and_time = right_now.strftime("%d-%m %H:%M")

# Registrar en git y subir
!git add models/ecg_modelV3.h5
!git commit -m "Test accuracy: 0.9763, Test loss: 0.0875"

"""


In [45]:
model.save("models/ecg_modelVectores.h5")



In [None]:
#!git push origin main