In [3]:
# ================================
# 1. Importar librerías
# ================================
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Configuración de gráficos
plt.style.use("seaborn-v0_8")
sns.set_palette("muted")

# ================================
# 2. Definir ruta de los datos
# ================================
data_path = "./mit-bih-arrhythmia_dataset"

# ================================
# 3. Listar archivos disponibles
# ================================
files = os.listdir(data_path)
print(f"Total archivos: {len(files)}")
print(files[:10])  # mostrar primeros 10 archivos

# ================================
# 4. Cargar ejemplos de archivos
# ================================
csv_file = os.path.join(data_path, "200.csv")
txt_file = os.path.join(data_path, "200.txt")

# Cargar CSV (señales ECG)
df_signals = pd.read_csv(csv_file)
print("\n=== Señales ECG (CSV) ===")
print(df_signals.shape)
print(df_signals.head())

# Cargar TXT (anotaciones)
df_annotations = pd.read_csv(
    txt_file, 
    delim_whitespace=True, 
    header=None,
    names=["Time", "Sample#", "Type", "Sub", "Chan", "Num", "Aux"]
)

print("\n=== Anotaciones (TXT) ===")
print(df_annotations.shape)
print(df_annotations.head())

# ================================
# 5. Corrección de tipos en anotaciones
# ================================
num_cols = ["Sample#", "Sub", "Chan", "Num"]
df_annotations[num_cols] = df_annotations[num_cols].apply(pd.to_numeric, errors="coerce")

# Confirmar tipos
print("\n--- Tipos de datos corregidos (anotaciones) ---")
print(df_annotations.dtypes)

# ================================
# 6. Información general
# ================================
print("\n--- Info Señales ---")
print(df_signals.info())
print("\n--- Info Anotaciones ---")
print(df_annotations.info())

# ================================
# 7. Estadísticas descriptivas
# ================================
print("\n--- Estadísticas señales ---")
print(df_signals.describe().T)

print("\n--- Estadísticas anotaciones ---")
print(df_annotations.describe(include="all").T)

# ================================
# 8. Valores faltantes
# ================================
print("\nValores faltantes en señales:")
print(df_signals.isna().sum())

print("\nValores faltantes en anotaciones:")
print(df_annotations.isna().sum())

# ================================
# 9. Identificar variables categóricas y numéricas
# ================================
cat_cols = ["Type"]  # solo Type nos interesa para clasificación
num_cols_signals = df_signals.select_dtypes(include=[np.number]).columns.tolist()

print("\nColumnas categóricas (anotaciones):", cat_cols)
print("Columnas numéricas (señales):", num_cols_signals)

# ================================
# 10. Mapeo de etiquetas a superclases AAMI
# ================================
aami_map = {
    "N": "N", "L": "N", "R": "N", "e": "N", "j": "N",
    "A": "S", "a": "S", "J": "S", "S": "S",
    "V": "V", "E": "V",
    "F": "F",
    "Q": "Q", "/": "Q", "f": "Q", "u": "Q"
}

df_annotations["AAMI"] = df_annotations["Type"].map(aami_map).fillna("Q")

print("\n=== Ejemplo con mapeo AAMI ===")
print(df_annotations[["Type", "AAMI"]].head(20))

# ================================
# 11. Visualización preliminar
# ================================
# Graficar 500 muestras de la señal (columna 0 del CSV)
plt.figure(figsize=(12,4))
plt.plot(df_signals.iloc[:500,0])
plt.title("ECG - primeras 500 muestras (canal 1)")
plt.xlabel("Muestra")
plt.ylabel("Amplitud")
plt.show()

# Histograma de tipos de latidos (raw)
plt.figure(figsize=(6,4))
sns.countplot(x="Type", data=df_annotations, order=df_annotations["Type"].value_counts().index)
plt.title("Distribución de latidos anotados (raw)")
plt.show()

# Histograma de superclases AAMI
plt.figure(figsize=(6,4))
sns.countplot(x="AAMI", data=df_annotations, order=df_annotations["AAMI"].value_counts().index)
plt.title("Distribución de latidos por superclase AAMI")
plt.show()


Total archivos: 96
['100.csv', '100annotations.txt', '101.csv', '101annotations.txt', '102.csv', '102annotations.txt', '103.csv', '103annotations.txt', '104.csv', '104annotations.txt']

=== Señales ECG (CSV) ===
(650000, 3)
   'sample #'  'MLII'  'V1'
0           0    1094  1045
1           1    1094  1045
2           2    1094  1045
3           3    1094  1045
4           4    1094  1045


  df_annotations = pd.read_csv(


FileNotFoundError: [Errno 2] No such file or directory: './mit-bih-arrhythmia_dataset\\200.txt'