# In [None]:  (Jupyter cell marker left over from the notebook export)


# 1. IMPORTS

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

from pathlib import Path
from scipy import signal
from scipy.stats import skew, kurtosis
from scipy.signal import find_peaks, butter, filtfilt, detrend

from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tools.sm_exceptions import ConvergenceWarning

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Dense, Conv1D, MaxPooling1D, UpSampling1D, LSTM, RepeatVector, TimeDistributed
)
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

# Apply seaborn's "whitegrid" style to the plots drawn by this script.
sns.set_style("whitegrid")
# Report the installed TensorFlow version.
print("TensorFlow Version:", tf.__version__)


# 2. LOAD DATA

def load_ecg5000(base_path="~/Desktop/ECG5000"):
    """Load the ECG5000 train and test splits into a single DataFrame.

    Reads ``ECG5000_TRAIN.txt`` and ``ECG5000_TEST.txt`` from *base_path*
    as whitespace-separated tables without a header row and stacks them,
    train rows first.

    Args:
        base_path: Directory holding the two split files. May be a string
            or a path object; a leading ``~`` is expanded to the user's
            home directory.

    Returns:
        A DataFrame containing the rows of both files with a fresh
        0..n-1 integer index. Columns are numbered from 0.

    Raises:
        FileNotFoundError: Propagated from pandas if either split file
            is missing.
    """
    base_path = Path(base_path).expanduser()
    split_files = ("ECG5000_TRAIN.txt", "ECG5000_TEST.txt")
    # sep=r"\s+" is the supported way to split on runs of whitespace; the
    # older delim_whitespace=True flag is deprecated as of pandas 2.2.
    frames = [
        pd.read_csv(base_path / name, header=None, sep=r"\s+")
        for name in split_files
    ]
    # ignore_index=True renumbers the rows so the two splits do not carry
    # duplicate index labels.
    return pd.concat(frames, axis=0, ignore_index=True)

# Pull both ECG5000 splits from the local dataset folder into one frame.
df = load_ecg5000(base_path="~/Desktop/ECG5000")

# Sanity check: overall dimensions first, then the leading rows.
print("Data shape:", df.shape)
print(df.head())


# 3. BINARY LABEL MAPPING

# Collapse the class label in column 0 into a binary target:
#   label 1 (Normal)       -> 0
#   every other label      -> 1 (Arrhythmia)
# The remaining columns are kept as the feature matrix.
y = np.where(df.iloc[:, 0].to_numpy() != 1, 1, 0)
X = df.iloc[:, 1:].to_numpy()

# Report how many samples fall into each binary class.
print("\nClass distribution:")
unique, counts = np.unique(y, return_counts=True)
for cls, cnt in zip(unique, counts):
    print(f"Class {cls}: {cnt}")



# 4. BASIC VISUALIZATION

# Draw the first row of X as a sample waveform.
beat_fig, beat_ax = plt.subplots(figsize=(12, 3))
beat_ax.plot(X[0], label="Example ECG Beat")
beat_ax.set_title("Example ECG waveform")
beat_ax.set_xlabel("Time Index")
beat_ax.set_ylabel("Amplitude")
beat_ax.legend()
plt.show()

# Bar chart of how many samples carry each binary label.
count_fig, count_ax = plt.subplots(figsize=(5, 4))
sns.countplot(x=y, ax=count_ax)
count_ax.set_title("Class Distribution (0=Normal, 1=Arrhythmia)")
plt.show()

# Per-class average waveform with a shaded band of one standard deviation
# on either side, both classes overlaid on the same axes.
plt.figure(figsize=(12, 4))
for cls in [0, 1]:
    # Select this class's rows once and reuse them for both statistics.
    class_beats = X[y == cls]
    mean = class_beats.mean(axis=0)
    std = class_beats.std(axis=0)
    plt.plot(mean, label=f"Class {cls} mean")
    plt.fill_between(np.arange(mean.size), mean - std, mean + std, alpha=0.3)
# The title previously contained a mis-encoded plus-minus sign ("Â±").
# The \u00b1 escape yields "±" regardless of the file's encoding.
plt.title("Class Mean \u00b1 Std")
plt.legend()
plt.show()