In [None]:
# Auto-generated Kaggle export cell: run it once to import the notebook's
# Kaggle data sources; it may be deleted afterwards.
# Note: this notebook environment differs from Kaggle's Python environment,
# so libraries used by the notebook may be missing.
import kagglehub
# Value returned by kagglehub for the downloaded 'shayanfazeli/heartbeat' dataset.
shayanfazeli_heartbeat_path = kagglehub.dataset_download('shayanfazeli/heartbeat')

print('Data source import complete.')


In [None]:
# Download the heartbeat dataset from Kaggle via kagglehub.
# `path` is the value kagglehub returns; it is printed as the location of the
# dataset files.
import kagglehub
path = kagglehub.dataset_download("shayanfazeli/heartbeat")
print("Path to dataset files:", path)


In [None]:
#Installing tensorflow
!pip install --upgrade tensorflow

In [None]:
#Here we are importing the basic python and data analysis libraries
import os, random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Imported the machine learning utilities from sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.utils.class_weight import compute_class_weight

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, BatchNormalization, Dropout, Dense, Flatten
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical


# Seed every random number generator the notebook relies on (Python's
# `random`, NumPy's global generator and TensorFlow) with the same value.
SEED = 42
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_fn(SEED)

print("TensorFlow imported successfully, version:", tf.__version__)

In [None]:
# Locate the train/test CSV files.
# On Kaggle the dataset is mounted at /kaggle/input/heartbeat. Anywhere else
# that directory does not exist, so fall back to the directory returned by
# kagglehub.dataset_download() (`path`, set in the dataset-download cell).
DATA_DIR = "/kaggle/input/heartbeat"
if not os.path.isdir(DATA_DIR):
    DATA_DIR = path
TRAIN_CSV = os.path.join(DATA_DIR, "mitbih_train.csv")
TEST_CSV  = os.path.join(DATA_DIR, "mitbih_test.csv")

assert os.path.exists(TRAIN_CSV), "Train CSV not found. Did you Add Data (shayanfazeli/heartbeat)?"
assert os.path.exists(TEST_CSV), "Test CSV not found. Did you Add Data (shayanfazeli/heartbeat)?"

# The files are read without a header row (header=None): every row is data.
train_df = pd.read_csv(TRAIN_CSV, header=None)
test_df  = pd.read_csv(TEST_CSV,  header=None)

print("Train shape:", train_df.shape)  # expected ~ (87554, 188)
print("Test shape:",  test_df.shape)   # expected ~ (21892, 188)

# Last column is label; first 187 columns are samples
N_SAMPLES = train_df.shape[1] - 1
print("Samples per beat:", N_SAMPLES)
train_df.head()

In [None]:
# Features are columns 0 .. N_SAMPLES-1 of each row.
X_train_all = train_df.iloc[:, :N_SAMPLES].values.astype(np.float32)
# The label is the single column at index N_SAMPLES (the last column).
y_train_all = train_df.iloc[:,  N_SAMPLES].values.astype(int)

X_test = test_df.iloc[:, :N_SAMPLES].values.astype(np.float32)
y_test = test_df.iloc[:,  N_SAMPLES].values.astype(int)

# Per-beat min-max normalisation: each row is shifted by its own minimum and
# divided by its own peak-to-peak range (+1e-8 guards against division by zero
# for a constant row).
# np.ptp() is used instead of the ndarray.ptp() method, which was removed in
# NumPy 2.0 and would raise AttributeError there.
X_train_all = (X_train_all - X_train_all.min(axis=1, keepdims=True)) / (np.ptp(X_train_all, axis=1, keepdims=True) + 1e-8)
X_test      = (X_test      - X_test.min(axis=1, keepdims=True))      / (np.ptp(X_test, axis=1, keepdims=True) + 1e-8)

# Train/Validation split from training set (stratified so every class keeps
# its proportion in both parts)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_all, y_train_all, test_size=0.15, random_state=SEED, stratify=y_train_all
)

# Reshape for 1D CNN: (n_samples, timesteps, channels)
X_train = X_train[..., None]
X_val   = X_val[..., None]
X_test  = X_test[..., None]

# One-hot encoding
num_classes = len(np.unique(y_train_all))
y_train_cat = to_categorical(y_train, num_classes)
y_val_cat   = to_categorical(y_val,   num_classes)
y_test_cat  = to_categorical(y_test,  num_classes)

print(f"Classes: {num_classes} | Shapes -> X_train: {X_train.shape}, y_train: {y_train_cat.shape}")

In [None]:
# Draw one example beat per class: the first row of X_train that carries
# each label.
classes = np.unique(y_train_all)

plt.figure(figsize=(12, 6))
for c in classes:
    idx = np.flatnonzero(y_train == c)[0]   # first training row of class c
    plt.plot(X_train[idx].squeeze(), label=f"class {c}")
plt.title("Example ECG beats (one per class)")
plt.xlabel("Sample index (0..186)")
plt.ylabel("Normalized amplitude")
plt.legend()
plt.show()

In [None]:
# "Balanced" class weights for the imbalanced training labels, stored as a
# {class index: weight} dict.
balanced_weights = compute_class_weight(class_weight="balanced", classes=classes, y=y_train)
class_weights = dict(zip(map(int, classes), balanced_weights))
class_weights

In [None]:
# 1-D CNN: three Conv1D -> BatchNormalization -> MaxPooling1D stages followed
# by a dense classification head.
def make_model(input_len=187, n_classes=5):
    """Build and compile the 1-D CNN beat classifier.

    Args:
        input_len: number of samples per beat; the model input shape is
            (input_len, 1).
        n_classes: number of output classes (size of the softmax layer).

    Returns:
        A compiled Sequential model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    model = Sequential()

    # (filters, kernel size) of the three convolution stages; each stage ends
    # with a pooling layer of size 2.
    conv_stages = ((32, 7), (64, 5), (128, 3))
    for stage_no, (filters, width) in enumerate(conv_stages):
        # Only the very first layer declares the input shape.
        first_layer_kwargs = {"input_shape": (input_len, 1)} if stage_no == 0 else {}
        model.add(Conv1D(filters, kernel_size=width, activation='relu', padding='same',
                         **first_layer_kwargs))
        model.add(BatchNormalization())
        model.add(MaxPooling1D(2))

    # Classification head
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(n_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

model = make_model(N_SAMPLES, num_classes)
model.summary()

In [None]:
EPOCHS = 40
BATCH  = 256

# Checkpoints are written to Kaggle's working directory when it exists;
# outside Kaggle that directory is absent, so use the current directory.
ckpt_dir = "/kaggle/working" if os.path.isdir("/kaggle/working") else os.getcwd()
ckpt_path = os.path.join(ckpt_dir, "ecg_cnn_best.keras")

# All three callbacks watch the validation loss:
#  - stop after 6 epochs without improvement and restore the best weights,
#  - halve the learning rate after 3 epochs without improvement,
#  - keep only the best model on disk.
callbacks = [
    EarlyStopping(monitor="val_loss", patience=6, restore_best_weights=True),
    ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, verbose=1),
    ModelCheckpoint(ckpt_path, monitor="val_loss", save_best_only=True, verbose=1)
]

# Training the model (class weights compensate for the class imbalance)
history = model.fit(
    X_train, y_train_cat,
    validation_data=(X_val, y_val_cat),
    epochs=EPOCHS,
    batch_size=BATCH,
    class_weight=class_weights,
    verbose=1,
    callbacks=callbacks
)

In [None]:
# One figure per tracked metric, each showing the training and validation
# curve over the epochs.
for metric, title, ylabel, tag in (
    ("accuracy", "Accuracy", "Acc", "acc"),
    ("loss", "Loss", "Loss", "loss"),
):
    plt.figure(figsize=(10,4))
    plt.plot(history.history[metric], label=f"train_{tag}")
    plt.plot(history.history[f"val_{metric}"], label=f"val_{tag}")
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.legend()
    plt.show()

In [None]:
# Loss and accuracy of the trained model on the held-out test set
test_loss, test_acc = model.evaluate(X_test, y_test_cat, verbose=0)
print(f"Test Accuracy: {test_acc*100:.2f}% | Test Loss: {test_loss:.4f}")

# Class probabilities -> predicted class index (highest probability)
y_prob = model.predict(X_test, verbose=0)
y_pred = y_prob.argmax(axis=1)

print("\nClassification Report:")
report_text = classification_report(y_test, y_pred, digits=4)
print(report_text)

# Confusion matrix with rows/columns in the order of `classes`
fig, ax = plt.subplots(figsize=(6,5))
cm = confusion_matrix(y_test, y_pred, labels=classes)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=classes)
disp.plot(ax=ax, values_format='d', colorbar=False)
ax.set_title("Confusion Matrix (Test)")
plt.show()

In [None]:
# Saving the model and predictions.
# Kaggle's working directory is used when it exists; outside Kaggle it is
# absent (writing there would fail), so fall back to the current directory.
output_dir = "/kaggle/working" if os.path.isdir("/kaggle/working") else os.getcwd()

final_model_path = os.path.join(output_dir, "ecg_cnn_final.keras")
model.save(final_model_path)

# True and predicted test labels side by side, one row per test beat
preds_path = os.path.join(output_dir, "test_predictions.csv")
pd.DataFrame({
    "y_true": y_test,
    "y_pred": y_pred
}).to_csv(preds_path, index=False)

print("Saved:", final_model_path, "and", preds_path)

In [None]:
# Test-set beat to inspect
index = 42

sample_signal, true_label = X_test[index], y_test[index]   # signal shape: (187, 1)

# Show the chosen beat together with its true label
fig, ax = plt.subplots(figsize=(10,3))
ax.plot(sample_signal)
ax.set_title(f"Example ECG Signal (True label = {true_label})")
ax.set_xlabel("Sample index (0–186)")
ax.set_ylabel("Amplitude")
plt.show()

In [None]:
# The model expects a batch: (1, timesteps, 1). The number of timesteps is
# inferred with -1 instead of being hard-coded to 187, so the cell keeps
# working if the beat length changes.
sample_signal = sample_signal.reshape(1, -1, 1)

# Make prediction (one row of class probabilities)
prediction = model.predict(sample_signal)

# Get the class with the highest probability and that probability
predicted_class = np.argmax(prediction)
confidence = np.max(prediction)

print(f"Predicted class: {predicted_class} (confidence = {confidence*100:.2f}%)")
print(f"True class: {true_label}")

In [None]:
# Readable name for each numeric class label
class_names = {
    0: "Normal (N)",
    1: "Supraventricular (S)",
    2: "Ventricular (V)",
    3: "Fusion (F)",
    4: "Unknown (Q)"
}

predicted_name = class_names[predicted_class]
actual_name = class_names[true_label]

print(f"Model Prediction: {predicted_name} ({confidence*100:.2f}%)")
print(f"Actual Label: {actual_name}")

**NON-DL MODEL**

In [None]:
!pip install --quiet PyWavelets xgboost

In [None]:
# Download the heartbeat dataset from Kaggle via kagglehub; `path` is the
# value kagglehub returns and is printed as the location of the dataset files.
import kagglehub
path = kagglehub.dataset_download("shayanfazeli/heartbeat")
print("Path to dataset files:",path)

In [None]:
# We imported the basic python and data analysis libraries
import os, random, math, joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

# Imported the machine learning utilities from sklearn
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA

import pywt
from scipy.stats import skew, kurtosis
from scipy.signal import find_peaks

In [None]:
# Paths of Train and Test csv files.
# On Kaggle the dataset is mounted at /kaggle/input/heartbeat. Anywhere else
# that directory does not exist, so fall back to the directory returned by
# kagglehub.dataset_download() (`path`, set in the previous cell).
DATA_PATH = "/kaggle/input/heartbeat"
if not os.path.isdir(DATA_PATH):
    DATA_PATH = path
TRAIN_CSV = os.path.join(DATA_PATH, "mitbih_train.csv")
TEST_CSV  = os.path.join(DATA_PATH, "mitbih_test.csv")

# The files are read without a header row (header=None): every row is data.
train_df = pd.read_csv(TRAIN_CSV, header=None)
test_df  = pd.read_csv(TEST_CSV, header=None)

print(" Dataset loaded successfully")
print("Train shape:", train_df.shape)
print("Test shape :", test_df.shape)
train_df.head()

In [None]:
# Hand-crafted feature extraction for a single ECG beat
def extract_features_from_beat(beat):
    """Turn one beat (1-D sequence of samples) into a vector of 28 features.

    Feature order of the returned array:
        mean, std, min, max, ptp, median, q25, q75, iqr, skew, kurtosis, rms,
        energy, fft_energy, num_peaks, max_peak_height, idx_max,
        wa_mean, wd1_mean, wd2_mean, wd3_mean,
        wa_std, wd1_std, wd2_std, wd3_std,
        activity, mobility, complexity

    ``wa_*`` are statistics of the level-3 approximation coefficients and
    ``wdK_*`` of the level-K detail coefficients of a 3-level 'db4' wavelet
    decomposition.

    Args:
        beat: array-like of numeric samples.

    Returns:
        1-D NumPy array with the 28 features listed above.
    """
    x = np.asarray(beat).astype(float)

    # --- amplitude statistics ---
    mean = np.mean(x); std = np.std(x)
    mn, mx = np.min(x), np.max(x)
    ptp = mx - mn
    median = np.median(x)
    q25, q75 = np.percentile(x, [25, 75])
    iqr = q75 - q25
    sk = skew(x); kurt = kurtosis(x)
    rms = np.sqrt(np.mean(x**2))
    energy = np.sum(x**2)

    # --- spectrum ---
    fft_mag = np.abs(np.fft.rfft(x))
    fft_energy = np.sum(fft_mag**2)

    # --- peaks: local maxima above mean + 0.2*std, at least 10 samples apart ---
    peaks, props = find_peaks(x, height=mean + 0.2*std, distance=10)
    num_peaks = len(peaks)
    # 'peak_heights' is always present because `height` is passed; the array
    # is empty when no peak qualifies.
    peak_heights = props['peak_heights']
    max_peak_height = np.max(peak_heights) if len(peak_heights)>0 else 0
    idx_max = np.argmax(x)/len(x)   # position of the maximum as a fraction of the beat length

    # --- wavelet statistics ---
    # pywt.wavedec returns [cA3, cD3, cD2, cD1]: approximation first, then the
    # detail coefficients from the coarsest level (3) down to the finest (1).
    # Unpack by name so that wd1/wd2/wd3 really are detail levels 1/2/3.
    approx, detail3, detail2, detail1 = pywt.wavedec(x, 'db4', level=3)
    by_level = (approx, detail1, detail2, detail3)
    wa_mean, wd1_mean, wd2_mean, wd3_mean = [np.mean(c) for c in by_level]
    wa_std, wd1_std, wd2_std, wd3_std = [np.std(c) for c in by_level]

    # --- Hjorth-style parameters from the first and second differences ---
    diff1 = np.diff(x); diff2 = np.diff(diff1)
    activity = np.var(x)
    var_d1 = np.var(diff1)
    # 1e-8 guards the divisions for constant signals
    mobility = np.sqrt(var_d1/(activity+1e-8))
    complexity = np.sqrt(np.var(diff2)/(var_d1+1e-8))/(mobility+1e-8)

    return np.array([
        mean,std,mn,mx,ptp,median,q25,q75,iqr,sk,kurt,rms,energy,fft_energy,
        num_peaks,max_peak_height,idx_max,
        wa_mean,wd1_mean,wd2_mean,wd3_mean,
        wa_std,wd1_std,wd2_std,wd3_std,
        activity,mobility,complexity
    ])

In [None]:
# Converting all rows into feature matrices
N_SAMPLES = train_df.shape[1] - 1   # last column is the label

def df_to_feature_matrix(df):
    """Extract the feature vector and label of every beat in ``df``.

    Each row of ``df`` holds N_SAMPLES signal samples followed by the label in
    column N_SAMPLES.

    Args:
        df: DataFrame of beats, one beat per row.

    Returns:
        (X, y): X is a 2-D array with one feature row per beat (as produced by
        extract_features_from_beat), y is a 1-D integer array of labels.
    """
    # Work on the underlying NumPy array: DataFrame.iterrows() builds a Series
    # object for every row, which is needlessly slow for this many beats.
    values = df.to_numpy()
    beats = values[:, :N_SAMPLES]
    labels = values[:, N_SAMPLES].astype(int)

    X = [extract_features_from_beat(beat) for beat in tqdm(beats, total=len(beats))]
    return np.vstack(X), labels

X_train, y_train = df_to_feature_matrix(train_df)
X_test,  y_test  = df_to_feature_matrix(test_df)

print("Feature matrix shapes:", X_train.shape,X_test.shape)

In [None]:
# Standardise every feature (zero mean, unit variance), which helps
# scale-sensitive models. The scaler is fitted on the training features only
# and then applied unchanged to the test features.
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s  = scaler.transform(X_test)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Baseline Random Forest, fitted on the unscaled feature matrix
rf = RandomForestClassifier(n_estimators=200, random_state=42).fit(X_train, y_train)

# Predicted labels for the (unscaled) test features
y_pred = rf.predict(X_test)


In [None]:
# Column names of the feature matrix, in the order the features are produced
feature_names = [
 'mean','std','min','max','ptp','median','q25','q75','iqr','skew','kurtosis','rms',
 'energy','fft_energy','num_peaks','max_peak_height','idx_max',
 'wa_mean','wd1_mean','wd2_mean','wd3_mean',
 'wa_std','wd1_std','wd2_std','wd3_std',
 'activity','mobility','complexity'
]

# Confusion matrix of the Random Forest predictions on the test set
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=np.unique(y_test))
disp.plot(cmap='Blues', values_format='d')
plt.title("Random Forest Confusion Matrix")
plt.show()

# Feature indices sorted from most to least important
importances = rf.feature_importances_
idx = np.argsort(importances)[::-1]

print("Top 10 important features:")
for feature_pos in idx[:10]:
    print(f"{feature_names[feature_pos]:20s}: {importances[feature_pos]:.4f}")

In [None]:
# Random Forest with balanced class weights, trained on the standardised
# features and using all CPU cores
rf_params = dict(n_estimators=200, class_weight='balanced', random_state=42, n_jobs=-1)
rf = RandomForestClassifier(**rf_params).fit(X_train_s, y_train)

y_pred = rf.predict(X_test_s)

print("Random Forest Classification Report:")
print(classification_report(y_test, y_pred, digits=4))

In [None]:
# Two further classifiers on the same standardised features, for comparison
from xgboost import XGBClassifier

# Gradient-boosted trees
xgb = XGBClassifier(n_estimators=200, eval_metric='mlogloss', random_state=42, n_jobs=4).fit(X_train_s, y_train)
y_pred_xgb = xgb.predict(X_test_s)
xgb_report = classification_report(y_test, y_pred_xgb, digits=4)
print("\nXGBoost Report:")
print(xgb_report)

# RBF-kernel support vector machine with balanced class weights
svc = SVC(kernel='rbf', class_weight='balanced', random_state=42).fit(X_train_s, y_train)
y_pred_svc = svc.predict(X_test_s)
svc_report = classification_report(y_test, y_pred_svc, digits=4)
print("\nSVM Report:")
print(svc_report)

In [None]:
# Classify one test beat with the Random Forest and plot it
idx = 42
test_row = test_df.iloc[idx]
beat = test_row.iloc[:N_SAMPLES].values
true_label = test_row.iloc[N_SAMPLES]

# Same pipeline as for training: features -> fitted scaler -> model
feat = extract_features_from_beat(beat).reshape(1, -1)
feat_s = scaler.transform(feat)
pred = rf.predict(feat_s)[0]

fig, ax = plt.subplots(figsize=(10,3))
ax.plot(beat)
ax.set_title(f"Beat {idx} | True = {true_label} | Pred = {pred}")
ax.set_xlabel("Sample Index (0–186)")
plt.show()

In [None]:
#Upto now we did for just classifying normal amd abnormal
# Now we are seeing for different types like for new born and people just before dying
# Generating data for new born babies
!pip install neurokit2 --quiet

import neurokit2 as nk
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Generating  multiple newborn ECG signals
signals = []
for i in range(10):
    ecg_signal = nk.ecg_simulate(duration=10, heart_rate=np.random.randint(120, 160), noise=0.01)
    signals.append(ecg_signal)

# Converting  to DataFrame
newborn_df = pd.DataFrame(signals).T
newborn_df.to_csv("newborn_ecg_dataset.csv", index=False)

print("Newborn ECG data generated and saved as newborn_ecg_dataset.csv")

# Visualizing one sample
plt.figure(figsize=(10, 4))
plt.plot(signals[0])
plt.title("Simulated Newborn ECG Signal (Fast Heart Rate)")
plt.xlabel("Time steps")
plt.ylabel("Amplitude")
plt.show()


In [None]:
# Summary features for the simulated newborn ECG recordings

import pandas as pd
import numpy as np
import neurokit2 as nk
from scipy.signal import find_peaks

# Every column of the CSV is one recording
df = pd.read_csv("newborn_ecg_dataset.csv")

features = []
for column in df.columns:
    signal = df[column].dropna().to_numpy()

    # Local maxima at least 50 samples apart.
    # NOTE(review): no height/prominence threshold is applied, so this
    # presumably counts more than just QRS complexes — confirm.
    peaks, _ = find_peaks(signal, distance=50)
    peak_count = len(peaks)

    # One feature row per recording
    features.append([
        np.mean(signal),
        np.std(signal),
        np.min(signal),
        np.max(signal),
        peak_count,
        (peak_count / 10) * 60,   # peaks per 10 s scaled to a per-minute rate
    ])

# Feature table, one row per recording
feature_df = pd.DataFrame(features, columns=["mean", "std", "min", "max", "peak_count", "heart_rate"])
feature_df.to_csv("newborn_features.csv", index=False)

print("Feature extraction done")
print(feature_df.head())


In [None]:
# Fit a Random Forest on the newborn ECG feature table

import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

# Feature table written by the newborn feature-extraction step
df = pd.read_csv("newborn_features.csv")

# Placeholder target: every recording is labelled 0 ("normal newborn").
# Abnormal newborn data, once collected, would be labelled 1.
y = np.zeros(len(df))

# 80/20 train/test split
X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.2, random_state=42)

# Fit the forest and predict the held-out rows
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluation
print(" Model trained successfully")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Confusion matrix
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(cm)
disp.plot()
plt.title("Newborn ECG Random Forest Model — Confusion Matrix")
plt.show()

# NOTE(review): this model was trained on all-zero labels, so it has seen a
# single class only, yet it is stored under a "4class" file name.
joblib.dump(model, "ecg_4class_model.pkl")

print("Model saved as ecg_4class_model.pkl")




In [None]:
# Simulate weak, slow ("pre-death") ECG recordings
import neurokit2 as nk
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 10 recordings of 10 s each
signals = []
for _ in range(10):
    # Very low heart rate (25-39 bpm) with a higher noise level
    slow_rate = np.random.randint(25, 40)
    trace = nk.ecg_simulate(duration=10, heart_rate=slow_rate, noise=0.05)

    # Scale the amplitude down to mimic weak cardiac activity
    gain = np.random.uniform(0.3, 0.6)
    trace = trace * gain

    # Small Gaussian irregularity on top
    jitter = np.random.normal(0, 0.005, len(trace))
    signals.append(trace + jitter)

# After the transpose every column is one recording
predeath_df = pd.DataFrame(signals).T
predeath_df.to_csv("predeath_ecg_dataset.csv", index=False)

print(" Pre-death ECG data generated and saved as predeath_ecg_dataset.csv")

# Show the first recording
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(signals[0])
ax.set_title("Simulated Pre-death ECG Signal (Weak, Low Heart Rate)")
ax.set_xlabel("Time steps")
ax.set_ylabel("Amplitude")
plt.show()


In [None]:
# Summary features for the simulated pre-death ECG recordings

import pandas as pd
import numpy as np
from scipy.signal import find_peaks

# Every column of the CSV is one recording
df = pd.read_csv("predeath_ecg_dataset.csv")

features = []
for column in df.columns:
    signal = df[column].dropna().to_numpy()

    # Local maxima at least 50 samples apart.
    # NOTE(review): no height/prominence threshold is applied, so this
    # presumably counts more than just QRS complexes — confirm.
    peaks, _ = find_peaks(signal, distance=50)
    peak_count = len(peaks)

    # One feature row per recording
    features.append([
        np.mean(signal),
        np.std(signal),
        np.min(signal),
        np.max(signal),
        peak_count,
        (peak_count / 10) * 60,   # peaks per 10 s scaled to a per-minute rate
    ])

# Feature table, one row per recording
predeath_features = pd.DataFrame(features, columns=["mean", "std", "min", "max", "peak_count", "heart_rate"])
predeath_features.to_csv("predeath_features.csv", index=False)

print("Pre-death ECG features extracted and saved as predeath_features.csv")
print(predeath_features.head())


In [None]:
# Binary classifier: newborn (0) versus pre-death (1) recordings

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

class_labels = ["Newborn", "Pre-death"]

# Load both feature tables and tag each with its class label
newborn = pd.read_csv("newborn_features.csv")
predeath = pd.read_csv("predeath_features.csv")
newborn["label"] = 0  # newborn
predeath["label"] = 1  # pre-death

# Stack them into one table, then separate features from the target
data = pd.concat([newborn, predeath], ignore_index=True)
X, y = data.drop(columns=["label"]), data["label"]

# Stratified 70/30 split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Fit the forest and predict the held-out rows
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluation
print(" Model trained: Newborn vs Pre-death ECGs")
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=class_labels))

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels=class_labels)
disp.plot()
plt.title("Confusion Matrix — Newborn vs Pre-death ECGs")
plt.show()

# Feature Importance, one bar per feature column
importances = model.feature_importances_
fig, ax = plt.subplots(figsize=(7,4))
ax.bar(X.columns, importances)
ax.set_title("Feature Importance — Newborn vs Pre-death ECGs")
ax.set_ylabel("Importance")
plt.show()


In [None]:
# Simulate slow, low-amplitude, smoothed ("coma") ECG recordings
import neurokit2 as nk
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

n_samples = 20   # number of recordings
duration = 10    # seconds per recording

signals = []
for _ in range(n_samples):
    # Low heart rate (35-49 bpm)
    slow_rate = np.random.randint(35, 50)
    ecg = nk.ecg_simulate(duration=duration, heart_rate=slow_rate, noise=0.01)

    # Reduce the amplitude, then smooth with a size-20 convolution kernel
    gain = np.random.uniform(0.3, 0.6)
    ecg = nk.signal_smooth(ecg * gain, method="convolution", size=20)

    signals.append(ecg)

# After the transpose every column is one recording
coma_df = pd.DataFrame(signals).T
coma_df.to_csv("coma_ecg_dataset.csv", index=False)

print("Coma ECG dataset saved as coma_ecg_dataset.csv")

# Show the first recording
first_recording = signals[0]
plt.plot(first_recording)
plt.title("Example: Coma ECG Signal")
plt.xlabel("Time")
plt.ylabel("Amplitude")
plt.show()


In [None]:
# Summary features for the simulated coma ECG recordings
import pandas as pd
import numpy as np
from scipy.signal import find_peaks

# Every column of the CSV is one recording
df = pd.read_csv("coma_ecg_dataset.csv")

features = []
for column in df.columns:
    sig = df[column].dropna().to_numpy()

    # Local maxima at least 50 samples apart.
    # NOTE(review): no height/prominence threshold is applied, so this
    # presumably counts more than just QRS complexes — confirm.
    peaks, _ = find_peaks(sig, distance=50)
    peak_count = len(peaks)

    # One feature row per recording
    features.append([
        np.mean(sig),
        np.std(sig),
        np.min(sig),
        np.max(sig),
        peak_count,
        (peak_count / 10) * 60,   # peaks per 10 s scaled to a per-minute rate
    ])

# Feature table, one row per recording
coma_features = pd.DataFrame(features, columns=["mean","std","min","max","peak_count","heart_rate"])
coma_features.to_csv("coma_features.csv", index=False)

print("Coma features saved as coma_features.csv")
print(coma_features.head())


In [None]:
# Simulate fast, noisy ("mental/stress") ECG recordings
import neurokit2 as nk
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

n_samples = 20   # number of recordings
duration = 10    # seconds per recording

signals = []
for _ in range(n_samples):
    # Elevated heart rate (90-129 bpm) with a random noise level
    hr = np.random.randint(90, 130)
    noise_level = np.random.uniform(0.02, 0.06)
    ecg = nk.ecg_simulate(duration=duration, heart_rate=hr, noise=noise_level)

    # About half of the recordings get one Gaussian-shaped spike added at a
    # random position
    add_spike = np.random.rand() < 0.5
    if add_spike:
        center = np.random.randint(0, len(ecg))
        spike_mag = np.random.uniform(0.05, 0.15)
        spike_width = np.random.randint(3, 15)
        # Window of `spike_width` samples around the center, clipped to the signal
        lo = max(0, center - spike_width//2)
        hi = min(len(ecg), lo + spike_width)
        offsets = np.arange(lo, hi) - center
        ecg[lo:hi] += spike_mag * np.exp(-(offsets**2)/(2*(spike_width/4)**2))

    # Random overall gain between 0.8 and 1.2
    ecg *= np.random.uniform(0.8, 1.2)
    signals.append(ecg)

# After the transpose every column is one recording
mental_df = pd.DataFrame(signals).T
mental_df.to_csv("mental_ecg_dataset.csv", index=False)

print("Mental/Stress ECG dataset saved as mental_ecg_dataset.csv")

# Show the first recording
first_recording = signals[0]
plt.plot(first_recording)
plt.title("Example: Mental/Stress ECG")
plt.show()


In [None]:
# Summary features for the simulated mental/stress ECG recordings
import pandas as pd
import numpy as np
from scipy.signal import find_peaks

# Every column of the CSV is one recording
df = pd.read_csv("mental_ecg_dataset.csv")

features = []
for column in df.columns:
    sig = df[column].dropna().to_numpy()

    # Local maxima at least 50 samples apart.
    # NOTE(review): no height/prominence threshold is applied, so this
    # presumably counts more than just QRS complexes — confirm.
    peaks, _ = find_peaks(sig, distance=50)
    peak_count = len(peaks)

    # One feature row per recording
    features.append([
        np.mean(sig),
        np.std(sig),
        np.min(sig),
        np.max(sig),
        peak_count,
        (peak_count / 10) * 60,   # peaks per 10 s scaled to a per-minute rate
    ])

# Feature table, one row per recording
mental_features = pd.DataFrame(features, columns=["mean","std","min","max","peak_count","heart_rate"])
mental_features.to_csv("mental_features.csv", index=False)

print("Mental features saved as mental_features.csv")
print(mental_features.head())


In [None]:
 #Multi-class classification for all 4 ECG types

import joblib
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Load feature sets and tag each with its class label
newborn = pd.read_csv("newborn_features.csv"); newborn["label"] = 0
predeath = pd.read_csv("predeath_features.csv"); predeath["label"] = 1
coma = pd.read_csv("coma_features.csv"); coma["label"] = 2
mental = pd.read_csv("mental_features.csv"); mental["label"] = 3

# Combine all classes
data = pd.concat([newborn, predeath, coma, mental], ignore_index=True)

X = data.drop(columns=["label"])
y = data["label"]

# Split (stratified)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=42, stratify=y)

# Train Random Forest
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

# Persist the 4-class model under the file name the prediction cells load.
# Without this, "ecg_4class_model.pkl" still holds the newborn-only model
# saved earlier under that name, which was trained on all-zero labels and
# therefore can only ever predict class 0.
joblib.dump(model, "ecg_4class_model.pkl")

# Predict
y_pred = model.predict(X_test)

print("4-Class ECG Model Trained Successfully\n")
print(classification_report(y_test, y_pred,
                            target_names=["Newborn","Dying","Coma","Mental"]))

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels=["Newborn","Dying","Coma","Mental"])
disp.plot(cmap="Blues")
plt.title("Confusion Matrix — 4-Class ECG Classification")
plt.show()

# Feature Importance, one bar per feature column
importances = model.feature_importances_
plt.bar(X.columns, importances)
plt.title("Feature Importance Across All 4 ECG Conditions")
plt.ylabel("Importance")
plt.show()


In [None]:
import joblib
import numpy as np
import pandas as pd
from scipy.signal import find_peaks

# Load the classifier stored under this file name with joblib.dump.
# joblib.load unpickles the file, so only load files this notebook produced.
model = joblib.load("ecg_4class_model.pkl")

print("Model loaded successfully!")


In [None]:
def extract_ecg_features(signal, duration_seconds=10):
    """Compute the six summary features of one ECG recording.

    Args:
        signal: array-like of numeric samples.
        duration_seconds: length of the recording in seconds, used to turn the
            peak count into a per-minute rate.

    Returns:
        Array of shape (1, 6): mean, std, min, max, peak count and estimated
        heart rate (peaks per minute).
    """
    samples = np.array(signal).astype(float)

    # Local maxima at least 50 samples apart (the same setting the
    # feature-extraction cells use)
    peak_positions, _ = find_peaks(samples, distance=50)
    n_peaks = len(peak_positions)

    feature_row = [
        np.mean(samples),
        np.std(samples),
        np.min(samples),
        np.max(samples),
        n_peaks,
        (n_peaks / duration_seconds) * 60,   # peaks per second -> per minute
    ]
    return np.array(feature_row).reshape(1, -1)


In [None]:
def predict_ecg(signal):
    """Classify one ECG recording with the loaded model.

    Args:
        signal: array-like of numeric samples.

    Returns:
        The readable name of the predicted class.
    """
    # Readable name for each numeric class label
    label_map = {
        0: "Newborn",
        1: "Dying/Pre-death",
        2: "Coma Patient",
        3: "Mental/Stress Patient"
    }

    feature_row = extract_ecg_features(signal)
    predicted_label = model.predict(feature_row)[0]
    return label_map[predicted_label]


In [None]:

import time, os, sys
import numpy as np
import pandas as pd
from scipy.signal import find_peaks
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split

# -------------- Config --------------
# Feature CSV of every class; the position in this dict is the numeric label.
FILES = {
    "newborn": "newborn_features.csv",
    "dying":   "predeath_features.csv",
    "coma":    "coma_features.csv",
    "mental":  "mental_features.csv",
}
LABEL_MAP = {0: "Newborn", 1: "Dying/Pre-death", 2: "Coma Patient", 3: "Mental/Stress Patient"}
MIN_LEN = 80          # minimal length for peak detection (tile if shorter)
DURATION_SECONDS = 10.0  # used to estimate bpm; adjust if your input length differs

# -------------- quick file check --------------
missing = [v for v in FILES.values() if not os.path.isfile(v)]
if missing:
    print("ERROR: Missing feature files:", missing)
    print("Make sure these files exist in the notebook working directory.")
    raise FileNotFoundError(f"Missing: {missing}")

# -------------- load and combine feature CSVs (very small) --------------
# Every table gets a `label` column holding its position in FILES (0,1,2,3).
dfs = [pd.read_csv(fname).assign(label=label) for label, fname in enumerate(FILES.values())]
data = pd.concat(dfs, ignore_index=True)
X = data.drop(columns=['label']).values
y = data['label'].values

# -------------- train tiny logistic model (fast) --------------
# The one-vs-rest scheme is expressed with OneVsRestClassifier: the
# `multi_class` argument of LogisticRegression is deprecated since
# scikit-learn 1.5, and the liblinear solver only fits binary problems.
from sklearn.multiclass import OneVsRestClassifier

t0 = time.time()
# Tiny pipeline: scaling + one liblinear logistic regression per class
clf = make_pipeline(
    StandardScaler(),
    OneVsRestClassifier(LogisticRegression(solver='liblinear', C=1.0, max_iter=200, random_state=42)),
)
clf.fit(X, y)
t1 = time.time()
print(f"Trained lightweight classifier in {t1-t0:.2f}s (fast).")

# -------------- fast feature extractor --------------
def extract_features_fast(sig, duration_seconds=DURATION_SECONDS):
    """Compute the six summary features of a signal, ignoring NaN samples.

    Args:
        sig: array-like of numeric samples; NaN entries are dropped.
        duration_seconds: assumed length of the signal in seconds, used to
            turn the peak count into a per-minute rate.

    Returns:
        1-D float array: mean, std, min, max, peak count, estimated bpm.

    Raises:
        ValueError: if no samples remain after dropping NaNs.
    """
    values = np.asarray(sig, dtype=float)
    values = values[~np.isnan(values)]
    if values.size == 0:
        raise ValueError("Signal empty after removing NaNs.")

    # Minimum peak spacing: 5% of the signal length, but never below 3 samples.
    # NOTE(review): the feature CSVs this classifier is trained on were built
    # with a fixed distance of 50 samples — confirm the two are meant to differ.
    min_gap = max(3, int(len(values) * 0.05))
    peak_positions, _ = find_peaks(values, distance=min_gap)
    n_peaks = int(peak_positions.size)

    summary = (
        float(values.mean()),
        float(values.std()),
        float(values.min()),
        float(values.max()),
        n_peaks,
        float((n_peaks / duration_seconds) * 60.0),   # peaks per second -> per minute
    )
    return np.array(summary, dtype=float)

# -------------- input parsing helper --------------
def parse_input_to_signal(raw_text, min_len=MIN_LEN):
    """Parse comma/whitespace separated numbers into a float array.

    Inputs shorter than ``min_len`` are repeated (tiled) and cut to exactly
    ``min_len`` samples.

    Args:
        raw_text: text containing numbers separated by commas and/or whitespace.
        min_len: minimum number of samples in the returned array.

    Returns:
        1-D float array with at least ``min_len`` samples.

    Raises:
        ValueError: if the text holds no tokens or a token is not a number.
    """
    # Commas count as separators; split() on whitespace never yields empty tokens.
    tokens = raw_text.replace(',', ' ').split()
    if not tokens:
        raise ValueError("No numeric tokens found. Provide numbers separated by spaces or commas.")

    try:
        values = np.array(list(map(float, tokens)), dtype=float)
    except Exception as e:
        raise ValueError("Failed to parse numbers. Use floats separated by commas or spaces.") from e

    if values.size >= min_len:
        return values

    # Too short: repeat the sequence often enough, then trim to min_len.
    repeats = int(np.ceil(min_len / max(1, values.size)))
    return np.tile(values, repeats)[:min_len]

# -------------- interactive prompt --------------
# Reads one line of ECG values from the user, classifies it and prints the
# prediction, the per-class probabilities and the features used. Any error on
# the way is reported as a message instead of a traceback.
print("\nNow paste ECG values (comma or space separated). Short inputs auto-extended to ~80 samples for detection.")
raw = input("Enter ECG values:\n").strip()
start = time.time()
try:
    sig = parse_input_to_signal(raw)
    feats = extract_features_fast(sig).reshape(1,-1)   # single-row feature matrix
    pred = clf.predict(feats)[0]
    if hasattr(clf, "predict_proba"):
        probs = clf.predict_proba(feats)[0]
    else:
        probs = None
    elapsed = time.time() - start

    predicted_name = LABEL_MAP.get(int(pred),'Class'+str(pred))
    print(f"\n Predicted: {predicted_name}  (in {elapsed*1000:.1f} ms)")

    # Compact per-class probabilities, when the classifier provides them
    if probs is not None:
        for class_pos, prob in enumerate(probs):
            print(f"  {LABEL_MAP.get(class_pos)}: {prob*100:5.1f}%")

    # Show the feature values (small)
    print("\nFeatures used: mean, std, min, max, peak_count, est_bpm")
    print(np.round(feats.flatten(), 4))
except Exception as e:
    print("ERROR during prediction:", e)
