<a href="https://colab.research.google.com/github/mythogenesys/ecg-denoiser-hackathon/blob/main/notebooks/ecg-denoiser-training-2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import json
from google.colab import files

# Utility cell: upload a notebook file, drop its top-level `metadata.widgets`
# entry (if any), save the result as "fixed_<original name>" and download it.
# Relies on `json` and `google.colab.files` imported at the top of this cell.

# Step 1: Upload the notebook you downloaded from Colab
uploaded = files.upload()
if not uploaded:
    # Nothing came back from the upload dialog; fail with a clear message
    # instead of an IndexError on the filename lookup below.
    raise RuntimeError("No file was uploaded. Re-run this cell and choose a notebook file.")
# Only the first uploaded file is processed.
filename = next(iter(uploaded))

# Step 2: Load and fix metadata
with open(filename, "r", encoding="utf-8") as f:
    nb = json.load(f)

# Remove only the widgets metadata if present
if "widgets" in nb.get("metadata", {}):
    del nb["metadata"]["widgets"]
    print("Removed 'metadata.widgets'")
else:
    print("No 'metadata.widgets' entry found; notebook content left unchanged")

# Step 3: Save fixed notebook
fixed_filename = "fixed_" + filename
with open(fixed_filename, "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps non-ASCII characters (emoji, accented text) as
    # real UTF-8 instead of rewriting every one of them as a \uXXXX escape.
    json.dump(nb, f, indent=2, ensure_ascii=False)

print("Saved cleaned notebook as", fixed_filename)

# Step 4: Download back to your computer
files.download(fixed_filename)


In [None]:
# ==============================================================================
#      FINAL: Full ECG Denoising Ablation & Validation Study Notebook
# ==============================================================================
# This notebook consolidates all setup, environment fixes, code synchronization,
# training, and validation into a single, runnable sequence of cells.
#
# Instructions:
# 1. Ensure your data is in Google Drive at:
#    /content/drive/MyDrive/ecg_denoiser_hackathon/data/
# 2. Run this cell and every cell below it, in order.
# 3. The process will take a significant amount of time (1-2 hours depending on
#    the number of epochs).
# 4. Final results (trained models and confusion matrices) will be saved back
#    to your Google Drive.
# ==============================================================================

import os

# ---
# CELL 1: SETUP - Mount Drive & Clone Repository
# ---
print("--- [Step 1/5] Setting up the environment ---")
from google.colab import drive
drive.mount('/content/drive')

# Your specific GitHub repository and project folder name
GIT_REPO = "ecg-denoiser-hackathon"
GIT_PATH = f"https://github.com/Mohan-CAS-and-hackathons/{GIT_REPO}.git"

if not os.path.exists(GIT_REPO):
    print(f"\nCloning repository from {GIT_PATH}...")
    !git clone {GIT_PATH}
else:
    print(f"\nRepository '{GIT_REPO}' already exists. Skipping clone.")

# CRITICAL: Change directory into the repository for all subsequent commands
os.chdir(GIT_REPO)
print(f"✅ Successfully changed directory to: {os.getcwd()}")




--- [Step 1/5] Setting up the environment ---
Mounted at /content/drive

Cloning repository from https://github.com/Mohan-CAS-and-hackathons/ecg-denoiser-hackathon.git...
Cloning into 'ecg-denoiser-hackathon'...
remote: Enumerating objects: 164, done.
remote: Counting objects: 100% (109/109), done.
remote: Compressing objects: 100% (82/82), done.
remote: Total 164 (delta 49), reused 83 (delta 24), pack-reused 55 (from 1)
Receiving objects: 100% (164/164), 87.49 MiB | 4.93 MiB/s, done.
Resolving deltas: 100% (67/67), done.
✅ Successfully changed directory to: /content/ecg-denoiser-hackathon


In [None]:
# Sanity check: print the current working directory and list its contents.
!pwd
!ls

/content/ecg-denoiser-hackathon
all_beats.npy	docs	   requirements.txt   src
all_labels.npy	models	   RESEARCH_PAPER.md  STPC_research_paper.md
app.py		notebooks  results	      triangular_pulse_appendix.md
assets		README.md  samples	      tutorials


In [None]:
# ---
# CELL 2: CODE SYNC & ENVIRONMENT CORRECTION
# ---
# Emits the two status lines for this stage, one per line.
#
# --- Part A: Overwrite all src files with your final, correct versions ---
# NOTE(review): as written, this cell only prints the status lines below; it
# contains no commands that write or overwrite any file. Confirm whether the
# overwrite step was removed on purpose or still needs to be added here.
for status_line in (
    "\n--- [Step 2/5] Synchronizing source code and fixing environment ---",
    "Overwriting local files with your provided source code...",
):
    print(status_line)




--- [Step 2/5] Synchronizing source code and fixing environment ---
Overwriting local files with your provided source code...


In [None]:
# --- Part B: Install dependencies and upgrade wfdb ---
print("\nInstalling dependencies and upgrading wfdb...")
!pip install -r requirements.txt
!pip uninstall -y wfdb
!pip install --upgrade wfdb

print("\n\n✅✅✅ Environment is fully prepared. Proceeding to training. ✅✅✅")





Installing dependencies and upgrading wfdb...
Collecting wfdb (from -r requirements.txt (line 13))
  Downloading wfdb-4.3.0-py3-none-any.whl.metadata (3.8 kB)
Collecting streamlit (from -r requirements.txt (line 16))
  Downloading streamlit-1.49.1-py3-none-any.whl.metadata (9.5 kB)
INFO: pip is looking at multiple versions of wfdb to determine which version is compatible with other requirements. This could take a while.
Collecting wfdb (from -r requirements.txt (line 13))
  Downloading wfdb-4.2.0-py3-none-any.whl.metadata (3.7 kB)
  Downloading wfdb-4.1.2-py3-none-any.whl.metadata (4.3 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit->-r requirements.txt (line 16))
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading wfdb-4.1.2-py3-none-any.whl (159 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 160.0/160.0 kB 12.0 MB/s eta 0:00:00
Downloading streamlit-1.49.1-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━

In [None]:
# ---
# CELL 3: TRAINING - Run the Ablation Study
# ---
# NOTE: You can edit src/train.py and change NUM_EPOCHS to a smaller number (e.g., 20)
# to make this process faster for testing.

print("\n--- [Step 3/5] Starting Ablation Study Training ---")

DRIVE_BASE_PATH = '/content/drive/MyDrive/ecg_denoiser_hackathon/'
MODEL_DIR = os.path.join(DRIVE_BASE_PATH, 'models')
os.makedirs(MODEL_DIR, exist_ok=True)

MODEL_PATH_L1_ONLY = os.path.join(MODEL_DIR, 'denoiser_l1_only.pth')
MODEL_PATH_L1_GRAD = os.path.join(MODEL_DIR, 'denoiser_l1_grad.pth')
MODEL_PATH_STPC_FULL = os.path.join(MODEL_DIR, 'denoiser_stpc_full.pth')

print("\n--- [Run 1/3] Training Model with L1 Loss Only ---")
!python3 src/train.py \
    --model_save_path "{MODEL_PATH_L1_ONLY}" \
    --no-gradient-loss \
    --no-fft-loss

print("\n--- [Run 2/3] Training Model with L1 + Gradient Loss ---")
!python3 src/train.py \
    --model_save_path "{MODEL_PATH_L1_GRAD}" \
    --no-fft-loss

print("\n--- [Run 3/3] Training Model with Full STPC Loss ---")
!python3 src/train.py \
    --model_save_path "{MODEL_PATH_STPC_FULL}"

print("\n✅ Ablation study training complete!")





--- [Step 3/5] Starting Ablation Study Training ---

--- [Run 1/3] Training Model with L1 Loss Only ---
Using device: cuda
Gradient Loss Enabled: False
FFT Loss Enabled: False
Model will be saved to: /content/drive/MyDrive/ecg_denoiser_hackathon/models/denoiser_l1_only.pth
Initializing dataset: loading all clean record names...
Loading all noise signals into memory...
Loading all clean signals into memory for faster access...
100% 48/48 [00:20<00:00,  2.30it/s]
Dataset initialized with 48 usable clean signals.
  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():
100% 313/313 [00:19<00:00, 15.98it/s, loss=0.13]
Epoch 1/50, Average Loss: 0.201804
Model saved to /content/drive/MyDrive/ecg_denoiser_hackathon/models/denoiser_l1_only.pth
100% 313/313 [00:18<00:00, 17.11it/s, loss=0.158]
Epoch 2/50, Average Loss: 0.156606
Model saved to /content/drive/MyDrive/ecg_denoiser_hackathon/models/denoiser_l1_only.pth
100% 313/313 [00:18<00:00, 17.00it/s, loss=0.13]
Epoch 3/50, Av

In [None]:
# ---
# CELL 4: VALIDATION - Run End-to-End Validation for the Ablation Study
# ---
print("\n--- [Step 4/5] Starting End-to-End Validation for All Models ---")

RESULTS_DIR = os.path.join(DRIVE_BASE_PATH, 'results')
os.makedirs(RESULTS_DIR, exist_ok=True)

OUTPUT_PREFIX_L1_ONLY = os.path.join(RESULTS_DIR, 'l1_only')
OUTPUT_PREFIX_L1_GRAD = os.path.join(RESULTS_DIR, 'l1_grad')
OUTPUT_PREFIX_STPC_FULL = os.path.join(RESULTS_DIR, 'stpc_full')

print("\n--- [Run 1/3] Validating Model with L1 Loss Only ---")
!python3 src/validate_end_to_end.py \
    --denoiser_model_path "{MODEL_PATH_L1_ONLY}" \
    --output_prefix "{OUTPUT_PREFIX_L1_ONLY}"

print("\n--- [Run 2/3] Validating Model with L1 + Gradient Loss ---")
!python3 src/validate_end_to_end.py \
    --denoiser_model_path "{MODEL_PATH_L1_GRAD}" \
    --output_prefix "{OUTPUT_PREFIX_L1_GRAD}"

print("\n--- [Run 3/3] Validating Model with Full STPC Loss ---")
!python3 src/validate_end_to_end.py \
    --denoiser_model_path "{MODEL_PATH_STPC_FULL}" \
    --output_prefix "{OUTPUT_PREFIX_STPC_FULL}"

print("\n\n✅✅✅ Ablation study validation complete! ✅✅✅")


# ---
# CELL 5: FINAL CHECK - Verify Output Files
# ---
print("\n--- [Step 5/5] Verifying output files in Google Drive ---")
print("\nTrained Models:")
!ls -lh {MODEL_DIR}

print("\nValidation Results (Confusion Matrices):")
!ls -lh {RESULTS_DIR}


--- [Step 4/5] Starting End-to-End Validation for All Models ---

--- [Run 1/3] Validating Model with L1 Loss Only ---
--- Starting End-to-End Validation ---
Using Device: cuda
Test Record: 201, Noise Level: 0 dB SNR
Loading Denoiser from: /content/drive/MyDrive/ecg_denoiser_hackathon/models/denoiser_l1_only.pth
Output file prefix: /content/drive/MyDrive/ecg_denoiser_hackathon/results/l1_only
Loading models...
Downloading fresh copy of record '201' from PhysioNet to bypass local read errors...
Generating record list for: 201
Generating list of all files for: 201
Downloading files...
Finished downloading files
Download complete.
Synthesizing a highly noisy signal...
Available noise types: dict_keys(['baseline_wander', 'electrode_motion', 'muscle_artifact'])
Using 'baseline_wander' noise for validation.
Denoising the signal with the U-Net model...
Classifying beats from all three signal types...

--- PERFORMANCE ON NOISY SIGNAL ---
              precision    recall  f1-score   support

