In [1]:
import os
import shutil

# ✅ Extract AF label from header file
def get_af_label(hea_path):
    try:
        with open(hea_path, 'r') as f:
            lines = f.readlines()
            for line in lines:
                line_lower = line.lower()
                if "paroxysmal atrial fibrillation" in line_lower:
                    return "PAF"
                elif "persistent atrial fibrillation" in line_lower:
                    return "AF"
                elif "non atrial fibrillation" in line_lower:
                    return "NonAF"
    except Exception as e:
        print(f"⚠️ Error reading {hea_path}: {e}")
    return "Unknown"

# ✅ Input base path (updated folder name to PAF-data)
base_input_path = "/kaggle/input/PAF-data/paroxysmal-atrial-fibrillation-events-detection-from-dynamic-ecg-recordings-the-4th-china-physiological-signal-challenge-2021-1.0.0"
training_sets = ['Training_set_I', 'Training_set_II']
input_paths = [os.path.join(base_input_path, ts) for ts in training_sets]

# ✅ Output base path (Kaggle working directory)
output_base_path = "/kaggle/working"
target_labels = ['AF', 'PAF', 'NonAF']

# ✅ Create output folders
for label in target_labels:
    os.makedirs(os.path.join(output_base_path, label), exist_ok=True)

# ✅ Dictionary to count saved files
file_counts = {label: 0 for label in target_labels}

# ✅ Process both training sets
for set_path in input_paths:
    if not os.path.isdir(set_path):
        print(f"⚠️ Folder not found: {set_path}")
        continue

    for file in os.listdir(set_path):
        if file.endswith('.hea'):
            base_name = file[:-4]
            hea_path = os.path.join(set_path, file)
            label = get_af_label(hea_path)

            if label in target_labels:
                for ext in ['.hea', '.atr', '.dat']:
                    src = os.path.join(set_path, base_name + ext)
                    dest = os.path.join(output_base_path, label, base_name + ext)

                    if os.path.exists(src):
                        shutil.copy2(src, dest)
                file_counts[label] += 1
            else:
                print(f"❓ Skipped unlabelled: {file}")

# ✅ Print summary
print("\n✅ Done! Files segregated:")
for label in target_labels:
    print(f"{label}: {file_counts[label]} records")


⚠️ Folder not found: /kaggle/input/PAF-data/paroxysmal-atrial-fibrillation-events-detection-from-dynamic-ecg-recordings-the-4th-china-physiological-signal-challenge-2021-1.0.0/Training_set_I
⚠️ Folder not found: /kaggle/input/PAF-data/paroxysmal-atrial-fibrillation-events-detection-from-dynamic-ecg-recordings-the-4th-china-physiological-signal-challenge-2021-1.0.0/Training_set_II

✅ Done! Files segregated:
AF: 0 records
PAF: 0 records
NonAF: 0 records


In [2]:
import os

base_input_path = "/kaggle/input/PAF-data"

# List all subdirectories
for root, dirs, files in os.walk(base_input_path):
    print("📁", root)
    for d in dirs:
        print("   └──", d)
    break  # Only list top-level


In [3]:
import os

print("📦 Kaggle Input Folders:")
for d in os.listdir("/kaggle/input"):
    print(" -", d)


📦 Kaggle Input Folders:
 - paf-data


In [4]:
import os

base_input_path = "/kaggle/input/paf-data"

print(f"📁 Listing folders inside: {base_input_path}")
for root, dirs, files in os.walk(base_input_path):
    print("📁", root)
    for d in dirs:
        print("   └──", d)
    break  # Only show top-level structure


📁 Listing folders inside: /kaggle/input/paf-data
📁 /kaggle/input/paf-data
   └── PAF


In [5]:
import os
import shutil

# ✅ Correct base path based on Kaggle folder structure
base_input_path = "/kaggle/input/paf-data/PAF"
training_sets = ["Training_set_I", "Training_set_II"]
output_base = "/kaggle/working/segregated_data"

# ✅ Function to determine category from .hea file
def get_af_label(hea_path):
    try:
        with open(hea_path, 'r') as f:
            lines = f.readlines()
            for line in lines:
                line_lower = line.lower()
                if "paroxysmal atrial fibrillation" in line_lower:
                    return "PAF"
                elif "persistent atrial fibrillation" in line_lower:
                    return "AF"
                elif "non atrial fibrillation" in line_lower:
                    return "NonAF"
    except Exception as e:
        print(f"⚠️ Error reading {hea_path}: {e}")
    
    return "Unknown"

# ✅ Process each training set independently
global_counts = {}

for ts in training_sets:
    ts_path = os.path.join(base_input_path, ts)
    ts_output = os.path.join(output_base, ts)
    
    if not os.path.exists(ts_path):
        print(f"⚠️ Folder not found: {ts_path}")
        continue

    # Create subfolders for AF, PAF, NonAF under each training set
    categories = ["AF", "PAF", "NonAF"]
    for cat in categories:
        os.makedirs(os.path.join(ts_output, cat), exist_ok=True)
    
    counts = {"AF": 0, "PAF": 0, "NonAF": 0, "Unknown": 0}

    for fname in os.listdir(ts_path):
        if fname.endswith(".hea"):
            record_name = fname.replace(".hea", "")
            hea_path = os.path.join(ts_path, record_name + ".hea")
            label = get_af_label(hea_path)

            if label in categories:
                dest_dir = os.path.join(ts_output, label)
                for ext in [".hea", ".atr", ".dat"]:
                    src = os.path.join(ts_path, record_name + ext)
                    if os.path.exists(src):
                        shutil.copy(src, dest_dir)
                counts[label] += 1
            else:
                counts["Unknown"] += 1

    global_counts[ts] = counts

# ✅ Final report
print("\n✅ Done! Files segregated into AF, PAF, and NonAF folders per training set:")
for ts, counts in global_counts.items():
    print(f"\n📂 {ts}:")
    for label, count in counts.items():
        print(f"  {label}: {count} records")



✅ Done! Files segregated into AF, PAF, and NonAF folders per training set:

📂 Training_set_I:
  AF: 153 records
  PAF: 96 records
  NonAF: 470 records
  Unknown: 0 records

📂 Training_set_II:
  AF: 322 records
  PAF: 133 records
  NonAF: 251 records
  Unknown: 0 records


In [6]:
import shutil

zip_path = "/kaggle/working/segregated_data.zip"
shutil.make_archive(base_name=zip_path.replace('.zip', ''), format='zip', root_dir=output_base)

print(f"\n✅ ZIP archive created at: {zip_path}")



✅ ZIP archive created at: /kaggle/working/segregated_data.zip


In [7]:
import shutil
import os

# Path to the folder you want to zip
folder_to_zip_1 = "/kaggle/working/segregated_data/Training_set_I"
folder_to_zip_2 = "/kaggle/working/segregated_data/Training_set_II"

# Output zip paths (will be saved inside /kaggle/working/)
zip_output_1 = "/kaggle/working/Training_set_I.zip"
zip_output_2 = "/kaggle/working/Training_set_II.zip"

# Create zip files
shutil.make_archive(zip_output_1[:-4], 'zip', folder_to_zip_1)
shutil.make_archive(zip_output_2[:-4], 'zip', folder_to_zip_2)

print(f"✅ Created zip files:\n - {zip_output_1}\n - {zip_output_2}")


✅ Created zip files:
 - /kaggle/working/Training_set_I.zip
 - /kaggle/working/Training_set_II.zip


In [8]:
import os

print("Files in /kaggle/working/:")
print(os.listdir("/kaggle/working/"))

print("\nFiles in segregated_data/Training_set_I:")
print(os.listdir("/kaggle/working/segregated_data/Training_set_I")[:5])

print("\nFiles in segregated_data/Training_set_II:")
print(os.listdir("/kaggle/working/segregated_data/Training_set_II")[:5])


Files in /kaggle/working/:
['PAF', 'Training_set_II.zip', 'Training_set_I.zip', 'AF', '__notebook__.ipynb', 'segregated_data.zip', 'NonAF', 'segregated_data']

Files in segregated_data/Training_set_I:
['PAF', 'AF', 'NonAF']

Files in segregated_data/Training_set_II:
['PAF', 'AF', 'NonAF']


In [9]:
from IPython.display import FileLink, display

display(FileLink("/kaggle/working/Training_set_I.zip"))
display(FileLink("/kaggle/working/Training_set_II.zip"))


In [10]:
from IPython.display import FileLink, display

display(FileLink("Training_set_I.zip"))
display(FileLink("Training_set_II.zip"))


In [11]:
!pip install wfdb


Collecting wfdb
  Downloading wfdb-4.3.0-py3-none-any.whl.metadata (3.8 kB)
Downloading wfdb-4.3.0-py3-none-any.whl (163 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.8/163.8 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: wfdb
Successfully installed wfdb-4.3.0


In [12]:
import os
import wfdb
import scipy.io

def convert_dat_to_mat(record_path, output_mat_path):
    try:
        record = wfdb.rdrecord(record_path)
        signal_data = record.p_signal
        scipy.io.savemat(output_mat_path, {'ecg_signal': signal_data})
        print(f"✅ Converted {record_path} to {output_mat_path}")
    except Exception as e:
        print(f"⚠️ Error converting {record_path}: {e}")

def batch_convert(base_input_folder, output_folder):
    os.makedirs(output_folder, exist_ok=True)

    # List category folders like AF, PAF, NonAF
    for category in os.listdir(base_input_folder):
        category_path = os.path.join(base_input_folder, category)
        if os.path.isdir(category_path):
            # Create corresponding category folder in output
            output_category_folder = os.path.join(output_folder, category)
            os.makedirs(output_category_folder, exist_ok=True)
            
            # Now list records inside category folder
            for record_name in os.listdir(category_path):
                record_dir = os.path.join(category_path, record_name)
                if os.path.isdir(record_dir):
                    record_file_prefix = os.path.join(record_dir, record_name)
                    output_mat_path = os.path.join(output_category_folder, record_name + ".mat")
                    convert_dat_to_mat(record_file_prefix, output_mat_path)

# === Set your actual paths ===
training_set_1_folder = "/kaggle/working/segregated_data/Training_set_I"
training_set_2_folder = "/kaggle/working/segregated_data/Training_set_II"

output_mat_folder_1 = "/kaggle/working/mat_files/Training_set_I"
output_mat_folder_2 = "/kaggle/working/mat_files/Training_set_II"

print("Starting conversion for Training_set_I...")
batch_convert(training_set_1_folder, output_mat_folder_1)

print("\nStarting conversion for Training_set_II...")
batch_convert(training_set_2_folder, output_mat_folder_2)

print("\n✅ All conversions completed!")


Starting conversion for Training_set_I...

Starting conversion for Training_set_II...

✅ All conversions completed!
