In [1]:
import os

# Root directory handed to read_all_data() below. The default is the original
# author's local folder; set the EGM_DATA_BASE_PATH environment variable to
# run the notebook on another machine without editing this cell.
DATA_BASE_PATH = os.environ.get(
    "EGM_DATA_BASE_PATH",
    "/Users/mirzasharmin/Downloads/Star copy 2",
)

In [2]:
# --- Signal-processing settings shared by the preprocessing cells ---

# Sampling rates.
# ORIGINAL_FREQUENCY is not referenced by any cell visible in this notebook;
# presumably it is the native sampling rate of the recordings -- TODO confirm.
ORIGINAL_FREQUENCY = 2_000
# Passed to downsample_data(). Together with 4-second segments the segmentation
# log reports 1600 samples per segment, i.e. 400 samples per second.
TARGET_FREQUENCY = 400

# Passed to filter_all_dataframes() as the filter cutoff (presumably in Hz -- confirm).
CUTOFF_FREQUENCY = 100

# Length of each segment produced by segment_data(), in seconds.
SEGMENT_DURATION_SECONDS = 4

In [3]:
from src.read_data import read_all_data

# Load everything found under DATA_BASE_PATH. The helper returns two
# collections, one for the patient recordings and one for the sinus
# recordings; later cells call .keys() on both, so they are mappings
# (presumably keyed by recording path -- confirm in src.read_data).
patient_dfs, sinus_dfs = read_all_data(DATA_BASE_PATH)

In [4]:
# Grab the first key of each mapping (handy for inspecting a single recording).
# Iterating a mapping yields its keys, so list(mapping)[0] is the first key.
first_patient_key = list(patient_dfs)[0]
first_sinus_keys = list(sinus_dfs)[0]

In [5]:
from src.preprocess_data import filter_all_dataframes

# Apply the same filter (cutoff = CUTOFF_FREQUENCY) to both groups,
# patient recordings first, then sinus recordings.
filtered_patient_dfs, filtered_sinus_dfs = (
    filter_all_dataframes(raw_dfs, CUTOFF_FREQUENCY)
    for raw_dfs in (patient_dfs, sinus_dfs)
)

In [6]:
from src.preprocess_data import downsample_data

# Downsample both filtered groups to TARGET_FREQUENCY (patient first, then sinus).
# NOTE(review): only the target rate is passed here; confirm that
# downsample_data() knows the source rate (ORIGINAL_FREQUENCY is unused).
downsampled_patient_dfs, downsampled_sinus_dfs = (
    downsample_data(filtered_dfs, TARGET_FREQUENCY)
    for filtered_dfs in (filtered_patient_dfs, filtered_sinus_dfs)
)

In [7]:
from src.preprocess_data import normalize_data

# Normalize both downsampled groups with the same helper
# (patient recordings first, then sinus recordings).
normalized_patient_dfs, normalized_sinus_dfs = map(
    normalize_data,
    (downsampled_patient_dfs, downsampled_sinus_dfs),
)

In [8]:
from src.preprocess_data import segment_data

# Cut every normalized recording into fixed-length segments of
# SEGMENT_DURATION_SECONDS seconds (patient group first, then sinus group).
patient_segmented_arrays, sinus_segmented_arrays = (
    segment_data(normalized_dfs, segment_duration_sec=SEGMENT_DURATION_SECONDS)
    for normalized_dfs in (normalized_patient_dfs, normalized_sinus_dfs)
)

Segmenting into 4-second segments (1600 samples each)...
  /Users/mirzasharmin/Downloads/Star copy 2/Patient 1- study date 21:01:2021/2022_07_11_21_30_06: 58 segments created
  /Users/mirzasharmin/Downloads/Star copy 2/Patient 1- study date 21:01:2021/2022_07_11_21_31_11: 53 segments created
  /Users/mirzasharmin/Downloads/Star copy 2/Patient 1- study date 21:01:2021/2022_07_11_21_32_46: 41 segments created
  /Users/mirzasharmin/Downloads/Star copy 2/Patient 1- study date 21:01:2021/2022_07_11_21_33_51: 52 segments created
  /Users/mirzasharmin/Downloads/Star copy 2/Patient 1- study date 21:01:2021/2022_07_11_21_34_38: 49 segments created
  /Users/mirzasharmin/Downloads/Star copy 2/Patient 1- study date 21:01:2021/2022_07_11_21_35_27: 51 segments created
  /Users/mirzasharmin/Downloads/Star copy 2/Patient 1- study date 21:01:2021/2022_07_11_21_36_35: 50 segments created
  /Users/mirzasharmin/Downloads/Star copy 2/Patient 10- study date 24:05:2021/2022_07_13_13_31_54: 44 segments create

In [9]:
# Total number of patient segments across all recordings.
sum(len(segments) for segments in patient_segmented_arrays.values())

1777

In [10]:
# Total number of sinus segments across all recordings.
sum(len(segments) for segments in sinus_segmented_arrays.values())

304

In [11]:
from src.dataset import prepare_data

# Build the train / validation / test splits and their label arrays from the
# segmented patient and sinus recordings.
(
    train_data, valid_data, test_data,
    train_labels, valid_labels, test_labels,
) = prepare_data(patient_segmented_arrays, sinus_segmented_arrays)

Patient files: 39
Sinus files: 5

File distribution:
  Train: 27 patient + 3 sinus
  Valid: 6 patient + 1 sinus
  Test:  6 patient + 1 sinus

=== Final Dataset Statistics ===
Training segments: 1407 (197 sinus, 1210 patient)
Validation segments: 332 (57 sinus, 275 patient)
Test segments: 342 (50 sinus, 292 patient)


In [12]:
import torch

# Use the GPU when PyTorch reports CUDA as available; otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [13]:
from src.training import run_training_pipeline

# Train on the training split, passing the validation split and the selected
# device. Returns the trained model and a `history` object that the plotting
# cell below indexes by 'train_losses', 'test_losses', 'train_accuracies' and
# 'test_accuracies'.
# NOTE(review): the saved output of this cell ends in KeyboardInterrupt, so
# `model` and `history` were never assigned in that run; every cell below
# needs this one to run to completion first.
model, history = run_training_pipeline(train_data, train_labels, valid_data, valid_labels, device)

Starting EGM Classification Pipeline
=== SMOTE Oversampling ===
Before SMOTE:
  Shape: (1407, 51200)
  Sinus: 197 segments
  Patient: 1210 segments

After SMOTE:
  Shape: (2420, 51200)
  Sinus: 1210 segments
  Patient: 1210 segments

DataLoaders created:
Train batches: 76
Test batches: 11

Model created with 829,313 parameters

=== Training Setup ===
Device: cpu
Early stopping patience: 5 epochs


KeyboardInterrupt: 

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt

# Side-by-side training curves: loss on the left, accuracy on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# One x-axis tick per recorded epoch, numbered from 1.
epochs = range(1, len(history['train_losses']) + 1)

# Loss plot
# The 'test_*' history entries are labelled "Validation" because the pipeline
# was given the validation split (presumably that is what it logs -- confirm).
ax1.plot(epochs, history['train_losses'], 'b-', label='Training Loss')
ax1.plot(epochs, history['test_losses'], 'r-', label='Validation Loss')
ax1.set_title('Loss Curves')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
ax1.legend()
ax1.grid(True)

# Accuracy plot
ax2.plot(epochs, history['train_accuracies'], 'b-', label='Training Accuracy')
ax2.plot(epochs, history['test_accuracies'], 'r-', label='Validation Accuracy')
ax2.set_title('Accuracy Curves')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Accuracy (%)')
ax2.legend()
ax2.grid(True)

plt.tight_layout()

# savefig() does not create missing directories, so make sure 'figures/'
# exists; otherwise a fresh checkout fails here with FileNotFoundError.
loss_curves_path = Path('figures/loss_curves.png')
loss_curves_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(loss_curves_path)
plt.show()

In [None]:
from pathlib import Path

import torch

# Persist only the model weights (state_dict). torch.save() does not create
# missing directories, so make sure 'save/' exists before writing.
model_path = Path('save/model.pth')
model_path.parent.mkdir(parents=True, exist_ok=True)
torch.save(model.state_dict(), model_path)

In [None]:
from src.training import predict_and_evaluate

# Evaluate the trained model on the held-out test split. The result is indexed
# later by 'precision', 'recall' and 'f1_score' (so presumably a dict of
# metrics -- confirm in src.training).
test_results = predict_and_evaluate(model, test_data, test_labels, device)

In [None]:
# Display the raw evaluation results as this cell's output.
test_results

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Pull the three headline scores out of the evaluation results.
metrics = ['Precision', 'Recall', 'F1-Score']
values = [test_results[key] for key in ('precision', 'recall', 'f1_score')]

# Draw the bars on an explicit figure/axes pair instead of the pyplot state machine.
metrics_fig, metrics_ax = plt.subplots(figsize=(10, 6))
bars = metrics_ax.bar(
    metrics,
    values,
    color=['#2E86AB', '#A23B72', '#F18F01'],
    alpha=0.8,
    edgecolor='black',
    linewidth=1.2,
)

# Title, axis label and a fixed 0-1 score range.
metrics_ax.set_title('Model Performance Metrics', fontsize=16, fontweight='bold', pad=20)
metrics_ax.set_ylabel('Score', fontsize=12)
metrics_ax.set_ylim(0, 1.0)

# Print each score just above its bar, centred horizontally on the bar.
for bar, value in zip(bars, values):
    metrics_ax.text(
        bar.get_x() + bar.get_width()/2,
        bar.get_height() + 0.01,
        f'{value:.3f}',
        ha='center',
        va='bottom',
        fontsize=12,
        fontweight='bold',
    )

# Light dashed horizontal guides only.
metrics_ax.grid(axis='y', alpha=0.3, linestyle='--')

metrics_fig.tight_layout()
plt.show()