In [1]:
import os
import glob
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import butter, filtfilt
from sklearn.preprocessing import MinMaxScaler
from scipy.io import savemat
# from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

# Bandpass Filter

def bandpass_filter(data, lowcut=0.5, highcut=40, fs=173.61, order=5):
    """Apply a zero-phase Butterworth band-pass filter along the last axis.

    The filter is designed as second-order sections (SOS) and applied
    forward and backward with ``sosfiltfilt``. The SOS form is used instead
    of ``(b, a)`` polynomial coefficients because the latter loses precision
    for higher-order band-pass designs with a low normalized cutoff
    (here 0.5 Hz against a ~87 Hz Nyquist frequency).

    Args:
        data: Real-valued array-like signal. Object-dtype arrays of numbers
            are converted to float64 before filtering.
        lowcut: Lower cutoff frequency in Hz.
        highcut: Upper cutoff frequency in Hz.
        fs: Sampling frequency in Hz.
        order: Order passed to the Butterworth design.

    Returns:
        numpy.ndarray: The filtered signal, same shape as ``data``.

    Raises:
        ValueError: If the cutoffs do not satisfy
            ``0 < lowcut < highcut < fs / 2``.
    """
    # Local import: the file-level import only brings in butter/filtfilt.
    from scipy.signal import sosfiltfilt

    nyquist = 0.5 * fs
    if not 0 < lowcut < highcut < nyquist:
        raise ValueError(
            f"Cutoffs must satisfy 0 < lowcut < highcut < fs/2 "
            f"(got lowcut={lowcut}, highcut={highcut}, fs={fs})"
        )

    data = np.asarray(data)
    if data.dtype == object:
        # Boxed Python floats would make the filter run on (and return)
        # object arrays; work on a native float array instead.
        data = data.astype(np.float64)

    low, high = lowcut / nyquist, highcut / nyquist
    sos = butter(order, [low, high], btype='band', output='sos')
    return sosfiltfilt(sos, data)

# Preprocessing Function

def preprocess_signal(signal, fs=173.61, segment_length=None, overlap=0.5):
    """Band-pass filter a 1-D signal, cut it into overlapping windows and
    z-score each window.

    Args:
        signal: Real-valued 1-D array-like recording.
        fs: Sampling frequency in Hz.
        segment_length: Window length in samples. Defaults to
            ``int(5 * fs)`` (868 samples at the default ``fs``).
        overlap: Fraction of each window shared with the next one, in
            ``[0, 1)``. The default 0.5 gives a hop of half a window.

    Returns:
        tuple: ``(normed_segments, filtered, segments)`` where ``filtered``
        is the band-passed signal, ``segments`` has shape
        ``(n_segments, segment_length)`` and ``normed_segments`` is the same
        array with every row shifted to zero mean and, when its standard
        deviation is non-zero, scaled to unit variance. When the signal is
        shorter than one window both segment arrays have shape
        ``(0, segment_length)``.

    Raises:
        ValueError: If ``overlap`` is outside ``[0, 1)`` or
            ``segment_length`` is smaller than 1.
    """
    if not 0 <= overlap < 1:
        raise ValueError(f"overlap must be in [0, 1), got {overlap}")

    # Guarantee a native float array so every downstream array is float64
    # even if the caller hands in an object-dtype row.
    signal = np.asarray(signal, dtype=np.float64)

    # Bandpass filter (0.5-50 Hz)
    filtered = bandpass_filter(signal, 0.5, 50, fs)

    # Segmentation with overlap
    if segment_length is None:
        segment_length = int(5 * fs)
    if segment_length < 1:
        raise ValueError(f"segment_length must be >= 1, got {segment_length}")

    # A hop of 0 would make range() raise, so never step by less than 1.
    step_size = max(1, int(segment_length * (1 - overlap)))

    starts = range(0, len(filtered) - segment_length + 1, step_size)
    if starts:
        segments = np.array([filtered[s:s + segment_length] for s in starts])
    else:
        # Keep the result 2-D so callers can still stack it with others.
        segments = np.empty((0, segment_length), dtype=filtered.dtype)

    # Z-score normalization (per segment). Constant segments (std == 0)
    # are only centred, which dividing by 1 achieves.
    mean = segments.mean(axis=1, keepdims=True)
    std = segments.std(axis=1, keepdims=True)
    normed_segments = (segments - mean) / np.where(std > 0, std, 1.0)

    return normed_segments, filtered, segments


def plot_segmentation_detailed(filtered, segments, fs=173.61, segment_length=None,
                               filename="", overlap=0.5):
    """Plot a filtered signal with its overlapping segments marked.

    Segment ``i`` is drawn from ``i * step`` to ``i * step + segment_length``
    samples, where ``step = int(segment_length * (1 - overlap))``. With the
    default ``overlap`` this matches windows taken with a 50 % overlap;
    pass ``overlap=0`` for back-to-back windows.

    Args:
        filtered: 1-D filtered signal to draw.
        segments: Sequence of segments cut from ``filtered``; only its
            length (and, when ``segment_length`` is None, the length of its
            first element) is used.
        fs: Sampling frequency in Hz, used to convert samples to seconds.
        segment_length: Window length in samples. When None it is taken
            from the first segment, or ``int(5 * fs)`` if there are none.
        filename: Optional name appended to the plot title.
        overlap: Fraction of each window shared with the next, in ``[0, 1)``.

    Raises:
        ValueError: If ``overlap`` is outside ``[0, 1)``.
    """
    if not 0 <= overlap < 1:
        raise ValueError(f"overlap must be in [0, 1), got {overlap}")

    num_segments = len(segments)

    if segment_length is None:
        segment_length = len(segments[0]) if num_segments else int(5 * fs)

    step_size = max(1, int(segment_length * (1 - overlap)))

    # Start/end of every window in samples.
    spans = [(i * step_size, i * step_size + segment_length)
             for i in range(num_segments)]

    # Create figure
    _, ax = plt.subplots(figsize=(14, 5))

    # Plot the full filtered signal
    time = np.arange(len(filtered)) / fs
    ax.plot(time, filtered, 'b-', linewidth=0.8, alpha=0.7)

    # Vertical lines at every distinct window edge. Edges are de-duplicated
    # in integer samples because overlapping windows share edges.
    boundaries = sorted({edge for span in spans for edge in span})
    for k, edge in enumerate(boundaries):
        ax.axvline(x=edge / fs, color='red', linestyle='--',
                   linewidth=2, alpha=0.8,
                   label='Segment boundary' if k == 0 else '')

    # Shade the windows with alternating colors and label each one
    for i, (start, end) in enumerate(spans):
        start_time = start / fs
        end_time = end / fs
        color = 'yellow' if i % 2 == 0 else 'lightgreen'
        ax.axvspan(start_time, end_time, alpha=0.2, color=color)
        mid_time = (start_time + end_time) / 2
        ax.text(mid_time, ax.get_ylim()[1] * 0.9, f'Seg {i+1}',
                ha='center', fontsize=10, fontweight='bold')

    ax.set_xlabel('Time (seconds)', fontsize=12)
    ax.set_ylabel('Amplitude', fontsize=12)

    title = 'Full Signal with Segmentation Boundaries'
    if filename:
        title += f' ({filename})'
    title += f'\nTotal Length: {len(filtered)} samples ({len(filtered)/fs:.2f} sec) | '
    title += f'Segment Length: {segment_length} samples ({segment_length/fs:.2f} sec) | '
    title += f'Number of Segments: {num_segments}'

    ax.set_title(title, fontsize=13, fontweight='bold')
    if boundaries:
        # Without any labelled line the legend would be empty.
        ax.legend(loc='upper right')
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()


# Load raw recordings together with their class labels and source file paths
def load_raw_files(root_dir):
    """Load every ``*.txt`` recording below the class folders of ``root_dir``.

    The sub-folders ``Z``, ``O``, ``N``, ``F`` and ``S`` are searched
    recursively and mapped to the labels ``NORMAL`` (Z, O), ``INTERICTAL``
    (N, F) and ``ICTAL`` (S). Files that cannot be read or parsed are
    reported on stdout and skipped.

    Args:
        root_dir: Directory that contains the class sub-folders.

    Returns:
        tuple: ``(raw_signals, raw_labels, file_paths)`` where
        ``raw_signals`` is a 1-D object array with one numeric array per
        file, ``raw_labels`` is an array of label strings and ``file_paths``
        is a list of paths; all three share the same order.
    """
    classes = {
        "Z": "NORMAL",
        "O": "NORMAL",
        "N": "INTERICTAL",
        "F": "INTERICTAL",
        "S": "ICTAL"
    }

    raw_signals = []
    raw_labels = []
    file_paths = []

    for folder, label in classes.items():
        pattern = os.path.join(root_dir, folder, "**", "*.txt")
        # glob returns paths in filesystem order; sort them so that file
        # indices (and therefore seeded splits) are reproducible.
        for path in sorted(glob.glob(pattern, recursive=True)):
            try:
                sig = np.loadtxt(path)
            except (OSError, ValueError):
                # Unreadable or non-numeric file: report it and move on.
                print("Error loading:", path)
                continue
            raw_signals.append(sig)
            raw_labels.append(label)
            file_paths.append(path)

    # Fill the container element by element: np.array(list, dtype=object)
    # would collapse equal-length signals into a 2-D array of boxed floats.
    signals = np.empty(len(raw_signals), dtype=object)
    for idx, sig in enumerate(raw_signals):
        signals[idx] = sig

    return signals, np.array(raw_labels), file_paths


# ----------------------------------------------------------
#            APPLY YOUR PREPROCESSING ON K-FOLD
# ----------------------------------------------------------

def _stack_fold(indices, segments_per_file, labels):
    """Stack the cached segments of the given files into (X, y, file_ids)."""
    X_parts, y_parts, id_parts = [], [], []
    for i in indices:
        segs = segments_per_file[i]
        if len(segs) == 0:
            # A file too short for a single segment contributes nothing.
            continue
        X_parts.append(segs)
        y_parts.append(np.full(len(segs), labels[i]))
        id_parts.append(np.full(len(segs), i))

    if not X_parts:
        raise ValueError("No segments available for this split")

    return np.vstack(X_parts), np.concatenate(y_parts), np.concatenate(id_parts)


def apply_kfold_preprocessing(root_dir, save_dir, n_folds=5):
    """Preprocess every recording and save stratified K-fold splits.

    Files are split at file level with ``StratifiedKFold`` (shuffled,
    ``random_state=42``), so all segments of one file end up on the same
    side of a split. For each fold ``k`` (0-based) the following arrays are
    written to ``save_dir``: ``fold_k_X_train.npy``, ``fold_k_y_train.npy``,
    ``fold_k_fileids_train.npy`` and the matching ``*_test.npy`` files.
    Labels are encoded as NORMAL=0, INTERICTAL=1, ICTAL=2 and file ids are
    indices into the list returned by ``load_raw_files``.

    Args:
        root_dir: Dataset directory passed to ``load_raw_files``.
        save_dir: Output directory; created if it does not exist.
        n_folds: Number of folds.

    Raises:
        ValueError: If no recordings could be loaded, or a split ends up
            without any segments.
    """
    os.makedirs(save_dir, exist_ok=True)

    raw_signals, raw_labels, _ = load_raw_files(root_dir)
    if len(raw_signals) == 0:
        raise ValueError(f"No readable .txt recordings found under {root_dir!r}")

    label_map = {"NORMAL":0, "INTERICTAL":1, "ICTAL":2}
    y_raw = np.array([label_map[lbl] for lbl in raw_labels])

    # Preprocessing does not depend on the fold, so run it once per file
    # instead of once per file per fold.
    segments_per_file = [preprocess_signal(raw)[0] for raw in raw_signals]

    kf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)

    for fold, (train_idx, test_idx) in enumerate(kf.split(raw_signals, y_raw)):

        print(f"\n========== Fold {fold+1}/{n_folds} ==========")

        X_train, y_train, fileid_train = _stack_fold(train_idx, segments_per_file, y_raw)
        X_test, y_test, fileid_test = _stack_fold(test_idx, segments_per_file, y_raw)

        # Save each fold
        np.save(os.path.join(save_dir, f"fold_{fold}_X_train.npy"), X_train)
        np.save(os.path.join(save_dir, f"fold_{fold}_y_train.npy"), y_train)
        np.save(os.path.join(save_dir, f"fold_{fold}_fileids_train.npy"), fileid_train)

        np.save(os.path.join(save_dir, f"fold_{fold}_X_test.npy"), X_test)
        np.save(os.path.join(save_dir, f"fold_{fold}_y_test.npy"), y_test)
        np.save(os.path.join(save_dir, f"fold_{fold}_fileids_test.npy"), fileid_test)

        print("Train:", X_train.shape, "Test:", X_test.shape)

    print("\nAll folds saved successfully!")




# Main
if __name__ == "__main__":
    # Dataset root on the local machine; the class folders are expected
    # directly underneath it.
    root = r"C:\Users\pc\Desktop\Graduation Project\GP-Epileptic-seizures"

    # Fold arrays are written to a sub-folder of the dataset root.
    save = os.path.join(root, "Preprocessing_Updated_Kfold")

    apply_kfold_preprocessing(root_dir=root, save_dir=save, n_folds=5)


Train: (3200, 868) Test: (800, 868)

Train: (3200, 868) Test: (800, 868)

Train: (3200, 868) Test: (800, 868)

Train: (3200, 868) Test: (800, 868)

Train: (3200, 868) Test: (800, 868)

All folds saved successfully!
