REQUIREMENTS

In [33]:
from scipy.io import loadmat
import numpy as np, os, sys, joblib
import matplotlib.pyplot as plt
import math
from tqdm import tqdm
import pandas as pd
import random
import tensorflow as tf
import sklearn
from collections import Counter

SEED = 1234
np.random.seed(SEED)
pd.core.common._random_stat = SEED
random.seed(SEED)
os.environ['PYTHONHASHSEED'] = str(SEED)

In [32]:
from sklearn.model_selection import train_test_split
import os
import re

DATA

In [4]:
path_to_data = "./data"

In [21]:
def load_data(file):
  
  original = loadmat(file) #  dictionary with variable names as keys, and loaded matrices as values
  
  spk_file = file.replace('.mat', '_spk.mat')
  spk = loadmat(spk_file)

  ann_file = file.replace('.mat', '_ann.mat')
  ann = loadmat(ann_file)

  return original, spk, ann

In [36]:
def filter_files_by_pattern(directory, pattern):
    """
    Filter files in a directory based on a specified pattern.

    Parameters:
    - directory: The directory containing the files.
    - pattern: The regular expression pattern to match filenames.

    Returns:
    - List of filenames matching the pattern.
    """
    files = os.listdir(directory)
    filtered_files = [file for file in files if re.match(pattern, file)]
    return filtered_files

In [37]:
directory = path_to_data  
pattern = r'S\d{3}_\d{3}\.mat'  # Regular expression pattern matching 'SXXX_YYY.mat'

filtered_files = filter_files_by_pattern(directory, pattern)
print(filtered_files)

['S034_128.mat', 'S024_128.mat', 'S056_128.mat', 'S046_128.mat', 'S091_250.mat', 'S081_250.mat', 'S060_128.mat', 'S012_128.mat', 'S002_128.mat', 'S114_250.mat', 'S104_250.mat', 'S088_250.mat', 'S098_250.mat', 'S061_128.mat', 'S003_128.mat', 'S013_128.mat', 'S105_250.mat', 'S115_250.mat', 'S099_250.mat', 'S089_250.mat', 'S025_128.mat', 'S035_128.mat', 'S080_250.mat', 'S090_250.mat', 'S047_128.mat', 'S057_128.mat', 'S117_250.mat', 'S107_250.mat', 'S011_128.mat', 'S001_128.mat', 'S008_128.mat', 'S018_128.mat', 'S092_250.mat', 'S082_250.mat', 'S055_128.mat', 'S045_128.mat', 'S037_128.mat', 'S027_128.mat', 'S121_250.mat', 'S019_128.mat', 'S009_128.mat', 'S044_128.mat', 'S054_128.mat', 'S083_250.mat', 'S093_250.mat', 'S026_128.mat', 'S036_128.mat', 'S120_250.mat', 'S106_250.mat', 'S116_250.mat', 'S010_128.mat', 'S062_128.mat', 'S048_128.mat', 'S058_128.mat', 'S015_128.mat', 'S005_128.mat', 'S113_250.mat', 'S103_250.mat', 'S051_128.mat', 'S041_128.mat', 'S096_250.mat', 'S086_250.mat', 'S033_1

In [40]:
train_files = filtered_files[:round(len(filtered_files)*0.8)]   

In [47]:
train_files

['S034_128.mat',
 'S024_128.mat',
 'S056_128.mat',
 'S046_128.mat',
 'S091_250.mat',
 'S081_250.mat',
 'S060_128.mat',
 'S012_128.mat',
 'S002_128.mat',
 'S114_250.mat',
 'S104_250.mat',
 'S088_250.mat',
 'S098_250.mat',
 'S061_128.mat',
 'S003_128.mat',
 'S013_128.mat',
 'S105_250.mat',
 'S115_250.mat',
 'S099_250.mat',
 'S089_250.mat',
 'S025_128.mat',
 'S035_128.mat',
 'S080_250.mat',
 'S090_250.mat',
 'S047_128.mat',
 'S057_128.mat',
 'S117_250.mat',
 'S107_250.mat',
 'S011_128.mat',
 'S001_128.mat',
 'S008_128.mat',
 'S018_128.mat',
 'S092_250.mat',
 'S082_250.mat',
 'S055_128.mat',
 'S045_128.mat',
 'S037_128.mat',
 'S027_128.mat',
 'S121_250.mat',
 'S019_128.mat',
 'S009_128.mat',
 'S044_128.mat',
 'S054_128.mat',
 'S083_250.mat',
 'S093_250.mat',
 'S026_128.mat',
 'S036_128.mat',
 'S120_250.mat',
 'S106_250.mat',
 'S116_250.mat',
 'S010_128.mat',
 'S062_128.mat',
 'S048_128.mat',
 'S058_128.mat',
 'S015_128.mat',
 'S005_128.mat',
 'S113_250.mat',
 'S103_250.mat',
 'S051_128.mat

In [46]:
validation_files = filtered_files[(round(len(filtered_files)*0.8)):]

In [48]:
validation_files

['S029_128.mat',
 'S039_128.mat',
 'S110_250.mat',
 'S100_250.mat',
 'S016_128.mat',
 'S006_128.mat',
 'S038_128.mat',
 'S028_128.mat',
 'S101_250.mat',
 'S079_250.mat',
 'S111_250.mat',
 'S007_128.mat',
 'S017_128.mat',
 'S021_128.mat',
 'S031_128.mat',
 'S043_128.mat',
 'S053_128.mat',
 'S084_250.mat',
 'S094_250.mat',
 'S118_250.mat',
 'S108_250.mat']

In [51]:
train = []
ecg_train = []
spk_train = []
ann_train = []

for file in train_files:
  ecg, spk, ann = load_data(path_to_data + '/' + file)
  ecg_train.append(ecg)
  spk_train.append(spk)
  ann_train.append(ann)

  train.append(path_to_data + '/' + file)


In [53]:
validation = []
ecg_validation = []
spk_validation = []
ann_validation = []

for file in validation_files:
  
  ecg, spk, ann = load_data(path_to_data + '/' + file)
  ecg_validation.append(ecg)
  spk_validation.append(spk)
  ann_validation.append(ann)
  
  validation.append(path_to_data + '/' + file)   

PREPROCESSING

In [54]:
! pip install neurokit2 tdqm

Collecting tdqm
  Downloading tdqm-0.0.1.tar.gz (1.4 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: tdqm
  Building wheel for tdqm (setup.py) ... [?25ldone
[?25h  Created wheel for tdqm: filename=tdqm-0.0.1-py3-none-any.whl size=1322 sha256=f237875888925ceb27f58e8c0b667c5283dc23c5eeb877b52e6a58a5a42a62eb
  Stored in directory: /Users/anadrmic/Library/Caches/pip/wheels/af/02/71/aae0f7ee738abf19498353918ddae0f90a0d6ceb337b0bbc91
Successfully built tdqm
Installing collected packages: tdqm
Successfully installed tdqm-0.0.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [55]:
def freq(filename):
  if '128' in filename:
    return 128
  else:
    return 250

In [56]:

import scipy
     