In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd /content/drive/MyDrive/Colab Notebooks/

/content/drive/MyDrive/Colab Notebooks


In [3]:
%run './imports.ipynb'

Collecting pyedflib
  Downloading pyEDFlib-0.1.34-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pyedflib
Successfully installed pyedflib-0.1.34


In [4]:
def get_ids(folder_path):
    # Get all the files in the folder
    files = os.listdir(folder_path)
    # Extract the IDs from the filenames
    ids = []
    for file in files:
        if file.startswith("shhs1-") and file.endswith(".edf"):
            id_val = file[len("shhs1-"):-len(".edf")]
            try:
                id_val = int(id_val)
                ids.append(id_val)
            except ValueError:
                print(f"Warning: Invalid ID format in filename '{file}'. Skipping.")

    return ids

In [5]:
def eeg_freq_extraction(eeg_data, sf):
    psd_data = fft(eeg_data)
    freq_data = fftfreq(eeg_data.size, 1/sf)


    #Making the frequency of non desired range as zero
    # Delta 0 to 4
    # Theta 4 to 8
    # alpha 8 to 12
    #sigma 12 to 16
    # beta 16 to 20
    #Gamma 1 20 to 34
    # gamma2 34 to 100
    psd_delta = copy.deepcopy(psd_data)
    psd_theta = copy.deepcopy(psd_data)
    psd_alpha = copy.deepcopy(psd_data)
    psd_sigma = copy.deepcopy(psd_data)
    psd_beta = copy.deepcopy(psd_data)
    psd_gamma1 = copy.deepcopy(psd_data)
    psd_gamma2 = copy.deepcopy(psd_data)

    for i in range(0, eeg_data.size):
      if ~(abs(freq_data[i]>=0) and abs(freq_data[i]<3.9)):
          psd_delta[i]=0
      if ~(abs(freq_data[i]>=4) and abs(freq_data[i]<7.9)):
          psd_theta[i]=0
      if ~(abs(freq_data[i]>=8) and abs(freq_data[i]<11.9)):
          psd_alpha[i]=0
      if ~(abs(freq_data[i]>=12) and abs(freq_data[i]<15.9)):
          psd_sigma[i]=0
      if ~(abs(freq_data[i]>=16) and abs(freq_data[i]<19.9)):
          psd_beta[i]=0
      if ~(abs(freq_data[i]>=20) and abs(freq_data[i]<33.9)):
          psd_gamma1[i]=0
      if ~(abs(freq_data[i]>=34) and abs(freq_data[i]<99.9)):
          psd_gamma2[i]=0

    # #Computing inverse fft
    delta_spectrum = irfft(psd_delta, n=eeg_data.size)
    theta_spectrum = irfft(psd_theta, n=eeg_data.size)
    alpha_spectrum = irfft(psd_alpha, n=eeg_data.size)
    sigma_spectrum = irfft(psd_sigma, n=eeg_data.size)
    beta_spectrum = irfft(psd_beta, n=eeg_data.size)
    gamma1_spectrum = irfft(psd_gamma1, n=eeg_data.size)
    gamma2_spectrum = irfft(psd_gamma2, n=eeg_data.size)

    eeg_freq_bands = pd.DataFrame(columns=['Raw_EEG','Delta', 'Theta', 'Alpha', 'Sigma', 'Beta', 'Gamma1', 'Gamma2'])

    eeg_freq_bands['Raw_EEG'] = eeg_data
    eeg_freq_bands['Delta'] = delta_spectrum
    eeg_freq_bands['Theta'] = theta_spectrum
    eeg_freq_bands['Alpha'] = alpha_spectrum
    eeg_freq_bands['Sigma'] = sigma_spectrum
    eeg_freq_bands['Beta'] = beta_spectrum
    eeg_freq_bands['Gamma1'] = gamma1_spectrum
    eeg_freq_bands['Gamma2'] = gamma2_spectrum

    #eeg_freq_bands.to_csv('bands.csv')
    #print(eeg_freq_bands)

    return eeg_freq_bands

In [6]:
def psd_eeg(raw_data, sf):
  data = np.array(raw_data)
  #time_array = np.array(signal_data['time_sec'])

# Moving window size = 30sec

  #getting the data points for every 30 seconds of data and getting frequency band outputs
  n = 30*sf  #window size
  #for entire data
  list_eeg_data = [data[i:i+n] for i in range(0,len(data),30*sf)]

  freq_bands = pd.DataFrame(columns = ['Raw_EEG','Delta', 'Theta', 'Alpha', 'Sigma', 'Beta', 'Gamma1', 'Gamma2'])
  # #Pass the window data and extract the frequency bands for each window
  for i in range(0,len(list_eeg_data)):
      #print(list_eeg_data)
      bands = eeg_freq_extraction(list_eeg_data[i], sf)
      freq_bands = pd.concat([freq_bands, bands], ignore_index=True)
  return freq_bands

In [7]:
def remove_consecutive_zeros(df):
    # Find the index of the first row with a non-zero "sleep_stage" value
    first_nonzero_index = (df['sleep_stage'] != 0).idxmax()

    # Find the index of the last row with a non-zero "sleep_stage" value
    last_nonzero_index = (df['sleep_stage'] != 0).iloc[::-1].idxmax()

    # Remove the initial and trailing rows with consecutive zeros
    df = df.loc[first_nonzero_index:last_nonzero_index]

    # Reset the index of the DataFrame
    df.reset_index(drop=True, inplace=True)

    return df

In [8]:
def readData(id_val):
  #Read edf file and also sleep stages csv file
  file_name = "./Data/Raw/MSA_male/shhs1-{}.edf".format(id_val)
  patient_id = id_val
  f = pyedflib.EdfReader(file_name)
  signal_labels = f.getSignalLabels()
  print("List of Signals = ", signal_labels)
  sleep_stages = pd.read_csv("./Data/Raw/MSA_male/shhs1-{}-profusion.csv".format(id_val))
  o2sa = f.readSignal(0)
  hr = f.readSignal(1)
  eeg = f.readSignal(2)
  emg = f.readSignal(4)
  eog_l = f.readSignal(5)
  eog_r = f.readSignal(6)
  # abdo = f.readSignal(10)
  airflow = f.readSignal(9)
  f.close()
  sf_eog = 50
  n = 2*sf_eog  #window size
  #for entire data
  eog_l_data = [eog_l[i:i+n].var() for i in range(0,len(eog_l),1*sf_eog)]
  sf_eog = 50
  n = 2*sf_eog  #window size
  #for entire data
  eog_r_data = [eog_r[i:i+n].var() for i in range(0,len(eog_r),1*sf_eog)]
  sf_emg = 125
  n = 2*sf_emg  #window size
  #for entire data
  emg_data = [emg[i:i+n].var() for i in range(0,len(emg),1*sf_emg)]
  o2sa_data = o2sa
  hr_data = hr
  sf_air = 10
  n = 2*sf_air  #window size
  #for entire data
  air_data = [airflow[i:i+n].mean() for i in range(0,len(airflow),1*sf_air)]
  sf = 125
  eeg_freq_bands = psd_eeg(eeg, sf)
  n = 2*sf
  pow_bands_eeg = pd.DataFrame([(abs(eeg_freq_bands[i:i+n])**2).mean() for i in range(0,len(eeg),1*sf)],columns = ['Raw_EEG','Delta', 'Theta', 'Alpha', 'Sigma', 'Beta', 'Gamma1', 'Gamma2'])
  full_data = pow_bands_eeg
  full_data['hr'] = hr_data
  full_data['sao2'] = o2sa_data
  full_data['emg']=emg_data
  full_data['eog_l']=eog_l_data
  full_data['eog_r']=eog_r_data
  full_data['airflow'] = air_data
  nor_data = (full_data-full_data.mean())/full_data.std()
  stages = sleep_stages['SleepStage'].repeat(30)
  full_data['sleep_stage'] = np.array(stages)
  nor_data = nor_data.fillna(0)
  nor_data['sleep_stage'] = np.array(stages)
  new = nor_data
  new = remove_consecutive_zeros(new)
  new.to_pickle('./Data/Raw/MSAHBP_male/{}_data.p'.format(id_val))
  return new


In [16]:
def check(id):
  file_name = "./Data/Raw/MSA_male/shhs1-{}.edf".format(id)
  patient_id = id
  f = pyedflib.EdfReader(file_name)
  signal_labels = f.getSignalLabels()
  print(id)
  print("List of Signals = ", signal_labels)
  # if  signal_labels[9] != 'AIRFLOW':
  #   print(id)
  #   print("List of Signals = ", signal_labels)

In [17]:
ids = get_ids('./Data/Raw/MSA_male')
# print(sorted(ids))
# ids = [202382, 202232]
for id in sorted(ids):
  check(id)

200078
List of Signals =  ['SaO2', 'H.R.', 'EEG(sec)', 'ECG', 'EMG', 'EOG(L)', 'EOG(R)', 'EEG', 'SOUND', 'AIRFLOW', 'THOR RES', 'ABDO RES', 'POSITION', 'LIGHT', 'AUX', 'OX stat']
200110
List of Signals =  ['SaO2', 'H.R.', 'EEG(sec)', 'ECG', 'EMG', 'EOG(L)', 'EOG(R)', 'EEG', 'SOUND', 'AIRFLOW', 'THOR RES', 'ABDO RES', 'POSITION', 'LIGHT', 'AUX', 'OX stat']
200427
List of Signals =  ['SaO2', 'H.R.', 'EEG(sec)', 'ECG', 'EMG', 'EOG(L)', 'EOG(R)', 'EEG', 'SOUND', 'AIRFLOW', 'THOR RES', 'ABDO RES', 'POSITION', 'LIGHT', 'NEW AIR', 'OX stat']
201211
List of Signals =  ['SaO2', 'H.R.', 'EEG(sec)', 'ECG', 'EMG', 'EOG(L)', 'EOG(R)', 'EEG', 'SOUND', 'AIRFLOW', 'THOR RES', 'ABDO RES', 'POSITION', 'LIGHT', 'AUX', 'OX stat']
201216
List of Signals =  ['SaO2', 'H.R.', 'EEG(sec)', 'ECG', 'EMG', 'EOG(L)', 'EOG(R)', 'EEG', 'SOUND', 'AIRFLOW', 'THOR RES', 'ABDO RES', 'POSITION', 'LIGHT', 'NEW AIR', 'OX stat']
201470
List of Signals =  ['SaO2', 'H.R.', 'EEG(sec)', 'ECG', 'EMG', 'EOG(L)', 'EOG(R)', 'EEG', '

In [18]:
ids = get_ids('./Data/Raw/MSA_male')
# ids = [202232]
for id in ids:
  df = readData(id)
  print(df)

List of Signals =  ['SaO2', 'H.R.', 'EEG(sec)', 'ECG', 'EMG', 'EOG(L)', 'EOG(R)', 'EEG', 'SOUND', 'AIRFLOW', 'THOR RES', 'ABDO RES', 'POSITION', 'LIGHT', 'NEW AIR', 'OX stat']
        Raw_EEG     Delta     Theta     Alpha     Sigma      Beta    Gamma1  \
0     -0.301954 -0.271603 -0.891675 -0.339410 -0.337312 -0.211922 -0.164515   
1     -0.179259 -0.148141 -0.981653 -0.284118 -0.121341  0.015832 -0.126507   
2     -0.223067 -0.199017 -0.646183 -0.258049 -0.139202  0.187076 -0.110729   
3     -0.264440 -0.261261 -0.017720 -0.127227 -0.484038  0.060375 -0.126762   
4     -0.178469 -0.167980 -0.228824 -0.308908 -0.449442 -0.093603 -0.149408   
...         ...       ...       ...       ...       ...       ...       ...   
25075 -0.156986 -0.160648  0.055736 -0.101252  0.415431  0.092139 -0.123427   
25076 -0.216165 -0.212060 -0.221132 -0.317751  0.363414  0.257177 -0.105765   
25077 -0.232092 -0.206358 -0.033663 -0.614089 -0.136806 -0.036920 -0.140616   
25078 -0.312521 -0.273641 -0.65600