# *Pre-processing and Feature Extraction*

### Imports and Download Utility Functions

In [2]:
import os
import re
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def find_files(url, headers, timeout=30):
    """List the child files and sub-directories linked from a directory page.

    Fetches ``url`` with HTTP basic authentication, parses the returned HTML
    and sorts the ``href`` of every ``<a>`` tag into files or directories.

    Args:
        url: URL of the directory listing to read.
        headers: Mapping holding the basic-auth credentials under the keys
            ``'user'`` and ``'passwd'``.
        timeout: Seconds handed to ``requests.get`` so that a stalled
            connection cannot block the crawl forever.

    Returns:
        A tuple ``(hrefs_files, hrefs_dirs)``. ``hrefs_files`` holds the hrefs
        that do not end in ``'/'``; ``hrefs_dirs`` holds the hrefs that do,
        with the surrounding slashes stripped.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (for instance when the credentials are rejected).
    """
    response = requests.get(
        url,
        auth=(headers['user'], headers['passwd']),
        timeout=timeout,
    )
    # Without this check an error page (401/404/...) would be parsed like a
    # listing and silently reported as "no files, no directories".
    response.raise_for_status()
    soup = BeautifulSoup(response.text, features="html.parser")

    hrefs_files = []
    hrefs_dirs = []

    for link in soup.find_all('a'):
        href = link.get('href')
        if not href:
            continue
        # Keep only links that point at children of this directory. Skipped:
        #   '.'   hidden entries and './', '../'
        #   '/'   absolute paths (e.g. a parent-directory link)
        #   '?'   query-only links (e.g. column-sort links such as '?C=N;O=D')
        #   '#'   in-page anchors
        #   '://' links to other locations
        if href.startswith(('.', '/', '?', '#')) or '://' in href:
            continue
        if href.endswith('/'):
            hrefs_dirs.append(href.strip('/'))
        else:
            hrefs_files.append(href)
    return hrefs_files, hrefs_dirs

def download_file(download_file_url, file_path, headers, output=False,
                  timeout=60, chunk_size=1 << 20):
    """Download one remote file to ``file_path`` using HTTP basic auth.

    The body is streamed to ``file_path + '.part'`` and only renamed to
    ``file_path`` after the transfer finished, so ``file_path`` never holds a
    truncated download or an HTML error page.

    Args:
        download_file_url: URL of the file to fetch.
        file_path: Local destination path.
        headers: Mapping holding the basic-auth credentials under the keys
            ``'user'`` and ``'passwd'``.
        output: When true, print the URL before downloading.
        timeout: Seconds handed to ``requests.get``.
        chunk_size: Number of bytes requested per streamed chunk.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status; no
            file is left behind in that case.
    """
    if output:
        print('Downloading:', download_file_url)

    partial_path = os.fspath(file_path) + '.part'
    # stream=True keeps large recordings out of memory: the body is read
    # chunk by chunk instead of being materialised in full.
    response = requests.get(
        download_file_url,
        auth=(headers['user'], headers['passwd']),
        stream=True,
        timeout=timeout,
    )
    try:
        # Fail before anything is written so an error page is never saved
        # under the target name.
        response.raise_for_status()
        with open(partial_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:
                    f.write(chunk)
    except BaseException:
        # Remove the incomplete temp file (also on KeyboardInterrupt).
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise
    finally:
        response.close()

    # Publish the finished download under its final name.
    os.replace(partial_path, file_path)

def download_TUH(DOWNLOAD_DIR, headers, sub_dir='', output=False):
    """Recursively mirror the remote ``edf/`` tree into ``DOWNLOAD_DIR``.

    For the directory ``sub_dir`` below the dataset base URL this creates the
    matching local folder, downloads every listed file whose name matches the
    extension filter and is not present locally yet, then recurses into each
    listed sub-directory.

    Args:
        DOWNLOAD_DIR: Local root folder that receives the files.
        headers: Mapping holding the basic-auth credentials under the keys
            ``'user'`` and ``'passwd'``; passed on to ``find_files`` and
            ``download_file``.
        sub_dir: Directory to process, relative to the base URL and written
            with forward slashes. ``''`` starts at the base URL itself.
        output: Passed on to ``download_file``.
    """
    # Base URL for the dataset
    base_url = 'https://isip.piconepress.com/projects/nedc/data/tuh_eeg/tuh_eeg_seizure/v2.0.3/edf/'

    # Local folder: drop anything up to and including 'edf/' from sub_dir and
    # join the remaining segments with the platform's own separator.
    local_rel = re.sub(r'.*edf/', '', sub_dir)
    export_dir = os.path.join(DOWNLOAD_DIR, *[part for part in local_rel.split('/') if part])
    os.makedirs(export_dir, exist_ok=True)

    # Remote folder: the URL must end in '/'. Otherwise urljoin(dir_url, file)
    # replaces the last path segment instead of appending to it and every
    # file below the root would be requested from its parent directory.
    remote_rel = sub_dir.strip('/')
    dir_url = urljoin(base_url, remote_rel + '/') if remote_rel else base_url

    # Get lists of files and directories
    files, dirs = find_files(dir_url, headers)

    # Download all matching files in the current directory
    for file in files:
        if re.search(r'\.xlsx$|\.edf$|\.txt$|\.tse(?!_)', file):
            file_path = os.path.join(export_dir, file)
            # Files already on disk are skipped, which makes reruns resumable.
            if not os.path.exists(file_path):
                download_file(urljoin(dir_url, file), file_path, headers, output)

    # Recursively process each subdirectory
    for subfolder in dirs:
        # URL paths always use '/', so os.path.join must not be used here.
        next_sub_dir = remote_rel + '/' + subfolder if remote_rel else subfolder
        download_TUH(DOWNLOAD_DIR, headers, next_sub_dir, output)


In [3]:
from getpass import getpass
import os
import sys
import os
from bs4 import BeautifulSoup
import requests
import re
import wget
import zipfile


DOWNLOAD_DIR = os.path.expanduser('tuh_data')  # Set a local path

# exist_ok makes the cell safe to re-run when the folder is already there.
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

# Credentials must not be stored in the notebook. They are taken from the
# environment variables TUH_EEG_USER / TUH_EEG_PASSWORD; when the password is
# not set it is asked for interactively, without echoing it to the output.
# NOTE(review): a password was previously committed in this cell and should be
# treated as exposed.
user = os.environ.get('TUH_EEG_USER') or "nedc-tuh-eeg"
key = os.environ.get('TUH_EEG_PASSWORD') or getpass('TUH EEG password: ')

auth_dict = {'user': user, 'passwd': key}

# Start at the dataset root ('') and print every URL that gets downloaded.
download_TUH(DOWNLOAD_DIR, auth_dict, '', output=True)


Downloading: https://isip.piconepress.com/projects/nedc/data/tuh_eeg/tuh_eeg_seizure/v2.0.3/edf/dev/aaaaadkb/s009_2012/aaaaadkb_s009_t000.edf
Downloading: https://isip.piconepress.com/projects/nedc/data/tuh_eeg/tuh_eeg_seizure/v2.0.3/edf/dev/aaaaadkb/s009_2012/aaaaadkb_s009_t002.edf
Downloading: https://isip.piconepress.com/projects/nedc/data/tuh_eeg/tuh_eeg_seizure/v2.0.3/edf/dev/aaaaadkb/s009_2012/aaaaadkb_s009_t003.edf
Downloading: https://isip.piconepress.com/projects/nedc/data/tuh_eeg/tuh_eeg_seizure/v2.0.3/edf/dev/aaaaadkb/s010_2016/aaaaadkb_s010_t000.edf
Downloading: https://isip.piconepress.com/projects/nedc/data/tuh_eeg/tuh_eeg_seizure/v2.0.3/edf/dev/aaaaadkb/s010_2016/aaaaadkb_s010_t001.edf
Downloading: https://isip.piconepress.com/projects/nedc/data/tuh_eeg/tuh_eeg_seizure/v2.0.3/edf/dev/aaaaadkj/s001_2005/aaaaadkj_s001_t000.edf
Downloading: https://isip.piconepress.com/projects/nedc/data/tuh_eeg/tuh_eeg_seizure/v2.0.3/edf/dev/aaaaadkj/s002_2007/aaaaadkj_s002_t000.edf
Downlo

### *Process and Export Feature Dump*