In [1]:
from pathlib import Path
import wget
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np
from joblib import Parallel, delayed

In [2]:
def download_measurement(measurement_path):
    """Download the ``.dat``/``.hea`` pair of one waveform recording.

    Args:
        measurement_path: Record identifier of the form
            ``"<directory>/<subject>/<recording>"``.

    Returns:
        True when both files are present under
        ``mimic_db/<directory>/<subject>`` and the path has been appended to
        ``downloaded_measurements.txt``; False if anything went wrong (the
        failure is printed, never raised).
    """
    try:
        directory, subject, recording = measurement_path.split("/")
        path = Path(f"mimic_db/{directory}/{subject}")
        path.mkdir(parents=True, exist_ok=True)
        base_url = f'https://physionet.org/files/mimic3wdb-matched/1.0/{directory}/{subject}/{recording}'
        for extension in ("dat", "hea"):
            # A retry after a partial failure must not fetch a file that is
            # already on disk: wget.download does not overwrite, it stores the
            # new copy under a numbered duplicate name instead.
            # Assumes wget names the file after the URL basename.
            if (path / f"{recording}.{extension}").exists():
                continue
            wget.download(f"{base_url}.{extension}", out=str(path))
        # Only log the recording once both files are in place.
        with open("downloaded_measurements.txt", "a") as file:
            file.write(f"{measurement_path}\n")
        return True
    except Exception as error:
        # Best-effort: report the reason and let the caller carry on.
        print(f"   {measurement_path} failed ({type(error).__name__}: {error})")
        return False
        
def download_measurements(measurements, n_jobs=-1):
    """Run ``download_measurement`` over every path using joblib workers.

    Args:
        measurements: Iterable of measurement paths to fetch.
        n_jobs: Worker count forwarded to ``joblib.Parallel``.

    Returns:
        The list produced by ``Parallel``: one ``download_measurement`` result
        per input path.
    """
    progress = tqdm(measurements, desc="Downloading measurements")
    jobs = (delayed(download_measurement)(path) for path in progress)
    return Parallel(n_jobs=n_jobs)(jobs)

In [18]:
# Build the list of recordings that still need to be downloaded.
n_records = 250      # rows covered by each mimic_recordings/<start>-<end>.csv file
max_records = 10000  # upper bound of the index range that is scanned

# Collect the per-batch index files first and concatenate once, instead of
# re-copying an ever-growing DataFrame on every iteration.
batch_frames = []
for start in range(0, max_records, n_records):
    end = start + n_records
    try:
        batch_frames.append(pd.read_csv(f"mimic_recordings/{start}-{end}.csv"))
    except FileNotFoundError:
        # Only a missing batch file is expected here; any other error
        # (e.g. a corrupt CSV) should surface instead of being reported as
        # "No file".
        print(f'No file for {start}-{end}')

measurements_df = pd.concat(batch_frames) if batch_frames else pd.DataFrame()

# Keep only recordings flagged as having the ABP, PLETH and II signals.
if measurements_df.empty:
    # Nothing indexed yet: querying a column-less frame would raise.
    good_measurements = np.array([], dtype=object)
else:
    good_measurements = measurements_df.query("ABP == True and PLETH == True and II == True")['path'].values

# downloaded_measurements.txt is only created by the first successful
# download, so on a fresh start it does not exist yet.
try:
    with open('downloaded_measurements.txt') as f:
        downloaded_measurements = [line.replace("\n", "") for line in f]
except FileNotFoundError:
    downloaded_measurements = []

# Set lookup keeps the filter linear even with a very long download log.
already_downloaded = set(downloaded_measurements)
measurements_to_download = [path for path in good_measurements if path not in already_downloaded]

No file for 8000-8250
No file for 8250-8500
No file for 8500-8750
No file for 8750-9000
No file for 9000-9250
No file for 9250-9500
No file for 9500-9750
No file for 9750-10000


In [19]:
# Number of entries read from downloaded_measurements.txt.
len(downloaded_measurements)

143914

In [20]:
# Number of recordings still missing from the download log.
len(measurements_to_download)

36

In [21]:
# Fetch every outstanding recording in parallel; the cell output is the list
# of per-path results returned by download_measurements.
download_measurements(measurements_to_download, n_jobs=-1)

Downloading measurements:   0%|          | 0/36 [00:00<?, ?it/s]

[True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True]