This repository is built on top of a dockerized template repo for accessing the UKE GPU servers.
- `src/` contains the source code of the project
- `run_process.sh` is the shell script that runs the ETL
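A minimal invocation sketch, assuming `run_process.sh` takes no arguments (check the script before running):

```bash
# Run the full ETL from the repository root (assumed entry point, no arguments)
bash run_process.sh
```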
Install the Google Cloud SDK and add it to your PATH:

curl https://sdk.cloud.google.com | bash
export PATH="$PATH:$HOME/google-cloud-sdk/bin"
Authenticate with Google Cloud:

gcloud auth login
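Optionally, set the default project so that later `gcloud` and `bq` commands target it; the project ID below is the one used elsewhere in this README:

```bash
# Sanity-check the installation and set the default project
gcloud --version
gcloud config set project booming-edge-403620
```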
Run the vocabulary refresh:

cd src/MIMIC/
cd vocabulary_refresh
python3 vocabulary_refresh.py -s10
python3 vocabulary_refresh.py -s20
python3 vocabulary_refresh.py -s30
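The three calls differ only in the step number passed to `-s`; an equivalent loop using the same script and flag:

```bash
# Run vocabulary refresh steps 10, 20 and 30 in sequence
for step in 10 20 30; do
    python3 vocabulary_refresh.py -s"$step"
done
```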
cd ../

Run the workflow steps in order:

python3 scripts/run_workflow.py -e conf/full.etlconf -c conf/workflow_ddl.conf
python3 scripts/run_workflow.py -e conf/full.etlconf -c conf/workflow_staging.conf
python3 scripts/run_workflow.py -e conf/full.etlconf -c conf/workflow_etl.conf
python3 scripts/run_workflow.py -e conf/full.etlconf -c conf/workflow_ut.conf
python3 scripts/run_workflow.py -e conf/full.etlconf -c conf/workflow_metrics.conf
python3 scripts/run_workflow.py -e conf/full.etlconf -c conf/workflow_unload.conf
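The six invocations above differ only in the workflow config file passed to `-c`; an equivalent loop using the same script, flags, and config names:

```bash
# Run every workflow stage in order against the full ETL configuration
for wf in ddl staging etl ut metrics unload; do
    python3 scripts/run_workflow.py -e conf/full.etlconf -c "conf/workflow_${wf}.conf"
done
```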
Create the script file:

nano load_csv_files.sh

Paste this code into the script file:
#!/bin/bash
# set Google Cloud project and dataset
PROJECT_ID="booming-edge-403620"
DATASET="mimiciv_hosp"
# loop through all CSV files in the current directory
for file in *.csv.gz; do
    # extract the table name from the filename
    table_name=$(basename "$file" .csv.gz)
    # create the table in BigQuery and upload the data
    bq load --autodetect --source_format=CSV "${PROJECT_ID}:${DATASET}.${table_name}" "$file"
done
Give the script execute permissions, then run it:

chmod +x load_csv_files.sh
./load_csv_files.sh
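To verify that the tables were created, list the target dataset (project and dataset IDs as defined in the script above):

```bash
# List the tables that were just loaded into BigQuery
bq ls booming-edge-403620:mimiciv_hosp
```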
Install tmux (useful for keeping the long-running download below alive after the SSH session closes):

sudo apt-get update
sudo apt-get install tmux
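A minimal tmux workflow for keeping the download running across SSH disconnects (the session name is arbitrary):

```bash
# Start a detached session for the download
tmux new-session -d -s waveforms
# Attach to it, run the wget command below inside, then detach with Ctrl-b d
tmux attach -t waveforms
```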
Download the MIMIC-IV waveform files (https://physionet.org/content/mimic4wdb/0.1.0/waves/#files-panel):

wget -r -N -c -np https://physionet.org/files/mimic4wdb/0.1.0/ > wget.log 2>&1 &
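The download runs in the background and writes its output to wget.log; to follow its progress:

```bash
# Follow the background download log
tail -f wget.log
```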
Copy the downloaded files to the bucket (-r is needed because wget creates a nested directory tree):

gsutil -m cp -r physionet.org/* gs://shubov-athena/waveforms
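To confirm that the upload reached the bucket:

```bash
# List the uploaded objects under the waveforms prefix
gsutil ls gs://shubov-athena/waveforms
```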
List the tables of the resulting CDM dataset, then export them to a bucket:

bq ls booming-edge-403620:mimiciv_full_current_cdm
python extract-bq-to-bucket.py
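The contents of extract-bq-to-bucket.py are not shown here. For illustration only, a `bq` CLI sketch of the same idea, exporting CDM tables as gzipped CSV; the table list, bucket layout, and compression settings are assumptions, not taken from the script:

```bash
# Hypothetical sketch: export selected CDM tables as gzipped CSV to the bucket
PROJECT_DATASET="booming-edge-403620:mimiciv_full_current_cdm"
BUCKET="gs://shubov_mimic-iv"  # destination bucket used in the next step
for table in person visit_occurrence condition_occurrence; do  # example tables only
    bq extract --destination_format=CSV --compression=GZIP \
        "${PROJECT_DATASET}.${table}" "${BUCKET}/${table}/*.csv.gz"
done
```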
Download the exported OMOP data from the bucket to the server:

gsutil -m cp -r gs://shubov_mimic-iv/* /data/mimic-iv-2.2/shubov_thesis/OMOP
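A quick check that the download completed and how much data arrived:

```bash
# Report the total size of the downloaded OMOP export
du -sh /data/mimic-iv-2.2/shubov_thesis/OMOP
```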