# Import all libraries

In [35]:
from google.cloud import bigquery
from pandas_gbq import read_gbq
import pandas as pd
from pathlib import Path
from datetime import datetime, timezone

# Google Cloud project id used for the BigQuery clients, the dataset ids
# and the final extraction query in this notebook.
PROJECT = "windy-forge-475207-e3"
# Name of the dataset that the setup cell creates inside PROJECT.
DATASET = "derived_AUMCdb"

In [36]:
# Put the pandas display settings for rows, columns and width back to
# their library defaults (this restores the default limits, it does not
# remove them).
for _option in ("display.max_rows", "display.max_columns", "display.width"):
    pd.reset_option(_option)

In [37]:
# Show at most 100 rows and 100 columns when a DataFrame is displayed.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100
# display.width is left unchanged (the override below is disabled).
# pd.set_option("display.width", None)

In [38]:
# Display limits used for the rest of the notebook: up to 500 rows and
# 200 columns per DataFrame.
_display_limits = {
    "display.max_rows": 500,
    "display.max_columns": 200,
}
for _option_name, _limit in _display_limits.items():
    pd.set_option(_option_name, _limit)

def run_sql(filename: str):
    """Execute the SQL script in *filename* on BigQuery and report when it is done.

    A BigQuery client for ``PROJECT`` in the ``EU`` location is created on
    every call. The file is read as UTF-8 text, submitted as one query job,
    and the call blocks until that job has completed. Afterwards the file
    name and the current time in the local timezone are printed.

    Args:
        filename: Path of the SQL file to execute.
    """
    bq_client = bigquery.Client(project=PROJECT, location="EU")
    query_text = Path(filename).read_text(encoding="utf-8")

    # Block until the query job has completed.
    bq_client.query(query_text).result()

    finished_at = datetime.now(timezone.utc).astimezone()
    stamp = finished_at.strftime('%Y-%m-%d %H:%M:%S %Z')
    print(f"{filename} - Ready at: {stamp}")

# 1. Variables 

In [39]:
def _ensure_eu_dataset(bq_client, dataset_name):
    """Create dataset *dataset_name* under ``PROJECT`` in the EU location.

    The dataset is submitted with ``exists_ok=True``, so repeating the call
    for a dataset that is already present is not an error. A confirmation
    line is printed afterwards.

    Args:
        bq_client: ``bigquery.Client`` used to submit the request.
        dataset_name: Dataset name without the project prefix.

    Returns:
        A ``(dataset_id, dataset)`` tuple: the fully qualified id and the
        ``bigquery.Dataset`` object that was submitted.
    """
    dataset_id = f"{PROJECT}.{dataset_name}"
    dataset = bigquery.Dataset(dataset_id)
    dataset.location = "EU"
    bq_client.create_dataset(dataset, exists_ok=True)
    print(f"Dataset {dataset_name} exists (EU region)")
    return dataset_id, dataset


client = bigquery.Client(project=PROJECT)

# Create the dataset named by DATASET (EU region).
dataset_id, dataset = _ensure_eu_dataset(client, DATASET)

# Create the "derived" dataset for the unified config (EU region).
unified_dataset_id, unified_dataset = _ensure_eu_dataset(client, "derived")

Dataset derived_AUMCdb exists (EU region)
Dataset derived exists (EU region)


## SQL0 - Config param standard

In [40]:
#run_sql("SQL0_cfgparams_AUMCdb.sql")

## SQL0 - Config param unified

In [41]:
# Submit cfg_params_unified.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("cfg_params_unified.sql")

cfg_params_unified.sql - Ready at: 2026-01-27 00:32:55 West-Europa (standaardtijd)


## SQL 1 - UTILS

In [42]:
# Submit SQL1_utils_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL1_utils_AUMCdb.sql")

SQL1_utils_AUMCdb.sql - Ready at: 2026-01-27 00:33:15 West-Europa (standaardtijd)


## SQL2 - Cohort index

In [43]:
# Submit SQL2_cohort_index_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL2_cohort_index_AUMCdb.sql")

SQL2_cohort_index_AUMCdb.sql - Ready at: 2026-01-27 00:34:26 West-Europa (standaardtijd)


## SQL 3 - Grid

In [44]:
# Submit SQL3_grid_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL3_grid_AUMCdb.sql")

SQL3_grid_AUMCdb.sql - Ready at: 2026-01-27 00:34:31 West-Europa (standaardtijd)


## SQL 4 - Static variables

In [45]:
# Submit SQL4_Static_variables_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL4_Static_variables_AUMCdb.sql")

SQL4_Static_variables_AUMCdb.sql - Ready at: 2026-01-27 00:34:36 West-Europa (standaardtijd)


## SQL 5 - Varying variables

In [46]:
# Submit SQL5_Varying_variables_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL5_Varying_variables_AUMCdb.sql")

SQL5_Varying_variables_AUMCdb.sql - Ready at: 2026-01-27 00:34:47 West-Europa (standaardtijd)


## SQL 5.1 - Varying variables censored

In [47]:
# Submit SQL5_1_Varying_variables_censored_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL5_1_Varying_variables_censored_AUMCdb.sql")

SQL5_1_Varying_variables_censored_AUMCdb.sql - Ready at: 2026-01-27 00:34:56 West-Europa (standaardtijd)


## SQL 6 - Last variables

In [48]:
# Submit SQL6_Last_variables_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL6_Last_variables_AUMCdb.sql")

SQL6_Last_variables_AUMCdb.sql - Ready at: 2026-01-27 00:35:10 West-Europa (standaardtijd)


## SQL 7 - Urine output 

In [49]:
# Submit SQL7_Urine_output_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL7_Urine_output_AUMCdb.sql")


SQL7_Urine_output_AUMCdb.sql - Ready at: 2026-01-27 00:35:20 West-Europa (standaardtijd)


## SQL 8 - Renal trends

In [50]:
# Submit SQL8_Renal_trends_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL8_Renal_trends_AUMCdb.sql")

SQL8_Renal_trends_AUMCdb.sql - Ready at: 2026-01-27 00:35:26 West-Europa (standaardtijd)


## SQL 9 - Vasopressors

In [51]:
# Submit SQL9_Vasopressors_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL9_Vasopressors_AUMCdb.sql")

SQL9_Vasopressors_AUMCdb.sql - Ready at: 2026-01-27 00:35:32 West-Europa (standaardtijd)


## SQL 10 - Mechanical ventilation

In [52]:
# Submit SQL10_Mechanical_ventilation_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL10_Mechanical_ventilation_AUMCdb.sql")

SQL10_Mechanical_ventilation_AUMCdb.sql - Ready at: 2026-01-27 00:35:38 West-Europa (standaardtijd)


## SQL 11 - Fluid balance

In [53]:
# Submit SQL11_Fluid_balance_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL11_Fluid_balance_AUMCdb.sql")

SQL11_Fluid_balance_AUMCdb.sql - Ready at: 2026-01-27 00:35:44 West-Europa (standaardtijd)


## SQL 12 - GCS

In [54]:
# Submit SQL12_GCS_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL12_GCS_AUMCdb.sql")

SQL12_GCS_AUMCdb.sql - Ready at: 2026-01-27 00:35:50 West-Europa (standaardtijd)


## SQL 13 - Actions

In [55]:
# SQL13 Actions is commented out
# run_sql("SQL13_Actions_AUMCdb.sql")

## SQL 14 - Sofa cardio

In [56]:
# Submit SQL14_Sofa_cardio_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL14_Sofa_cardio_AUMCdb.sql")

SQL14_Sofa_cardio_AUMCdb.sql - Ready at: 2026-01-27 00:35:58 West-Europa (standaardtijd)


## SQL 14 - Sofa coag

In [57]:
# Submit SQL14_Sofa_coag_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL14_Sofa_coag_AUMCdb.sql")

SQL14_Sofa_coag_AUMCdb.sql - Ready at: 2026-01-27 00:36:04 West-Europa (standaardtijd)


## SQL 14 - Sofa liver

In [58]:
# Submit SQL14_Sofa_liver_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL14_Sofa_liver_AUMCdb.sql")

SQL14_Sofa_liver_AUMCdb.sql - Ready at: 2026-01-27 00:36:09 West-Europa (standaardtijd)


## SQL 14 - Sofa neuro

In [59]:
# Submit SQL14_Sofa_neuro_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL14_Sofa_neuro_AUMCdb.sql")

SQL14_Sofa_neuro_AUMCdb.sql - Ready at: 2026-01-27 00:36:16 West-Europa (standaardtijd)


## SQL 14 - Sofa renal

In [60]:
# Submit SQL14_Sofa_renal_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL14_Sofa_renal_AUMCdb.sql")

SQL14_Sofa_renal_AUMCdb.sql - Ready at: 2026-01-27 00:36:24 West-Europa (standaardtijd)


## SQL 14 - Sofa resp

In [61]:
# Submit SQL14_Sofa_resp_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL14_Sofa_resp_AUMCdb.sql")

SQL14_Sofa_resp_AUMCdb.sql - Ready at: 2026-01-27 00:36:37 West-Europa (standaardtijd)


## SQL 15 - Final dataframe

In [62]:
# Submit SQL15_Final_dataframe_AUMCdb.sql to BigQuery; run_sql waits for the job and prints the finish time.
run_sql("SQL15_Final_dataframe_AUMCdb.sql")

SQL15_Final_dataframe_AUMCdb.sql - Ready at: 2026-01-27 00:36:46 West-Europa (standaardtijd)


## Extraction

In [63]:
# Download the final dataset from BigQuery into a DataFrame.
query = f"""
SELECT * 
FROM `{PROJECT}.derived.grid_master_all_features`
"""

# pandas.read_gbq is deprecated and emits a FutureWarning; call the
# pandas_gbq function imported at the top of the notebook instead.
df = read_gbq(query, project_id=PROJECT)

# Save locally as parquet, creating the target directory when it is missing.
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
output_path = Path(r"C:\Users\karel\Desktop\data\Thesis\Data\AUMCdb_start\Full_dataset\aumc_rrt_raw.parquet")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_parquet(output_path)

print(f"Dataset saved: {output_path}")
print(f"Shape: {df.shape}")

  df = pd.read_gbq(query, project_id=PROJECT)


Dataset saved: C:\Users\karel\Desktop\data\Thesis\Data\AUMCdb_start\Full_dataset\aumc_rrt_raw.parquet
Shape: (125798, 132)


In [64]:
# Print a short summary of the extracted DataFrame: its size, the number of
# distinct values in the two id columns, and the terminal_event frequencies.
row_total, column_total = df.shape
print(f"Total rows: {row_total:,}")
print(f"Total columns: {column_total}")

for label, id_column in (
    ("\nUnique stays", "visit_occurrence_id"),
    ("Unique subjects", "person_id"),
):
    print(f"{label}: {df[id_column].nunique():,}")

print("\nTerminal events:")
event_counts = df["terminal_event"].value_counts()
print(event_counts)

Total rows: 125,798
Total columns: 132

Unique stays: 7,514
Unique subjects: 6,835

Terminal events:
terminal_event
discharge    5805
death         955
rrt_start     754
Name: count, dtype: int64
