In [63]:

from sklearn.linear_model import LogisticRegression # binary classification
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [64]:
# Access data using Google BigQuery.
from google.colab import auth
from google.cloud import bigquery

# Location of the MIMIC-IV v3.1 tables: the project and dataset names used
# to build fully-qualified BigQuery table references in later cells.
MIMIC_PROJECT = "physionet-data"
MIMIC_DATASET_HOSP = "mimiciv_3_1_hosp"
MIMIC_DATASET_ICU = "mimiciv_3_1_icu"

# Project that the BigQuery client is bound to (distinct from the project
# that hosts the MIMIC tables).
project_id = "icu-readmissions-ai-project"

# Authenticate the Colab user first, then create the client that runs queries.
auth.authenticate_user()
client = bigquery.Client(project=project_id)

In [72]:
# Shared `icu_stays` CTE: one row per ICU stay from MIMIC-IV `icustays`,
# plus the admission time and stay_id of the same patient's *next* ICU stay
# (NULL when there is no later stay).
#
# The window orders by (intime, stay_id) rather than intime alone so that the
# row order -- and therefore next_icu_intime / next_stay_id -- is deterministic
# even if a patient has two stays with an identical intime.
#
# The string intentionally ends right after the CTE's closing parenthesis:
# later cells append further CTEs and the final SELECT to it.
base_query = f"""
WITH icu_stays AS (
    SELECT
        icus.subject_id,
        icus.hadm_id,
        icus.stay_id,
        icus.intime,
        icus.outtime,
        icus.los as icu_los_days,
        icus.first_careunit,
        icus.last_careunit,

        LEAD(icus.intime) OVER (PARTITION BY icus.subject_id ORDER BY icus.intime, icus.stay_id) as next_icu_intime,
        LEAD(icus.stay_id) OVER (PARTITION BY icus.subject_id ORDER BY icus.intime, icus.stay_id) as next_stay_id

    FROM `{MIMIC_PROJECT}.{MIMIC_DATASET_ICU}.icustays` icus
)
"""


In [76]:
# Label every ICU stay with a 30-day ICU readmission flag.
#
# Extends `base_query` with a `readmissions` CTE and the final SELECT:
#   * days_to_readmission - DATETIME_DIFF(..., DAY) between this stay's outtime
#     and the patient's next ICU intime. BigQuery counts calendar-day
#     boundaries here, not elapsed 24-hour periods; NULL when there is no
#     later stay.
#   * readmitted_30day    - 1 when the next ICU stay starts at or after this
#     stay's outtime and no more than 30 * 24 hours later, else 0 (including
#     when there is no later stay).
#
# The flag is computed from elapsed seconds rather than from the DAY diff:
# a DAY diff of 30 can correspond to almost 31 elapsed days, and a DAY diff
# of 0 can hide a next stay that began *before* this stay's outtime on the
# same calendar date.
readmission_query = base_query + """
 , readmissions AS (
    SELECT
        *,
        DATETIME_DIFF(next_icu_intime, outtime, DAY) as days_to_readmission,
        CASE
            WHEN DATETIME_DIFF(next_icu_intime, outtime, SECOND)
                 BETWEEN 0 AND 30 * 24 * 60 * 60
            THEN 1
            ELSE 0
        END as readmitted_30day
    FROM icu_stays
)
SELECT *
FROM readmissions
ORDER BY subject_id, intime
"""

In [80]:
# Submit the readmission query to BigQuery, then download the complete
# result set into a pandas DataFrame.
readmission_job = client.query(readmission_query)
df_readmissions = readmission_job.to_dataframe()