In [0]:
%sql
CREATE SCHEMA IF NOT EXISTS mimic

In [0]:
admissions_df = spark.read.option("header", "true").option("inferSchema", "true").csv("dbfs:/mnt/mimic/ADMISSIONS.csv")
patients_df = spark.read.option("header", "true").option("inferSchema", "true").csv("dbfs:/mnt/mimic/PATIENTS.csv")
icustays_df = spark.read.option("header", "true").option("inferSchema", "true").csv("dbfs:/mnt/mimic/ICUSTAYS.csv")

In [0]:
admissions_df.write.format("delta").mode("overwrite").saveAsTable("mimic.admissions")
patients_df.write.format("delta").mode("overwrite").saveAsTable("mimic.patients")
icustays_df.write.format("delta").mode("overwrite").saveAsTable("mimic.icustays")

In [0]:
%sql
WITH patient_journey AS (
    SELECT 
        a.admission_type,
        i.first_careunit,
        i.last_careunit,
        a.discharge_location,
        COUNT(*) as patient_count,
        AVG(i.los) as avg_icu_los
    FROM mimicdata_2567611159492892.mimic.admissions a
    LEFT JOIN mimicdata_2567611159492892.mimic.icustays i ON a.hadm_id = i.hadm_id
    WHERE a.admission_type IS NOT NULL 
      AND a.discharge_location IS NOT NULL
    GROUP BY a.admission_type, i.first_careunit, i.last_careunit, a.discharge_location
)
SELECT 
    admission_type,
    COALESCE(first_careunit, 'No ICU') as first_icu,
    COALESCE(last_careunit, 'No ICU') as last_icu,
    discharge_location,
    patient_count,
    ROUND(avg_icu_los, 2) as avg_icu_los,
    ROUND(100.0 * patient_count / SUM(patient_count) OVER(), 2) as percentage
FROM patient_journey
ORDER BY patient_count DESC;

admission_type,first_icu,last_icu,discharge_location,patient_count,avg_icu_los,percentage
EMERGENCY,MICU,MICU,SNF,24,2.68,17.65
EMERGENCY,MICU,MICU,DEAD/EXPIRED,22,5.63,16.18
EMERGENCY,MICU,MICU,HOME,10,1.62,7.35
EMERGENCY,SICU,SICU,DEAD/EXPIRED,6,12.34,4.41
EMERGENCY,SICU,SICU,SNF,6,2.58,4.41
EMERGENCY,CCU,CCU,DEAD/EXPIRED,6,5.78,4.41
EMERGENCY,TSICU,TSICU,DEAD/EXPIRED,6,1.44,4.41
EMERGENCY,MICU,MICU,REHAB/DISTINCT PART HOSP,5,10.16,3.68
EMERGENCY,CCU,CCU,SNF,4,2.5,2.94
EMERGENCY,SICU,SICU,HOME HEALTH CARE,3,2.34,2.21


Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

In [0]:
%sql
WITH age_icu_mortality AS (
    SELECT 
        CASE 
            WHEN EXTRACT(YEAR FROM a.admittime) - EXTRACT(YEAR FROM p.dob) < 30 THEN '18-29'
            WHEN EXTRACT(YEAR FROM a.admittime) - EXTRACT(YEAR FROM p.dob) < 50 THEN '30-49'
            WHEN EXTRACT(YEAR FROM a.admittime) - EXTRACT(YEAR FROM p.dob) < 70 THEN '50-69'
            WHEN EXTRACT(YEAR FROM a.admittime) - EXTRACT(YEAR FROM p.dob) < 85 THEN '70-84'
            ELSE '85+'
        END as age_group,
        CASE 
            WHEN i.los IS NULL THEN 'No ICU'
            WHEN i.los < 1 THEN '< 1 day'
            WHEN i.los < 3 THEN '1-3 days'
            WHEN i.los < 7 THEN '3-7 days'
            WHEN i.los < 14 THEN '1-2 weeks'
            ELSE '2+ weeks'
        END as icu_los_category,
        COUNT(*) as total_patients,
        SUM(CASE WHEN a.hospital_expire_flag = 1 THEN 1 ELSE 0 END) as deaths,
        ROUND(100.0 * SUM(CASE WHEN a.hospital_expire_flag = 1 THEN 1 ELSE 0 END) / COUNT(*), 2) as mortality_rate
    FROM mimicdata_2567611159492892.mimic.patients p
    JOIN mimicdata_2567611159492892.mimic.admissions a ON p.subject_id = a.subject_id
    LEFT JOIN mimicdata_2567611159492892.mimic.icustays i ON a.hadm_id = i.hadm_id
    GROUP BY age_group, icu_los_category
)
SELECT * FROM age_icu_mortality
WHERE total_patients >= 5  
ORDER BY age_group, icu_los_category;

age_group,icu_los_category,total_patients,deaths,mortality_rate
30-49,2+ weeks,5,3,60.0
50-69,1-2 weeks,6,2,33.33
50-69,1-3 days,22,1,4.55
50-69,3-7 days,7,3,42.86
50-69,< 1 day,5,1,20.0
70-84,1-2 weeks,6,2,33.33
70-84,1-3 days,28,8,28.57
70-84,3-7 days,8,2,25.0
70-84,< 1 day,6,2,33.33
85+,1-3 days,14,7,50.0


Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

In [0]:
%sql
WITH readmissions AS (
    SELECT 
        p.subject_id,
        p.gender,
        EXTRACT(YEAR FROM a.admittime) - EXTRACT(YEAR FROM p.dob) as age_at_admission,
        a.hadm_id,
        a.admittime,
        a.dischtime,
        a.hospital_expire_flag,
        LAG(a.dischtime) OVER (PARTITION BY p.subject_id ORDER BY a.admittime) as prev_discharge,
        LEAD(a.admittime) OVER (PARTITION BY p.subject_id ORDER BY a.admittime) as next_admission
    FROM mimicdata_2567611159492892.mimic.patients p
    JOIN mimicdata_2567611159492892.mimic.admissions a ON p.subject_id = a.subject_id
),
readmission_analysis AS (
    SELECT 
        subject_id,
        gender,
        CASE 
            WHEN age_at_admission < 30 THEN '18-29'
            WHEN age_at_admission < 50 THEN '30-49'
            WHEN age_at_admission < 70 THEN '50-69'
            ELSE '70+'
        END as age_group,
        hadm_id,
        admittime,
        CASE 
            WHEN next_admission IS NOT NULL 
             AND next_admission <= dischtime + INTERVAL '30 days' THEN 1 
            ELSE 0 
        END as readmitted_30d,
        CASE 
            WHEN next_admission IS NOT NULL 
             AND next_admission <= dischtime + INTERVAL '90 days' THEN 1 
            ELSE 0 
        END as readmitted_90d
    FROM readmissions
    WHERE hospital_expire_flag = 0  
)
SELECT 
    age_group,
    gender,
    COUNT(*) as total_discharges,
    SUM(readmitted_30d) as readmissions_30d,
    SUM(readmitted_90d) as readmissions_90d,
    ROUND(100.0 * SUM(readmitted_30d) / COUNT(*), 2) as readmission_rate_30d,
    ROUND(100.0 * SUM(readmitted_90d) / COUNT(*), 2) as readmission_rate_90d
FROM readmission_analysis
GROUP BY age_group, gender
ORDER BY age_group, gender;


age_group,gender,total_discharges,readmissions_30d,readmissions_90d,readmission_rate_30d,readmission_rate_90d
18-29,F,1,0,0,0.0,0.0
18-29,M,1,0,0,0.0,0.0
30-49,F,1,0,0,0.0,0.0
30-49,M,4,0,3,0.0,75.0
50-69,F,7,0,0,0.0,0.0
50-69,M,26,6,14,23.08,53.85
70+,F,29,2,2,6.9,6.9
70+,M,20,3,3,15.0,15.0


Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

In [0]:
%sql
WITH care_escalation AS (
    SELECT 
        a.admission_type,
        EXTRACT(YEAR FROM a.admittime) - EXTRACT(YEAR FROM p.dob) as age_at_admission,
        p.gender,
        CASE WHEN i.icustay_id IS NOT NULL THEN 1 ELSE 0 END as required_icu,
        a.hospital_expire_flag,
        CASE 
            WHEN i.intime IS NOT NULL 
            THEN (UNIX_TIMESTAMP(i.intime) - UNIX_TIMESTAMP(a.admittime))/3600 
            ELSE NULL 
        END as hours_to_icu
    FROM mimicdata_2567611159492892.mimic.patients p
    JOIN mimicdata_2567611159492892.mimic.admissions a ON p.subject_id = a.subject_id
    LEFT JOIN mimicdata_2567611159492892.mimic.icustays i ON a.hadm_id = i.hadm_id
    
) SELECT 
    admission_type,
    CASE 
        WHEN age_at_admission < 30 THEN '18-29'
        WHEN age_at_admission < 50 THEN '30-49'
        WHEN age_at_admission < 70 THEN '50-69'
        ELSE '70+'
    END as age_group,
    gender,
    COUNT(*) as total_admissions,
    SUM(required_icu) as icu_transfers,
    ROUND(100.0 * SUM(required_icu) / COUNT(*), 2) as icu_transfer_rate,
    ROUND(AVG(hours_to_icu), 2) as avg_hours_to_icu,
    ROUND(100.0 * SUM(hospital_expire_flag) / COUNT(*), 2) as mortality_rate
FROM care_escalation
GROUP BY admission_type, age_group, gender
ORDER BY icu_transfer_rate DESC;


admission_type,age_group,gender,total_admissions,icu_transfers,icu_transfer_rate,avg_hours_to_icu,mortality_rate
URGENT,70+,F,2,2,100.0,0.03,50.0
EMERGENCY,30-49,M,7,7,100.0,24.8,42.86
EMERGENCY,50-69,F,11,11,100.0,36.89,36.36
ELECTIVE,50-69,M,2,2,100.0,9.41,0.0
EMERGENCY,50-69,M,28,28,100.0,7.83,14.29
ELECTIVE,70+,M,1,1,100.0,21.57,0.0
EMERGENCY,18-29,F,2,2,100.0,0.02,50.0
EMERGENCY,70+,M,32,32,100.0,21.64,37.5
EMERGENCY,70+,F,39,39,100.0,39.99,41.03
ELECTIVE,70+,F,5,5,100.0,15.36,0.0


Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

In [0]:
%sql
WITH los_analysis AS (
    SELECT 
        p.subject_id,
        p.gender,
        EXTRACT(YEAR FROM a.admittime) - EXTRACT(YEAR FROM p.dob) as age,
        a.admission_type,
        DATEDIFF(DAY, a.admittime, a.dischtime) as hospital_los,
        i.los as icu_los,
        a.hospital_expire_flag,
        CASE WHEN i.icustay_id IS NOT NULL THEN 1 ELSE 0 END as had_icu_stay
    FROM  mimicdata_2567611159492892.mimic.patients p
    JOIN  mimicdata_2567611159492892.mimic.admissions a ON p.subject_id = a.subject_id
    LEFT JOIN  mimicdata_2567611159492892.mimic.icustays i ON a.hadm_id = i.hadm_id
)
SELECT 
    admission_type,
    CASE 
        WHEN age < 30 THEN '18-29'
        WHEN age < 50 THEN '30-49'
        WHEN age < 70 THEN '50-69'
        ELSE '70+'
    END as age_group,
    gender,
    COUNT(*) as total_patients,
    ROUND(AVG(hospital_los), 2) as avg_hospital_los,
    ROUND(AVG(CASE WHEN had_icu_stay = 1 THEN icu_los END), 2) as avg_icu_los,
    ROUND(100.0 * SUM(had_icu_stay) / COUNT(*), 2) as icu_utilization_rate,
    ROUND(100.0 * SUM(hospital_expire_flag) / COUNT(*), 2) as mortality_rate,
    ROUND(AVG(CASE WHEN had_icu_stay = 1 THEN hospital_los END), 2) as avg_hospital_los_with_icu
FROM los_analysis
GROUP BY admission_type, age_group, gender
HAVING COUNT(*) >= 5
ORDER BY avg_hospital_los DESC;

admission_type,age_group,gender,total_patients,avg_hospital_los,avg_icu_los,icu_utilization_rate,mortality_rate,avg_hospital_los_with_icu
EMERGENCY,30-49,M,7,25.57,8.98,100.0,42.86,25.57
EMERGENCY,50-69,F,11,12.0,4.1,100.0,36.36,12.0
EMERGENCY,70+,F,39,8.77,4.13,100.0,41.03,8.77
ELECTIVE,70+,F,5,8.2,2.74,100.0,0.0,8.2
EMERGENCY,70+,M,32,6.22,2.59,100.0,37.5,6.22
EMERGENCY,50-69,M,28,5.93,3.45,100.0,14.29,5.93


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
WITH risk_factors AS (
    SELECT 
        a.hadm_id,
        EXTRACT(YEAR FROM a.admittime) - EXTRACT(YEAR FROM p.dob) as age,
        CASE WHEN p.gender = 'M' THEN 1 ELSE 0 END as is_male,
        CASE WHEN a.admission_type = 'EMERGENCY' THEN 1 ELSE 0 END as is_emergency,
        CASE WHEN i.icustay_id IS NOT NULL THEN 1 ELSE 0 END as required_icu,
        COALESCE(i.los, 0) as icu_los,
        (UNIX_TIMESTAMP(a.dischtime) - UNIX_TIMESTAMP(a.admittime))/86400 as hospital_los,
        a.hospital_expire_flag as died_in_hospital
    FROM mimicdata_2567611159492892.mimic.patients p
    JOIN mimicdata_2567611159492892.mimic.admissions a ON p.subject_id = a.subject_id
    LEFT JOIN mimicdata_2567611159492892.mimic.icustays i ON a.hadm_id = i.hadm_id
),
risk_scores AS (
    SELECT 
        *,
        -- risk score calculation
        (age * 0.02) + 
        (is_male * 0.1) + 
        (is_emergency * 0.3) + 
        (required_icu * 0.5) + 
        (icu_los * 0.1) as risk_score
    FROM risk_factors
)
SELECT 
    CASE 
        WHEN risk_score < 0.5 THEN 'Low Risk'
        WHEN risk_score < 1.0 THEN 'Medium Risk'
        WHEN risk_score < 2.0 THEN 'High Risk'
        ELSE 'Very High Risk'
    END as risk_category,
    COUNT(*) as total_patients,
    SUM(died_in_hospital) as deaths,
    ROUND(100.0 * SUM(died_in_hospital) / COUNT(*), 2) as mortality_rate,
    ROUND(AVG(age), 1) as avg_age,
    ROUND(AVG(hospital_los), 2) as avg_hospital_los,
    ROUND(100.0 * SUM(required_icu) / COUNT(*), 2) as icu_utilization_rate
FROM risk_scores
GROUP BY risk_category
ORDER BY 
    CASE risk_category 
        WHEN 'Low Risk' THEN 1
        WHEN 'Medium Risk' THEN 2
        WHEN 'High Risk' THEN 3
        WHEN 'Very High Risk' THEN 4
    END;

risk_category,total_patients,deaths,mortality_rate,avg_age,avg_hospital_los,icu_utilization_rate
High Risk,7,3,42.86,36.9,3.67,100.0
Very High Risk,129,43,33.33,87.3,10.08,100.0


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
WITH unit_transfers AS (
    SELECT 
        i.icustay_id,
        i.first_careunit,
        i.last_careunit,
        CASE WHEN i.first_careunit != i.last_careunit THEN 1 ELSE 0 END as transferred,
        i.los,
        a.hospital_expire_flag,
        EXTRACT(YEAR FROM a.admittime) - EXTRACT(YEAR FROM p.dob) as age
    FROM mimicdata_2567611159492892.mimic.icustays i
    JOIN mimicdata_2567611159492892.mimic.admissions a ON i.hadm_id = a.hadm_id
    JOIN mimicdata_2567611159492892.mimic.patients p ON a.subject_id = p.subject_id
)
SELECT 
    first_careunit,
    last_careunit,
    COUNT(*) as transfer_count,
    ROUND(AVG(los), 2) as avg_icu_los,
    ROUND(100.0 * SUM(hospital_expire_flag) / COUNT(*), 2) as mortality_rate,
    ROUND(AVG(age), 1) as avg_age,
    ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER(), 2) as percentage_of_transfers
FROM unit_transfers
WHERE transferred = 1
GROUP BY first_careunit, last_careunit
HAVING COUNT(*) >= 3
ORDER BY transfer_count DESC;

first_careunit,last_careunit,transfer_count,avg_icu_los,mortality_rate,avg_age,percentage_of_transfers
MICU,SICU,3,6.58,66.67,52.7,100.0


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
WITH icu_timing AS (
    SELECT 
        a.admission_type,
        EXTRACT(YEAR FROM a.admittime) - EXTRACT(YEAR FROM p.dob) as age,
        p.gender,
        (unix_timestamp(i.intime) - unix_timestamp(a.admittime)) / 3600 as hours_to_icu,
        i.los as icu_los,
        a.hospital_expire_flag
    FROM mimicdata_2567611159492892.mimic.patients p
    JOIN mimicdata_2567611159492892.mimic.admissions a ON p.subject_id = a.subject_id
    JOIN mimicdata_2567611159492892.mimic.icustays i ON a.hadm_id = i.hadm_id
    WHERE i.intime >= a.admittime  -- Valid transfer times only
)
SELECT 
    admission_type,
    CASE 
        WHEN age < 30 THEN '18-29'
        WHEN age < 50 THEN '30-49'
        WHEN age < 70 THEN '50-69'
        ELSE '70+'
    END as age_group,
    COUNT(*) as icu_transfers,
    ROUND(AVG(hours_to_icu), 2) as avg_hours_to_icu,
    ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY hours_to_icu), 2) as median_hours_to_icu,
    ROUND(AVG(icu_los), 2) as avg_icu_los,
    ROUND(100.0 * SUM(hospital_expire_flag) / COUNT(*), 2) as mortality_rate,
    COUNT(CASE WHEN hours_to_icu <= 6 THEN 1 END) as transfers_within_6h,
    ROUND(100.0 * COUNT(CASE WHEN hours_to_icu <= 6 THEN 1 END) / COUNT(*), 2) as pct_within_6h
FROM icu_timing
GROUP BY admission_type, age_group
HAVING COUNT(*) >= 5
ORDER BY avg_hours_to_icu;

admission_type,age_group,icu_transfers,avg_hours_to_icu,median_hours_to_icu,avg_icu_los,mortality_rate,transfers_within_6h,pct_within_6h
EMERGENCY,18-29,5,0.02,0.02,5.41,60.0,5,100.0
EMERGENCY,50-69,39,16.02,0.03,3.63,20.51,32,82.05
ELECTIVE,70+,5,20.02,14.56,2.75,0.0,2,40.0
EMERGENCY,30-49,11,26.03,0.03,14.39,54.55,7,63.64
EMERGENCY,70+,71,31.72,0.03,3.44,39.44,49,69.01


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
WITH daily_census AS (
    SELECT 
        calendar_date,
        SUM(CASE WHEN calendar_date BETWEEN DATE(a.admittime) AND DATE(a.dischtime) THEN 1 ELSE 0 END) as hospital_census,
        SUM(CASE WHEN calendar_date BETWEEN DATE(i.intime) AND DATE(i.outtime) THEN 1 ELSE 0 END) as icu_census
    FROM (
        SELECT DISTINCT DATE(admittime) as calendar_date FROM mimicdata_2567611159492892.mimic.admissions
        UNION
        SELECT DISTINCT DATE(dischtime) FROM mimicdata_2567611159492892.mimic.admissions
        UNION
        SELECT DISTINCT DATE(intime) FROM mimicdata_2567611159492892.mimic.icustays
        UNION
        SELECT DISTINCT DATE(outtime) FROM mimicdata_2567611159492892.mimic.icustays
    ) dates
    CROSS JOIN mimicdata_2567611159492892.mimic.admissions a
    LEFT JOIN mimicdata_2567611159492892.mimic.icustays i ON a.hadm_id = i.hadm_id
    GROUP BY calendar_date
)
SELECT 
    calendar_date,
    hospital_census,
    icu_census,
    ROUND(100.0 * icu_census / NULLIF(hospital_census, 0), 2) as icu_occupancy_rate,
    hospital_census - icu_census as general_ward_census
FROM daily_census
WHERE calendar_date IS NOT NULL
ORDER BY calendar_date;

calendar_date,hospital_census,icu_census,icu_occupancy_rate,general_ward_census
2102-08-29,1,0,0.0,1
2102-08-31,1,1,100.0,0
2102-09-01,1,1,100.0,0
2102-09-06,1,0,0.0,1
2104-09-24,1,1,100.0,0
2104-09-26,1,1,100.0,0
2104-09-30,1,0,0.0,1
2104-10-24,1,1,100.0,0
2104-10-25,1,1,100.0,0
2104-11-01,1,0,0.0,1


In [0]:
%sql
WITH seasonal_patterns AS (
    SELECT 
        EXTRACT(MONTH FROM a.admittime) as admission_month,
        EXTRACT(YEAR FROM a.admittime) as admission_year,
        a.admission_type,
        COUNT(*) as admissions,
        SUM(CASE WHEN i.icustay_id IS NOT NULL THEN 1 ELSE 0 END) as icu_transfers,
        SUM(a.hospital_expire_flag) as deaths,
        AVG(DATEDIFF(CAST(a.dischtime AS timestamp), CAST(a.admittime AS timestamp))) as avg_los
    FROM mimicdata_2567611159492892.mimic.admissions a
    LEFT JOIN mimicdata_2567611159492892.mimic.icustays i ON a.hadm_id = i.hadm_id
    GROUP BY admission_month, admission_year, admission_type
)
SELECT 
    admission_month,
    CASE 
        WHEN admission_month = 12 OR admission_month <= 2 THEN 'Winter'
        WHEN admission_month <= 5 THEN 'Spring'
        WHEN admission_month <= 8 THEN 'Summer'
        ELSE 'Fall'
    END as season,
    admission_type,
    AVG(admissions) as avg_monthly_admissions,
    ROUND(AVG(100.0 * icu_transfers / admissions), 2) as avg_icu_transfer_rate,
    ROUND(AVG(100.0 * deaths / admissions), 2) as avg_mortality_rate,
    ROUND(AVG(avg_los), 2) as avg_length_of_stay
FROM seasonal_patterns
GROUP BY admission_month, season, admission_type
ORDER BY admission_month, admission_type;

admission_month,season,admission_type,avg_monthly_admissions,avg_icu_transfer_rate,avg_mortality_rate,avg_length_of_stay
1,Winter,EMERGENCY,1.4285714285714286,100.0,28.57,9.6
1,Winter,URGENT,1.0,100.0,100.0,6.0
2,Winter,EMERGENCY,1.1,100.0,40.0,9.05
3,Spring,ELECTIVE,1.0,100.0,0.0,4.0
3,Spring,EMERGENCY,1.0,100.0,66.67,5.78
4,Spring,EMERGENCY,1.5,100.0,25.0,15.0
5,Spring,ELECTIVE,1.0,100.0,0.0,7.0
5,Spring,EMERGENCY,1.1538461538461535,100.0,38.46,5.35
6,Summer,ELECTIVE,1.0,100.0,0.0,39.0
6,Summer,EMERGENCY,1.0,100.0,0.0,10.67


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
WITH care_pathway_comparison AS (
    SELECT 
        a.admission_type,
        (UNIX_TIMESTAMP(a.dischtime) - UNIX_TIMESTAMP(a.admittime))/86400 as hospital_los,
        CASE WHEN i.icustay_id IS NOT NULL THEN 1 ELSE 0 END as required_icu,
        COALESCE(i.los, 0) as icu_los,
        a.hospital_expire_flag,
        a.discharge_location,
        CASE 
            WHEN i.intime IS NOT NULL 
            THEN (UNIX_TIMESTAMP(i.intime) - UNIX_TIMESTAMP(a.admittime))/3600 
            ELSE NULL 
        END as hours_to_icu
    FROM mimicdata_2567611159492892.mimic.admissions a
    LEFT JOIN mimicdata_2567611159492892.mimic.icustays i ON a.hadm_id = i.hadm_id
    WHERE a.admission_type IN ('EMERGENCY', 'ELECTIVE', 'URGENT')
)
SELECT 
    admission_type,
    COUNT(*) as total_admissions,
    ROUND(AVG(hospital_los), 2) as avg_hospital_los,
    ROUND(100.0 * SUM(required_icu) / COUNT(*), 2) as icu_utilization_rate,
    ROUND(AVG(CASE WHEN required_icu = 1 THEN icu_los END), 2) as avg_icu_los,
    ROUND(AVG(hours_to_icu), 2) as avg_hours_to_icu,
    ROUND(100.0 * SUM(hospital_expire_flag) / COUNT(*), 2) as mortality_rate,
    ROUND(100.0 * COUNT(CASE WHEN discharge_location = 'HOME' THEN 1 END) / COUNT(*), 2) as home_discharge_rate
FROM care_pathway_comparison
GROUP BY admission_type
ORDER BY 
    CASE admission_type 
        WHEN 'EMERGENCY' THEN 1
        WHEN 'URGENT' THEN 2
        WHEN 'ELECTIVE' THEN 3
    END;

admission_type,total_admissions,avg_hospital_los,icu_utilization_rate,avg_icu_los,avg_hours_to_icu,mortality_rate,home_discharge_rate
EMERGENCY,126,9.68,100.0,4.53,25.11,35.71,11.11
URGENT,2,6.26,100.0,5.21,0.03,50.0,0.0
ELECTIVE,8,11.67,100.0,3.01,14.65,0.0,12.5


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
WITH insurance_analysis AS (
    SELECT 
        a.insurance,
        EXTRACT(YEAR FROM a.admittime) - EXTRACT(YEAR FROM p.dob) as age,
        p.gender,
        a.ethnicity,
        (unix_timestamp(a.dischtime) - unix_timestamp(a.admittime))/86400 as hospital_los,
        CASE WHEN i.icustay_id IS NOT NULL THEN 1 ELSE 0 END as used_icu,
        COALESCE(i.los, 0) as icu_los,
        a.hospital_expire_flag,
        a.discharge_location
    FROM mimicdata_2567611159492892.mimic.patients p
    JOIN mimicdata_2567611159492892.mimic.admissions a ON p.subject_id = a.subject_id
    LEFT JOIN mimicdata_2567611159492892.mimic.icustays i ON a.hadm_id = i.hadm_id
    WHERE a.insurance IS NOT NULL
)
SELECT 
    insurance,
    COUNT(*) as total_patients,
    ROUND(AVG(age), 1) as avg_age,
    ROUND(100.0 * SUM(CASE WHEN gender = 'F' THEN 1 ELSE 0 END) / COUNT(*), 2) as female_percentage,
    ROUND(AVG(hospital_los), 2) as avg_hospital_los,
    ROUND(100.0 * SUM(used_icu) / COUNT(*), 2) as icu_utilization_rate,
    ROUND(AVG(CASE WHEN used_icu = 1 THEN icu_los END), 2) as avg_icu_los,
    ROUND(100.0 * SUM(hospital_expire_flag) / COUNT(*), 2) as mortality_rate,
    ROUND(100.0 * COUNT(CASE WHEN discharge_location = 'HOME' THEN 1 END) / COUNT(*), 2) as home_discharge_rate
FROM insurance_analysis
GROUP BY insurance
HAVING COUNT(*) >= 10
ORDER BY total_patients DESC;

insurance,total_patients,avg_age,female_percentage,avg_hospital_los,icu_utilization_rate,avg_icu_los,mortality_rate,home_discharge_rate
Medicare,105,94.5,48.57,8.45,100.0,3.93,33.33,9.52
Private,24,52.9,41.67,15.12,100.0,5.89,25.0,16.67


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
WITH patient_admission_counts AS (
    SELECT 
        p.subject_id,
        p.gender,
        p.dob,
        p.expire_flag,
        COUNT(a.hadm_id) as total_admissions,
        MIN(a.admittime) as first_admission,
        MAX(a.admittime) as last_admission,
        SUM((UNIX_TIMESTAMP(a.dischtime) - UNIX_TIMESTAMP(a.admittime))/86400) as total_hospital_days,
        SUM(CASE WHEN i.icustay_id IS NOT NULL THEN 1 ELSE 0 END) as total_icu_stays,
        SUM(COALESCE(i.los, 0)) as total_icu_days,
        SUM(a.hospital_expire_flag) as hospital_deaths
    FROM mimicdata_2567611159492892.mimic.patients p
    JOIN mimicdata_2567611159492892.mimic.admissions a ON p.subject_id = a.subject_id
    LEFT JOIN mimicdata_2567611159492892.mimic.icustays i ON a.hadm_id = i.hadm_id
    GROUP BY p.subject_id, p.gender, p.dob, p.expire_flag
)
SELECT 
    CASE 
        WHEN total_admissions = 1 THEN 'Single Admission'
        WHEN total_admissions <= 3 THEN '2-3 Admissions'
        WHEN total_admissions <= 5 THEN '4-5 Admissions'
        ELSE '6+ Admissions'
    END as admission_frequency_group,
    COUNT(*) as patient_count,
    ROUND(AVG(total_admissions), 2) as avg_admissions_per_patient,
    ROUND(AVG(total_hospital_days), 2) as avg_total_hospital_days,
    ROUND(AVG(total_icu_stays), 2) as avg_icu_stays_per_patient,
    ROUND(AVG(total_icu_days), 2) as avg_total_icu_days,
    ROUND(100.0 * SUM(CASE WHEN total_icu_stays > 0 THEN 1 ELSE 0 END) / COUNT(*), 2) as pct_with_icu_stay,
    ROUND(100.0 * SUM(expire_flag) / COUNT(*), 2) as overall_mortality_rate,
    ROUND(AVG((UNIX_TIMESTAMP(last_admission) - UNIX_TIMESTAMP(first_admission))/86400), 2) as avg_days_between_first_last
FROM patient_admission_counts
GROUP BY admission_frequency_group
ORDER BY 
    CASE admission_frequency_group
        WHEN 'Single Admission' THEN 1
        WHEN '2-3 Admissions' THEN 2
        WHEN '4-5 Admissions' THEN 3
        WHEN '6+ Admissions' THEN 4
    END;


admission_frequency_group,patient_count,avg_admissions_per_patient,avg_total_hospital_days,avg_icu_stays_per_patient,avg_total_icu_days,pct_with_icu_stay,overall_mortality_rate,avg_days_between_first_last
Single Admission,81,1.0,8.58,1.0,4.74,100.0,100.0,0.0
2-3 Admissions,17,2.12,26.45,2.12,8.71,100.0,100.0,52.14
4-5 Admissions,1,4.0,96.77,4.0,24.58,100.0,100.0,21.23
6+ Admissions,1,15.0,84.41,15.0,48.65,100.0,100.0,1433.79


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
WITH mortality_stratification AS (
    SELECT 
        a.hadm_id,
        EXTRACT(YEAR FROM a.admittime) - EXTRACT(YEAR FROM p.dob) as age,
        i.first_careunit,
        i.los as icu_los,
        a.admission_type,
        a.hospital_expire_flag,
        CASE 
            WHEN i.icustay_id IS NULL THEN 'No ICU'
            WHEN i.los < 1 THEN 'ICU < 1 day'
            WHEN i.los < 3 THEN 'ICU 1-3 days'
            WHEN i.los < 7 THEN 'ICU 3-7 days'
            ELSE 'ICU 7+ days'
        END as icu_category
    FROM mimicdata_2567611159492892.mimic.patients p
    JOIN mimicdata_2567611159492892.mimic.admissions a ON p.subject_id = a.subject_id
    LEFT JOIN mimicdata_2567611159492892.mimic.icustays i ON a.hadm_id = i.hadm_id
)
SELECT 
    icu_category,
    first_careunit,
    CASE 
        WHEN age < 50 THEN '<50'
        WHEN age < 70 THEN '50-69'
        ELSE '70+'
    END as age_group,
    COUNT(*) as total_patients,
    SUM(hospital_expire_flag) as deaths,
    ROUND(100.0 * SUM(hospital_expire_flag) / COUNT(*), 2) as mortality_rate,
    
    ROUND(100.0 * SUM(hospital_expire_flag) / COUNT(*) - 1.96 * SQRT(100.0 * SUM(hospital_expire_flag) / COUNT(*) * (100 - 100.0 * SUM(hospital_expire_flag) / COUNT(*)) / COUNT(*)), 2) as mortality_rate_lower_ci,
    ROUND(100.0 * SUM(hospital_expire_flag) / COUNT(*) + 1.96 * SQRT(100.0 * SUM(hospital_expire_flag) / COUNT(*) * (100 - 100.0 * SUM(hospital_expire_flag) / COUNT(*)) / COUNT(*)), 2) as mortality_rate_upper_ci
FROM mortality_stratification
GROUP BY icu_category, first_careunit, age_group
HAVING COUNT(*) >= 10
ORDER BY mortality_rate DESC;


icu_category,first_careunit,age_group,total_patients,deaths,mortality_rate,mortality_rate_lower_ci,mortality_rate_upper_ci
ICU 1-3 days,SICU,70+,10,5,50.0,19.01,80.99
ICU < 1 day,MICU,70+,11,4,36.36,7.94,64.79
ICU 1-3 days,MICU,70+,20,6,30.0,9.92,50.08
ICU 1-3 days,MICU,50-69,14,0,0.0,0.0,0.0


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
WITH efficiency_metrics AS (
    SELECT 
        i.first_careunit,
        a.admission_type,
        EXTRACT(YEAR FROM a.admittime) - EXTRACT(YEAR FROM p.dob) as age,
        i.los as icu_los,
        CASE WHEN a.hospital_expire_flag = 0 THEN 1 ELSE 0 END as survived,
        CASE WHEN a.discharge_location IN ('HOME', 'HOME HEALTH CARE') THEN 1 ELSE 0 END as good_outcome
    FROM mimicdata_2567611159492892.mimic.patients p
    JOIN mimicdata_2567611159492892.mimic.admissions a ON p.subject_id = a.subject_id
    JOIN mimicdata_2567611159492892.mimic.icustays i ON a.hadm_id = i.hadm_id
    WHERE i.los > 0
)
SELECT 
    first_careunit,
    admission_type,
    COUNT(*) as total_icu_stays,
    ROUND(AVG(icu_los), 2) as avg_icu_los,
    ROUND(100.0 * SUM(survived) / COUNT(*), 2) as survival_rate,
    ROUND(100.0 * SUM(good_outcome) / COUNT(*), 2) as good_outcome_rate,
    -- Efficiency score: good outcomes per day of ICU care
    ROUND(SUM(good_outcome) / SUM(icu_los), 3) as efficiency_score,
    -- Survival per ICU day
    ROUND(SUM(survived) / SUM(icu_los), 3) as survival_per_icu_day,
    ROUND(AVG(age), 1) as avg_patient_age
FROM efficiency_metrics
GROUP BY first_careunit, admission_type
HAVING COUNT(*) >= 5
ORDER BY efficiency_score DESC;


first_careunit,admission_type,total_icu_stays,avg_icu_los,survival_rate,good_outcome_rate,efficiency_score,survival_per_icu_day,avg_patient_age
TSICU,EMERGENCY,11,3.59,36.36,18.18,0.051,0.101,75.8
MICU,EMERGENCY,74,4.0,67.57,18.92,0.047,0.169,88.0
CCU,EMERGENCY,18,5.81,66.67,22.22,0.038,0.115,81.8
SICU,EMERGENCY,20,6.0,65.0,15.0,0.025,0.108,86.0


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
WITH safety_metrics AS (
    SELECT 
        a.hadm_id,
        DATEDIFF(a.dischtime, a.admittime) as hospital_los,
        COUNT(i.icustay_id) as icu_transfers,
        SUM(i.los) as total_icu_days,
        a.hospital_expire_flag,
        CASE 
            WHEN COUNT(i.icustay_id) > 1 THEN 1 
            ELSE 0 
        END as multiple_icu_stays,
        CASE 
            WHEN DATEDIFF(a.dischtime, a.admittime) > 14 THEN 1 
            ELSE 0 
        END as extended_stay,
        CASE 
            WHEN SUM(CASE WHEN i.first_careunit != i.last_careunit THEN 1 ELSE 0 END) > 0 THEN 1 
            ELSE 0 
        END as icu_unit_transfers
    FROM mimicdata_2567611159492892.mimic.admissions a
    LEFT JOIN mimicdata_2567611159492892.mimic.icustays i ON a.hadm_id = i.hadm_id
    GROUP BY a.hadm_id, a.admittime, a.dischtime, a.hospital_expire_flag
)
SELECT 
    CASE 
        WHEN hospital_los <= 3 THEN '≤3 days'
        WHEN hospital_los <= 7 THEN '4-7 days'
        WHEN hospital_los <= 14 THEN '8-14 days'
        ELSE '>14 days'
    END as los_category,
    COUNT(*) as total_admissions,
    ROUND(100.0 * SUM(multiple_icu_stays) / COUNT(*), 2) as pct_multiple_icu_stays,
    ROUND(100.0 * SUM(icu_unit_transfers) / COUNT(*), 2) as pct_with_icu_transfers,
    ROUND(100.0 * SUM(extended_stay) / COUNT(*), 2) as pct_extended_stays,
    ROUND(100.0 * SUM(hospital_expire_flag) / COUNT(*), 2) as mortality_rate,
    ROUND(AVG(total_icu_days), 2) as avg_total_icu_days,
    -- Risk indicator: combination of complexity factors
    ROUND(100.0 * SUM(CASE WHEN multiple_icu_stays = 1 AND icu_unit_transfers = 1 THEN 1 ELSE 0 END) / COUNT(*), 2) as high_complexity_rate
FROM safety_metrics
GROUP BY los_category
ORDER BY 
    CASE los_category
        WHEN '≤3 days' THEN 1
        WHEN '4-7 days' THEN 2
        WHEN '8-14 days' THEN 3
        WHEN '>14 days' THEN 4
    END;

los_category,total_admissions,pct_multiple_icu_stays,pct_with_icu_transfers,pct_extended_stays,mortality_rate,avg_total_icu_days,high_complexity_rate
≤3 days,35,0.0,5.71,0.0,51.43,1.36,0.0
4-7 days,42,4.76,4.76,0.0,21.43,2.73,0.0
8-14 days,35,2.86,5.71,0.0,14.29,4.6,0.0
>14 days,17,17.65,11.76,100.0,47.06,16.59,0.0


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
WITH age_adjusted_outcomes AS (
    SELECT 
        a.hadm_id,
        EXTRACT(YEAR FROM a.admittime) - EXTRACT(YEAR FROM p.dob) as age,
        a.admission_type,
        i.first_careunit,
        TIMESTAMPDIFF(SECOND, a.admittime, a.dischtime)/86400 as hospital_los,
        COALESCE(i.los, 0) as icu_los,
        a.hospital_expire_flag,
        -- Age standardization weights (approximate)
        CASE 
            WHEN EXTRACT(YEAR FROM a.admittime) - EXTRACT(YEAR FROM p.dob) < 30 THEN 0.15
            WHEN EXTRACT(YEAR FROM a.admittime) - EXTRACT(YEAR FROM p.dob) < 50 THEN 0.25
            WHEN EXTRACT(YEAR FROM a.admittime) - EXTRACT(YEAR FROM p.dob) < 70 THEN 0.35
            ELSE 0.25
        END as age_weight
    FROM mimicdata_2567611159492892.mimic.patients p
    JOIN mimicdata_2567611159492892.mimic.admissions a ON p.subject_id = a.subject_id
    LEFT JOIN mimicdata_2567611159492892.mimic.icustays i ON a.hadm_id = i.hadm_id
)
SELECT 
    admission_type,
    COALESCE(first_careunit, 'No ICU') as care_unit,
    COUNT(*) as total_patients,
    ROUND(AVG(age), 1) as avg_age,
    -- Raw outcomes
    ROUND(100.0 * SUM(hospital_expire_flag) / COUNT(*), 2) as raw_mortality_rate,
    ROUND(AVG(hospital_los), 2) as raw_avg_hospital_los,
    -- Age-adjusted outcomes (simplified calculation)
    ROUND(100.0 * SUM(hospital_expire_flag * age_weight) / SUM(age_weight), 2) as age_adjusted_mortality_rate,
    ROUND(SUM(hospital_los * age_weight) / SUM(age_weight), 2) as age_adjusted_avg_los,
    -- Performance compared to overall average
    ROUND(100.0 * SUM(hospital_expire_flag) / COUNT(*) - 
          (SELECT 100.0 * SUM(hospital_expire_flag) / COUNT(*) FROM age_adjusted_outcomes), 2) as mortality_vs_benchmark,
    ROUND(AVG(hospital_los) - 
          (SELECT AVG(hospital_los) FROM age_adjusted_outcomes), 2) as los_vs_benchmark
FROM age_adjusted_outcomes
GROUP BY admission_type, first_careunit
HAVING COUNT(*) >= 10
ORDER BY age_adjusted_mortality_rate;

admission_type,care_unit,total_patients,avg_age,raw_mortality_rate,raw_avg_hospital_los,age_adjusted_mortality_rate,age_adjusted_avg_los,mortality_vs_benchmark,los_vs_benchmark
EMERGENCY,CCU,18,81.8,33.33,15.74,30.0,14.82,-0.49,5.99
EMERGENCY,MICU,74,88.0,32.43,8.68,30.77,8.77,-1.39,-1.07
EMERGENCY,SICU,20,86.0,35.0,11.02,32.41,10.94,1.18,1.27
EMERGENCY,TSICU,11,75.8,63.64,5.18,62.71,4.91,29.81,-4.56


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
WITH care_coordination AS (
    SELECT 
        a.hadm_id,
        a.admission_type,
        CASE 
            WHEN i.intime <= a.admittime + INTERVAL '6 hours' THEN 'Direct ICU'
            WHEN i.intime IS NOT NULL THEN 'Transferred to ICU'
            ELSE 'No ICU'
        END as icu_pathway,
        EXTRACT(YEAR FROM a.admittime) - EXTRACT(YEAR FROM p.dob) as age,
        (UNIX_TIMESTAMP(a.dischtime) - UNIX_TIMESTAMP(a.admittime))/86400 as hospital_los,
        COALESCE(i.los, 0) as icu_los,
        a.hospital_expire_flag,
        a.discharge_location,
        CASE 
            WHEN i.intime IS NOT NULL 
            THEN (UNIX_TIMESTAMP(i.intime) - UNIX_TIMESTAMP(a.admittime))/3600 
            ELSE NULL 
        END as hours_to_icu
    FROM mimicdata_2567611159492892.mimic.patients p
    JOIN mimicdata_2567611159492892.mimic.admissions a ON p.subject_id = a.subject_id
    LEFT JOIN mimicdata_2567611159492892.mimic.icustays i ON a.hadm_id = i.hadm_id
)
SELECT 
    icu_pathway,
    admission_type,
    COUNT(*) as total_patients,
    ROUND(AVG(age), 1) as avg_age,
    ROUND(AVG(hospital_los), 2) as avg_hospital_los,
    ROUND(AVG(icu_los), 2) as avg_icu_los,
    ROUND(AVG(hours_to_icu), 2) as avg_hours_to_icu,
    ROUND(100.0 * SUM(hospital_expire_flag) / COUNT(*), 2) as mortality_rate,
    ROUND(100.0 * COUNT(CASE WHEN discharge_location IN ('HOME', 'HOME HEALTH CARE') THEN 1 END) / COUNT(*), 2) as home_discharge_rate,
    -- Effectiveness score: home discharges per hospital day
    ROUND(COUNT(CASE WHEN discharge_location IN ('HOME', 'HOME HEALTH CARE') THEN 1 END) / SUM(hospital_los), 3) as coordination_effectiveness_score
FROM care_coordination
GROUP BY icu_pathway, admission_type
HAVING COUNT(*) >= 5
ORDER BY coordination_effectiveness_score DESC;

icu_pathway,admission_type,total_patients,avg_age,avg_hospital_los,avg_icu_los,avg_hours_to_icu,mortality_rate,home_discharge_rate,coordination_effectiveness_score
Transferred to ICU,ELECTIVE,5,72.4,12.94,2.66,22.55,0.0,80.0,0.062
Direct ICU,EMERGENCY,93,90.3,7.57,4.18,0.27,31.18,19.35,0.026
Transferred to ICU,EMERGENCY,33,72.0,15.63,5.53,95.1,48.48,18.18,0.012


Databricks visualization. Run in Databricks to view.