In [0]:
%sql
CREATE SCHEMA IF NOT EXISTS mimic

In [0]:
icustays_df = spark.read.option("header", "true").option("inferSchema", "true").csv("dbfs:/mnt/mimic/ICUSTAYS.csv")

In [0]:
icustays_df.write.format("delta").mode("overwrite").saveAsTable("mimic.icustays")

In [0]:
%sql
SELECT 
    first_careunit,
    COUNT(*) as total_stays,
    ROUND(AVG(los), 2) as avg_length_of_stay,
    ROUND(MIN(los), 2) as min_los,
    ROUND(MAX(los), 2) as max_los,
    ROUND(STDDEV(los), 2) as std_dev_los
FROM mimicdata_2567611159492892.mimic.icustays
GROUP BY first_careunit
ORDER BY avg_length_of_stay DESC;

first_careunit,total_stays,avg_length_of_stay,min_los,max_los,std_dev_los
CCU,19,5.75,0.88,25.0,7.02
SICU,23,5.67,0.74,35.41,8.75
MICU,77,3.96,0.19,31.12,5.19
CSRU,6,3.63,0.9,8.14,3.2
TSICU,11,3.59,0.11,22.39,6.42


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    DATE_TRUNC('month', intime) as month,
    first_careunit,
    COUNT(*) as admissions,
    ROUND(AVG(los), 2) as avg_los_month
FROM mimicdata_2567611159492892.mimic.icustays
GROUP BY DATE_TRUNC('month', intime), first_careunit
ORDER BY month, first_careunit;

-- Daily occupancy calculation
SELECT 
    DATE(intime) as admission_date,
    COUNT(*) as daily_admissions,
    SUM(COUNT(*)) OVER (ORDER BY DATE(intime)) as cumulative_admissions
FROM mimicdata_2567611159492892.mimic.icustays
GROUP BY DATE(intime)
ORDER BY admission_date;

admission_date,daily_admissions,cumulative_admissions
2102-08-31,1,1
2104-09-24,1,2
2104-10-24,1,3
2105-06-08,1,4
2106-08-30,1,5
2107-01-05,1,6
2107-01-16,1,7
2107-01-29,1,8
2107-03-24,1,9
2107-05-12,1,10


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    first_careunit,
    last_careunit,
    COUNT(*) as transfer_count,
    ROUND(AVG(los), 2) as avg_los_transfers,
    CASE 
        WHEN first_careunit = last_careunit THEN 'No Transfer'
        ELSE 'Transfer'
    END as transfer_status
FROM mimicdata_2567611159492892.mimic.icustays
GROUP BY first_careunit, last_careunit
ORDER BY transfer_count DESC;

-- Transfer rate by unit
SELECT 
    first_careunit,
    COUNT(*) as total_admissions,
    SUM(CASE WHEN first_careunit != last_careunit THEN 1 ELSE 0 END) as transfers,
    ROUND(
        (SUM(CASE WHEN first_careunit != last_careunit THEN 1 ELSE 0 END) * 100.0 / COUNT(*)), 2
    ) as transfer_rate_percent
FROM mimicdata_2567611159492892.mimic.icustays
GROUP BY first_careunit
ORDER BY transfer_rate_percent DESC;

first_careunit,total_admissions,transfers,transfer_rate_percent
TSICU,11,1,9.09
MICU,77,5,6.49
CCU,19,1,5.26
SICU,23,1,4.35
CSRU,6,0,0.0


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    CASE 
        WHEN los < 1 THEN '< 1 day'
        WHEN los >= 1 AND los < 3 THEN '1-3 days'
        WHEN los >= 3 AND los < 7 THEN '3-7 days'
        WHEN los >= 7 AND los < 14 THEN '1-2 weeks'
        WHEN los >= 14 AND los < 30 THEN '2-4 weeks'
        ELSE '> 1 month'
    END as los_category,
    COUNT(*) as patient_count,
    ROUND((COUNT(*) * 100.0 / (SELECT COUNT(*) FROM mimicdata_2567611159492892.mimic.icustays)), 2) as percentage
FROM mimicdata_2567611159492892.mimic.icustays
GROUP BY 
    CASE 
        WHEN los < 1 THEN '< 1 day'
        WHEN los >= 1 AND los < 3 THEN '1-3 days'
        WHEN los >= 3 AND los < 7 THEN '3-7 days'
        WHEN los >= 7 AND los < 14 THEN '1-2 weeks'
        WHEN los >= 14 AND los < 30 THEN '2-4 weeks'
        ELSE '> 1 month'
    END
ORDER BY 
    los_category;

los_category,patient_count,percentage
1-2 weeks,15,11.03
1-3 days,69,50.74
2-4 weeks,8,5.88
3-7 days,20,14.71
< 1 day,22,16.18
> 1 month,2,1.47


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    EXTRACT(DOW FROM intime) as day_of_week,
    CASE EXTRACT(DOW FROM intime)
        WHEN 0 THEN 'Sunday'
        WHEN 1 THEN 'Monday'
        WHEN 2 THEN 'Tuesday'
        WHEN 3 THEN 'Wednesday'
        WHEN 4 THEN 'Thursday'
        WHEN 5 THEN 'Friday'
        WHEN 6 THEN 'Saturday'
    END as day_name,
    COUNT(*) as admission_count,
    ROUND(AVG(los), 2) as avg_los_by_day
FROM mimicdata_2567611159492892.mimic.icustays
GROUP BY EXTRACT(DOW FROM intime)
ORDER BY day_of_week;

-- Admissions by hour of day
SELECT 
    EXTRACT(HOUR FROM intime) as hour_of_day,
    COUNT(*) as admission_count,
    first_careunit
FROM mimicdata_2567611159492892.mimic.icustays
GROUP BY EXTRACT(HOUR FROM intime), first_careunit
ORDER BY hour_of_day, first_careunit;

hour_of_day,admission_count,first_careunit
0,3,MICU
1,4,MICU
1,1,TSICU
2,2,CCU
2,2,MICU
3,1,CCU
3,1,MICU
3,2,TSICU
4,1,CCU
4,2,MICU


Databricks visualization. Run in Databricks to view.

In [0]:
# Create the unit_capacity DataFrame
unit_capacity = spark.createDataFrame([
    ('MICU', 20),
    ('SICU', 15),
    ('CCU', 12),
    ('CSRU', 10),
    ('TSICU', 8)
], ['unit', 'capacity'])

# Register the DataFrame as a temporary view
unit_capacity.createOrReplaceTempView("unit_capacity")

# Create the daily_occupancy DataFrame using Spark SQL
spark.sql("""
    CREATE OR REPLACE TEMP VIEW daily_occupancy AS
    SELECT 
        DATE(intime) as stay_date,
        first_careunit,
        COUNT(*) as daily_admissions
    FROM mimicdata_2567611159492892.mimic.icustays
    GROUP BY DATE(intime), first_careunit
""")

# Execute the final query using Spark SQL
result = spark.sql("""
    SELECT 
        d.stay_date,
        d.first_careunit,
        d.daily_admissions,
        u.capacity,
        ROUND((d.daily_admissions * 100.0 / u.capacity), 2) as utilization_percent
    FROM daily_occupancy d
    JOIN unit_capacity u ON d.first_careunit = u.unit
    ORDER BY d.stay_date, d.first_careunit
""")

# Display the result
display(result)

stay_date,first_careunit,daily_admissions,capacity,utilization_percent
2102-08-31,MICU,1,20,5.0
2104-09-24,MICU,1,20,5.0
2104-10-24,TSICU,1,8,12.5
2105-06-08,MICU,1,20,5.0
2106-08-30,TSICU,1,8,12.5
2107-01-05,MICU,1,20,5.0
2107-01-16,CCU,1,12,8.33
2107-01-29,CCU,1,12,8.33
2107-03-24,MICU,1,20,5.0
2107-05-12,MICU,1,20,5.0


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    subject_id,
    COUNT(*) as total_icu_stays,
    MIN(intime) as first_admission,
    MAX(outtime) as last_discharge,
    ROUND(AVG(los), 2) as avg_los_per_stay,
    ROUND(SUM(los), 2) as total_los,
    ARRAY_JOIN(COLLECT_LIST(DISTINCT first_careunit), ', ') as units_used
FROM mimicdata_2567611159492892.mimic.icustays
GROUP BY subject_id
HAVING COUNT(*) > 1
ORDER BY total_icu_stays DESC, total_los DESC;

WITH readmissions AS (
    SELECT 
        subject_id,
        intime,
        outtime,
        LAG(outtime) OVER (PARTITION BY subject_id ORDER BY intime) as prev_discharge,
        first_careunit
    FROM mimicdata_2567611159492892.mimic.icustays
)
SELECT 
    subject_id,
    first_careunit,
    intime as readmission_time,
    prev_discharge,
    ROUND((UNIX_TIMESTAMP(intime) - UNIX_TIMESTAMP(prev_discharge))/86400, 2) as days_between_stays
FROM readmissions
WHERE prev_discharge IS NOT NULL
ORDER BY days_between_stays;

subject_id,first_careunit,readmission_time,prev_discharge,days_between_stays
40304,MICU,2163-11-24T16:52:26Z,2163-11-22T22:06:43Z,1.78
43735,CCU,2128-11-08T13:03:05Z,2128-11-05T17:07:58Z,2.83
43746,SICU,2111-01-12T15:26:49Z,2111-01-09T15:47:22Z,2.99
44083,CCU,2112-05-29T02:01:33Z,2112-05-25T14:59:50Z,3.46
10124,MICU,2192-04-30T14:50:44Z,2192-04-26T23:59:45Z,3.62
10124,CCU,2192-04-24T02:29:49Z,2192-04-20T08:51:28Z,3.73
40177,MICU,2169-05-11T20:45:56Z,2169-05-08T01:58:01Z,3.78
10119,SICU,2117-08-21T08:34:09Z,2117-08-13T18:44:34Z,7.58
42346,TSICU,2160-12-26T11:46:17Z,2160-12-17T12:41:12Z,8.96
42281,MICU,2119-10-30T15:42:34Z,2119-10-21T00:36:31Z,9.63


Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    first_careunit,
    COUNT(*) as total_admissions,
    ROUND(AVG(los), 2) as avg_los,
    ROUND(MIN(los), 2) as min_los,
    ROUND(MAX(los), 2) as max_los,
    COUNT(DISTINCT subject_id) as unique_patients,
    ROUND((COUNT(*) * 1.0 / COUNT(DISTINCT subject_id)), 2) as avg_stays_per_patient,
    SUM(CASE WHEN first_careunit != last_careunit THEN 1 ELSE 0 END) as transfers_out,
    ROUND(
        (SUM(CASE WHEN first_careunit != last_careunit THEN 1 ELSE 0 END) * 100.0 / COUNT(*)), 2
    ) as transfer_rate_percent,
    COUNT(CASE WHEN los > 30 THEN 1 END) as long_stay_patients,
    dbsource
FROM mimicdata_2567611159492892.mimic.icustays
GROUP BY first_careunit, dbsource
ORDER BY total_admissions DESC;



first_careunit,total_admissions,avg_los,min_los,max_los,unique_patients,avg_stays_per_patient,transfers_out,transfer_rate_percent,long_stay_patients,dbsource
MICU,44,4.04,0.19,31.12,28,1.57,2,4.55,1,metavision
MICU,33,3.84,0.65,15.36,28,1.18,3,9.09,0,carevue
SICU,13,5.43,0.74,29.26,11,1.18,1,7.69,0,metavision
CCU,12,4.58,0.88,19.58,11,1.09,1,8.33,0,metavision
SICU,10,5.98,1.07,35.41,9,1.11,0,0.0,1,carevue
CCU,7,7.76,1.62,25.0,7,1.0,0,0.0,0,carevue
TSICU,6,2.22,0.54,5.1,6,1.0,0,0.0,0,metavision
TSICU,5,5.23,0.11,22.39,5,1.0,1,20.0,0,carevue
CSRU,4,4.64,0.9,8.14,4,1.0,0,0.0,0,carevue
CSRU,2,1.61,1.32,1.91,2,1.0,0,0.0,0,metavision


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
WITH latest_date AS (
    SELECT MAX(DATE(intime)) as max_date 
    FROM mimicdata_2567611159492892.mimic.icustays
),
current_status AS (
    SELECT 
        first_careunit,
        COUNT(*) as current_patients,
        ROUND(AVG(los), 2) as avg_current_los,
        MIN(intime) as longest_stay_start
    FROM mimicdata_2567611159492892.mimic.icustays
    CROSS JOIN latest_date
    WHERE DATE(intime) = latest_date.max_date
    GROUP BY first_careunit
)
SELECT 
    c.first_careunit,
    c.current_patients,
    c.avg_current_los,
    c.longest_stay_start,
    CASE 
        WHEN c.current_patients > 15 THEN 'High Census'
        WHEN c.current_patients > 10 THEN 'Moderate Census'
        ELSE 'Low Census'
    END as occupancy_status
FROM current_status c
ORDER BY c.current_patients DESC;

first_careunit,current_patients,avg_current_los,longest_stay_start,occupancy_status
MICU,1,1.64,2202-10-03T01:46:32Z,Low Census


Databricks visualization. Run in Databricks to view.