In [0]:
%sql
CREATE SCHEMA IF NOT EXISTS mimic

In [0]:
transfers_df = spark.read.option("header", "true").option("inferSchema", "true").csv("dbfs:/mnt/mimic/TRANSFERS.csv")

In [0]:
transfers_df.write.format("delta").mode("overwrite").saveAsTable("mimic.transfers")

In [0]:
%sql
SELECT 
    COUNT(*) as total_records,
    COUNT(DISTINCT subject_id) as unique_patients,
    COUNT(DISTINCT hadm_id) as unique_hospital_admissions,
    COUNT(DISTINCT icustay_id) as unique_icu_stays,
    MIN(intime) as earliest_date,
    MAX(intime) as latest_date
FROM mimicdata_2567611159492892.mimic.transfers;


total_records,unique_patients,unique_hospital_admissions,unique_icu_stays,earliest_date,latest_date
524,100,129,136,2102-08-29T03:15:39Z,2202-10-11T18:30:49Z


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    'row_id' as column_name, COUNT(*) as total_count, COUNT(row_id) as non_null_count,
    ROUND(COUNT(row_id) * 100.0 / COUNT(*), 2) as completeness_pct
FROM mimicdata_2567611159492892.mimic.transfers
UNION ALL
SELECT 'subject_id', COUNT(*), COUNT(subject_id), ROUND(COUNT(subject_id) * 100.0 / COUNT(*), 2) FROM mimicdata_2567611159492892.mimic.transfers
UNION ALL
SELECT 'hadm_id', COUNT(*), COUNT(hadm_id), ROUND(COUNT(hadm_id) * 100.0 / COUNT(*), 2) FROM mimicdata_2567611159492892.mimic.transfers
UNION ALL
SELECT 'icustay_id', COUNT(*), COUNT(icustay_id), ROUND(COUNT(icustay_id) * 100.0 / COUNT(*), 2) FROM mimicdata_2567611159492892.mimic.transfers
UNION ALL
SELECT 'prev_careunit', COUNT(*), COUNT(prev_careunit), ROUND(COUNT(prev_careunit) * 100.0 / COUNT(*), 2) FROM mimicdata_2567611159492892.mimic.transfers
UNION ALL
SELECT 'curr_careunit', COUNT(*), COUNT(curr_careunit), ROUND(COUNT(curr_careunit) * 100.0 / COUNT(*), 2) FROM mimicdata_2567611159492892.mimic.transfers
UNION ALL
SELECT 'los', COUNT(*), COUNT(los), ROUND(COUNT(los) * 100.0 / COUNT(*), 2) FROM mimicdata_2567611159492892.mimic.transfers;

column_name,total_count,non_null_count,completeness_pct
row_id,524,524,100.0
subject_id,524,524,100.0
hadm_id,524,524,100.0
icustay_id,524,167,31.87
prev_careunit,524,167,31.87
curr_careunit,524,167,31.87
los,524,395,75.38


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    eventtype,
    COUNT(*) as event_count,
    ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM mimicdata_2567611159492892.mimic.transfers), 2) as percentage
FROM mimicdata_2567611159492892.mimic.transfers
GROUP BY eventtype
ORDER BY event_count DESC;

eventtype,event_count,percentage
transfer,266,50.76
admit,129,24.62
discharge,129,24.62


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    dbsource,
    COUNT(*) as record_count,
    ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM mimicdata_2567611159492892.mimic.transfers), 2) as percentage
FROM mimicdata_2567611159492892.mimic.transfers
GROUP BY dbsource;


dbsource,record_count,percentage
carevue,237,45.23
metavision,287,54.77


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    'Current Care Units' as category,
    curr_careunit as care_unit,
    COUNT(*) as frequency
FROM mimicdata_2567611159492892.mimic.transfers
WHERE curr_careunit IS NOT NULL
GROUP BY curr_careunit
UNION ALL
SELECT 
    'Previous Care Units' as category,
    prev_careunit as care_unit,
    COUNT(*) as frequency
FROM mimicdata_2567611159492892.mimic.transfers
WHERE prev_careunit IS NOT NULL
GROUP BY prev_careunit
ORDER BY category, frequency DESC;

category,care_unit,frequency
Current Care Units,MICU,95
Current Care Units,SICU,27
Current Care Units,CCU,26
Current Care Units,TSICU,12
Current Care Units,CSRU,7
Previous Care Units,MICU,95
Previous Care Units,SICU,27
Previous Care Units,CCU,26
Previous Care Units,TSICU,12
Previous Care Units,CSRU,7


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    COUNT(los) as records_with_los,
    ROUND(AVG(los), 2) as avg_los_hours,
    ROUND(AVG(los) / 24, 2) as avg_los_days,
    ROUND(MIN(los), 2) as min_los_hours,
    ROUND(MAX(los), 2) as max_los_hours,
    ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY los), 2) as median_los_hours,
    ROUND(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY los), 2) as p75_los_hours,
    ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY los), 2) as p95_los_hours
FROM mimicdata_2567611159492892.mimic.transfers
WHERE los IS NOT NULL AND los > 0;

records_with_los,avg_los_hours,avg_los_days,min_los_hours,max_los_hours,median_los_hours,p75_los_hours,p95_los_hours
394,71.9,3.0,0.01,1415.45,38.71,71.62,232.89


In [0]:
%sql
SELECT 
    DATE(intime) as transfer_date,
    eventtype,
    COUNT(*) as daily_count
FROM mimicdata_2567611159492892.mimic.transfers
WHERE intime IS NOT NULL
GROUP BY DATE(intime), eventtype
ORDER BY transfer_date, eventtype;

transfer_date,eventtype,daily_count
2102-08-29,admit,1
2102-08-29,transfer,2
2102-08-31,transfer,1
2102-09-01,transfer,2
2102-09-03,transfer,2
2102-09-06,discharge,1
2104-09-24,admit,1
2104-09-26,transfer,1
2104-09-30,discharge,1
2104-10-24,admit,1


Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    COALESCE(prev_careunit, 'ADMISSION') as source_unit,
    COALESCE(curr_careunit, 'DISCHARGE') as target_unit,
    COUNT(*) as transfer_count,
    ROUND(AVG(los), 2) as avg_los_hours
FROM mimicdata_2567611159492892.mimic.transfers
GROUP BY prev_careunit, curr_careunit
HAVING COUNT(*) > 0
ORDER BY transfer_count DESC;

source_unit,target_unit,transfer_count,avg_los_hours
ADMISSION,DISCHARGE,212,44.42
ADMISSION,MICU,82,73.89
MICU,DISCHARGE,81,97.2
SICU,DISCHARGE,26,61.05
ADMISSION,SICU,24,129.62
ADMISSION,CCU,22,80.09
CCU,DISCHARGE,22,47.06
ADMISSION,TSICU,11,80.18
MICU,MICU,11,105.1
TSICU,DISCHARGE,10,39.21


Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    care_unit,
    SUM(CASE WHEN event_type = 'admissions' THEN event_count ELSE 0 END) as admissions,
    SUM(CASE WHEN event_type = 'discharges' THEN event_count ELSE 0 END) as discharges,
    SUM(CASE WHEN event_type = 'admissions' THEN event_count ELSE 0 END) - 
    SUM(CASE WHEN event_type = 'discharges' THEN event_count ELSE 0 END) as net_flow
FROM (
    SELECT curr_careunit as care_unit, 'admissions' as event_type, COUNT(*) as event_count
    FROM mimicdata_2567611159492892.mimic.transfers 
    WHERE eventtype = 'admit' AND curr_careunit IS NOT NULL
    GROUP BY curr_careunit
    
    UNION ALL
    
    SELECT prev_careunit as care_unit, 'discharges' as event_type, COUNT(*) as event_count
    FROM mimicdata_2567611159492892.mimic.transfers 
    WHERE eventtype = 'discharge' AND prev_careunit IS NOT NULL
    GROUP BY prev_careunit
) combined
GROUP BY care_unit
ORDER BY net_flow DESC;

care_unit,admissions,discharges,net_flow
MICU,55,22,33
TSICU,9,5,4
SICU,8,6,2
CSRU,3,1,2
CCU,11,9,2


Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    curr_careunit as care_unit,
    COUNT(*) as total_stays,
    ROUND(AVG(los), 2) as avg_los_hours,
    ROUND(AVG(los) / 24, 2) as avg_los_days,
    ROUND(MIN(los), 2) as min_los_hours,
    ROUND(MAX(los), 2) as max_los_hours,
    ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY los), 2) as median_los_hours
FROM mimicdata_2567611159492892.mimic.transfers
WHERE curr_careunit IS NOT NULL AND los IS NOT NULL AND los > 0
GROUP BY curr_careunit
ORDER BY avg_los_hours DESC;

care_unit,total_stays,avg_los_hours,avg_los_days,min_los_hours,max_los_hours,median_los_hours
SICU,27,130.61,5.44,1.29,849.76,57.74
CCU,26,86.32,3.6,0.03,599.92,44.33
MICU,94,77.53,3.23,0.04,746.96,39.27
CSRU,7,74.7,3.11,21.64,195.4,31.88
TSICU,12,74.5,3.1,0.79,537.35,20.86


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    transfer_count,
    COUNT(*) as number_of_patients,
    ROUND(COUNT(*) * 100.0 / (SELECT COUNT(DISTINCT subject_id) FROM mimicdata_2567611159492892.mimic.transfers), 2) as percentage
FROM (
    SELECT subject_id, COUNT(*) as transfer_count
    FROM mimicdata_2567611159492892.mimic.transfers
    GROUP BY subject_id
) patient_transfers
GROUP BY transfer_count
ORDER BY transfer_count;

transfer_count,number_of_patients,percentage
2,17,17.0
3,22,22.0
4,19,19.0
5,12,12.0
6,11,11.0
7,5,5.0
8,3,3.0
9,4,4.0
10,2,2.0
12,2,2.0


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    'ICU Events' as category,
    COUNT(*) as event_count,
    ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM mimicdata_2567611159492892.mimic.transfers), 2) as percentage
FROM mimicdata_2567611159492892.mimic.transfers
WHERE icustay_id IS NOT NULL

UNION ALL

SELECT 
    'Non-ICU Events' as category,
    COUNT(*) as event_count,
    ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM mimicdata_2567611159492892.mimic.transfers), 2) as percentage
FROM mimicdata_2567611159492892.mimic.transfers
WHERE icustay_id IS NULL;

category,event_count,percentage
ICU Events,167,31.87
Non-ICU Events,357,68.13


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    subject_id,
    COUNT(DISTINCT hadm_id) as hospital_admissions,
    COUNT(*) as total_events,
    MIN(intime) as first_admission,
    MAX(intime) as last_event
FROM mimicdata_2567611159492892.mimic.transfers
GROUP BY subject_id
HAVING COUNT(DISTINCT hadm_id) > 1
ORDER BY hospital_admissions DESC, total_events DESC;

subject_id,hospital_admissions,total_events,first_admission,last_event
41976,15,53,2198-10-29T06:55:20Z,2202-10-11T18:30:49Z
44083,3,15,2112-05-04T01:45:43Z,2112-06-07T16:54:04Z
10088,3,10,2107-01-05T00:03:45Z,2107-05-18T13:29:09Z
10117,2,15,2138-06-05T17:23:00Z,2138-11-18T23:13:16Z
10124,2,12,2192-03-26T15:31:25Z,2192-05-15T23:34:21Z
10119,2,12,2117-08-05T18:28:22Z,2117-08-26T13:59:40Z
42135,2,10,2127-07-23T15:22:08Z,2127-10-28T06:57:19Z
40310,2,9,2144-07-11T15:02:58Z,2144-12-31T21:02:59Z
40124,2,9,2130-02-04T02:27:27Z,2130-08-18T15:27:55Z
10094,2,8,2180-02-29T18:54:39Z,2180-03-20T19:34:49Z


In [0]:
%sql
SELECT 
    'Same Ward Transfers' as transfer_type,
    COUNT(*) as transfer_count
FROM mimicdata_2567611159492892.mimic.transfers
WHERE eventtype = 'transfer' 
    AND prev_wardid IS NOT NULL 
    AND curr_wardid IS NOT NULL
    AND prev_wardid = curr_wardid

UNION ALL

SELECT 
    'Different Ward Transfers' as transfer_type,
    COUNT(*) as transfer_count
FROM mimicdata_2567611159492892.mimic.transfers
WHERE eventtype = 'transfer' 
    AND prev_wardid IS NOT NULL 
    AND curr_wardid IS NOT NULL
    AND prev_wardid != curr_wardid;


transfer_type,transfer_count
Same Ward Transfers,65
Different Ward Transfers,201


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    EXTRACT(HOUR FROM intime) as hour_of_day,
    eventtype,
    COUNT(*) as transfer_count
FROM mimicdata_2567611159492892.mimic.transfers
WHERE intime IS NOT NULL
GROUP BY EXTRACT(HOUR FROM intime), eventtype
ORDER BY hour_of_day, eventtype;


hour_of_day,eventtype,transfer_count
0,admit,7
0,transfer,5
1,admit,4
1,discharge,2
1,transfer,7
2,admit,5
2,discharge,1
2,transfer,6
3,admit,4
3,discharge,3


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    dbsource,
    eventtype,
    COUNT(*) as record_count,
    ROUND(AVG(CASE WHEN los IS NOT NULL THEN 1.0 ELSE 0.0 END) * 100, 2) as los_completeness_pct,
    ROUND(AVG(CASE WHEN curr_careunit IS NOT NULL THEN 1.0 ELSE 0.0 END) * 100, 2) as careunit_completeness_pct
FROM mimicdata_2567611159492892.mimic.transfers
GROUP BY dbsource, eventtype
ORDER BY dbsource, eventtype;

dbsource,eventtype,record_count,los_completeness_pct,careunit_completeness_pct
carevue,admit,57,100.0,59.65
carevue,discharge,57,0.0,0.0
carevue,transfer,123,100.0,29.27
metavision,admit,72,100.0,72.22
metavision,discharge,72,0.0,0.0
metavision,transfer,143,100.0,31.47


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    subject_id,
    COUNT(*) as total_events,
    COUNT(DISTINCT curr_careunit) as units_visited,
    COUNT(CASE WHEN eventtype = 'transfer' THEN 1 END) as transfer_count,
    ROUND(AVG(los), 2) as avg_los_per_event,
    -- Complexity score: combines event count, units visited, and transfers
    (COUNT(*) * 0.3 + COUNT(DISTINCT curr_careunit) * 0.4 + COUNT(CASE WHEN eventtype = 'transfer' THEN 1 END) * 0.3) as complexity_score
FROM mimicdata_2567611159492892.mimic.transfers
GROUP BY subject_id
ORDER BY complexity_score DESC
LIMIT 20;


subject_id,total_events,units_visited,transfer_count,avg_los_per_event,complexity_score
41976,53,3,23,53.4,24.0
10117,15,2,11,27.54,8.6
44083,15,2,9,41.08,8.0
10124,12,2,8,93.78,6.8
10119,12,1,8,46.1,6.4
42135,10,1,6,100.33,5.2
42231,9,1,7,25.63,5.2
43746,9,1,7,48.03,5.2
40310,9,2,5,449.77,5.0
40124,9,2,5,44.69,5.0


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    curr_careunit as care_unit,
    COUNT(*) as total_events,
    COUNT(DISTINCT subject_id) as unique_patients,
    ROUND(COUNT(*) * 1.0 / COUNT(DISTINCT subject_id), 2) as events_per_patient,
    ROUND(AVG(los), 2) as avg_los_hours,
    -- Turnover rate (events per day, assuming 30-day period)
    ROUND(COUNT(*) / 30.0, 2) as avg_daily_turnover
FROM mimicdata_2567611159492892.mimic.transfers
WHERE curr_careunit IS NOT NULL
GROUP BY curr_careunit
ORDER BY avg_daily_turnover DESC;

care_unit,total_events,unique_patients,events_per_patient,avg_los_hours,avg_daily_turnover
MICU,95,56,1.7,76.71,3.17
SICU,27,22,1.23,130.61,0.9
CCU,26,21,1.24,86.32,0.87
TSICU,12,11,1.09,74.5,0.4
CSRU,7,6,1.17,74.7,0.23


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
WITH bounce_backs AS (
    SELECT 
        t1.subject_id,
        t1.hadm_id,
        t1.outtime as first_transfer_out,
        t2.intime as return_time,
        t1.curr_careunit as left_unit,
        t2.curr_careunit as return_unit,
        (unix_timestamp(t2.intime) - unix_timestamp(t1.outtime)) / 3600 as hours_between
    FROM mimicdata_2567611159492892.mimic.transfers t1
    JOIN mimicdata_2567611159492892.mimic.transfers t2 
        ON t1.subject_id = t2.subject_id 
        AND t1.hadm_id = t2.hadm_id
        AND t1.outtime < t2.intime
        AND t1.curr_careunit = t2.curr_careunit
        AND (unix_timestamp(t2.intime) - unix_timestamp(t1.outtime)) / 3600 <= 24
    WHERE t1.eventtype = 'transfer' AND t2.eventtype IN ('transfer', 'admit')
)
SELECT 
    'Total Transfers' as metric,
    COUNT(*) as count
FROM mimicdata_2567611159492892.mimic.transfers
WHERE eventtype = 'transfer'

UNION ALL

SELECT 
    'Bounce-backs (24h)' as metric,
    COUNT(*) as count
FROM bounce_backs

UNION ALL

SELECT 
    'Success Rate %' as metric,
    ROUND(
        (SELECT COUNT(*) FROM mimicdata_2567611159492892.mimic.transfers WHERE eventtype = 'transfer') - COUNT(*)
    ) * 100.0 / (SELECT COUNT(*) FROM mimicdata_2567611159492892.mimic.transfers WHERE eventtype = 'transfer') as count
FROM bounce_backs;

metric,count
Total Transfers,266.0
Bounce-backs (24h),2.0
Success Rate %,99.24812030075188


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    care_unit,
    census_date,
    SUM(occupied) as daily_census
FROM (
    SELECT 
        curr_careunit as care_unit,
        DATE(intime) as census_date,
        1 as occupied
    FROM mimicdata_2567611159492892.mimic.transfers
    WHERE curr_careunit IS NOT NULL 
        AND eventtype IN ('admit', 'transfer')
        AND intime IS NOT NULL
) daily_admissions
GROUP BY care_unit, census_date
ORDER BY care_unit, census_date;

care_unit,census_date,daily_census
CCU,2107-01-16,1
CCU,2107-01-29,1
CCU,2112-05-23,2
CCU,2112-05-29,1
CCU,2128-11-08,1
CCU,2130-08-12,2
CCU,2131-07-27,1
CCU,2144-07-13,1
CCU,2149-05-29,1
CCU,2166-02-12,1


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    'ICU to Non-ICU' as flow_direction,
    COUNT(*) as transfer_count,
    ROUND(AVG(t1.los), 2) as avg_los_hours
FROM mimicdata_2567611159492892.mimic.transfers t1
JOIN mimicdata_2567611159492892.mimic.transfers t2 ON t1.subject_id = t2.subject_id 
    AND t1.hadm_id = t2.hadm_id
    AND t1.outtime = t2.intime
WHERE t1.icustay_id IS NOT NULL 
    AND t2.icustay_id IS NULL
    AND t1.eventtype = 'transfer'

UNION ALL

SELECT 
    'Non-ICU to ICU' as flow_direction,
    COUNT(*) as transfer_count,
    ROUND(AVG(t1.los), 2) as avg_los_hours
FROM mimicdata_2567611159492892.mimic.transfers t1
JOIN mimicdata_2567611159492892.mimic.transfers t2 ON t1.subject_id = t2.subject_id 
    AND t1.hadm_id = t2.hadm_id
    AND t1.outtime = t2.intime
WHERE t1.icustay_id IS NULL 
    AND t2.icustay_id IS NOT NULL
    AND t1.eventtype = 'transfer';

flow_direction,transfer_count,avg_los_hours
ICU to Non-ICU,71,99.64
Non-ICU to ICU,36,25.95


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    'Total Transfers' as metric,
    COUNT(*) as value,
    '' as unit
FROM mimicdata_2567611159492892.mimic.transfers
WHERE eventtype = 'transfer'

UNION ALL

SELECT 
    'Average LOS',
    ROUND(AVG(los), 2),
    'hours'
FROM mimicdata_2567611159492892.mimic.transfers
WHERE los IS NOT NULL AND los > 0

UNION ALL

SELECT 
    'ICU Utilization',
    ROUND(COUNT(CASE WHEN icustay_id IS NOT NULL THEN 1 END) * 100.0 / COUNT(*), 2),
    '%'
FROM mimicdata_2567611159492892.mimic.transfers

UNION ALL

SELECT 
    'Complex Patients (5+ events)',
    (SELECT COUNT(*) FROM (
        SELECT subject_id FROM mimicdata_2567611159492892.mimic.transfers GROUP BY subject_id HAVING COUNT(*) >= 5
    ) complex_patients),
    'patients'


metric,value,unit
Total Transfers,266.0,
Average LOS,71.9,hours
ICU Utilization,31.87,%
Complex Patients (5+ events),42.0,patients


Databricks visualization. Run in Databricks to view.