In [0]:
%sql
CREATE SCHEMA IF NOT EXISTS mimic

In [0]:
callout_df = spark.read.option("header", "true").option("inferSchema", "true").csv("dbfs:/mnt/sravs/CALLOUT.csv")

In [0]:
callout_df.write.format("delta").mode("overwrite").saveAsTable("mimic.callout")

In [0]:
%sql
SELECT 
    COUNT(*) as total_callouts,
    ROUND(AVG((unix_timestamp(outcometime) - unix_timestamp(createtime)) / 3600), 2) as avg_total_process_hours,
    ROUND(AVG((unix_timestamp(acknowledgetime) - unix_timestamp(createtime)) / 3600), 2) as avg_acknowledgment_hours,
    ROUND(COUNT(CASE WHEN callout_outcome = 'Discharged' THEN 1 END) * 100.0 / COUNT(*), 1) as success_rate_percent,
    COUNT(CASE WHEN callout_outcome = 'Discharged' THEN 1 END) as discharged_count,
    COUNT(CASE WHEN callout_outcome = 'Cancelled' THEN 1 END) as cancelled_count
FROM mimicdata.mimic.callout;

total_callouts,avg_total_process_hours,avg_acknowledgment_hours,success_rate_percent,discharged_count,cancelled_count
77,10.1,1.33,92.2,71,6


In [0]:
%sql
SELECT 
    curr_careunit,
    COUNT(*) as callout_volume,
    ROUND(AVG((UNIX_TIMESTAMP(outcometime) - UNIX_TIMESTAMP(createtime)) / 3600), 2) as avg_process_hours,
    ROUND(AVG((UNIX_TIMESTAMP(acknowledgetime) - UNIX_TIMESTAMP(createtime)) / 3600), 2) as avg_ack_hours,
    COUNT(CASE WHEN callout_outcome = 'Discharged' THEN 1 END) as discharged,
    COUNT(CASE WHEN callout_outcome = 'Cancelled' THEN 1 END) as cancelled,
    ROUND(COUNT(CASE WHEN callout_outcome = 'Discharged' THEN 1 END) * 100.0 / COUNT(*), 1) as success_rate_percent
FROM mimicdata.mimic.callout
WHERE curr_careunit IS NOT NULL
GROUP BY curr_careunit
ORDER BY callout_volume DESC;

curr_careunit,callout_volume,avg_process_hours,avg_ack_hours,discharged,cancelled,success_rate_percent
MICU,41,9.55,1.01,37,4,90.2
SICU,19,12.47,0.97,18,1,94.7
CCU,10,9.47,3.54,9,1,90.0
TSICU,4,6.8,0.89,4,0,100.0
CSRU,3,9.18,0.84,3,0,100.0


In [0]:
%sql
SELECT 
    callout_service,
    COUNT(*) as callout_volume,
    ROUND(AVG(TIMESTAMPDIFF(HOUR, createtime, outcometime)), 2) as avg_process_hours,
    COUNT(CASE WHEN callout_outcome = 'Discharged' THEN 1 END) as discharged,
    COUNT(CASE WHEN callout_outcome = 'Cancelled' THEN 1 END) as cancelled,
    ROUND(COUNT(CASE WHEN callout_outcome = 'Discharged' THEN 1 END) * 100.0 / COUNT(*), 1) as success_rate_percent
FROM mimicdata.mimic.callout
WHERE callout_service IS NOT NULL
GROUP BY callout_service
ORDER BY callout_volume DESC;

callout_service,callout_volume,avg_process_hours,discharged,cancelled,success_rate_percent
MED,47,10.62,43,4,91.5
SURG,7,6.43,7,0,100.0
CCU,5,5.0,4,1,80.0
TSURG,4,6.25,4,0,100.0
NSURG,3,10.67,3,0,100.0
NMED,3,12.67,3,0,100.0
CSURG,3,8.67,3,0,100.0
OMED,3,11.67,2,1,66.7
ORTHO,1,5.0,1,0,100.0
CMED,1,4.0,1,0,100.0


In [0]:
%sql
SELECT 
    'Telemetry' as request_type,
    COUNT(CASE WHEN request_tele = 1 THEN 1 END) as request_count,
    ROUND(COUNT(CASE WHEN request_tele = 1 THEN 1 END) * 100.0 / COUNT(*), 1) as percentage
FROM mimicdata.mimic.callout
UNION ALL
SELECT 
    'MRSA Isolation' as request_type,
    COUNT(CASE WHEN request_mrsa = 1 THEN 1 END) as request_count,
    ROUND(COUNT(CASE WHEN request_mrsa = 1 THEN 1 END) * 100.0 / COUNT(*), 1) as percentage
FROM mimicdata.mimic.callout
UNION ALL
SELECT 
    'VRE Isolation' as request_type,
    COUNT(CASE WHEN request_vre = 1 THEN 1 END) as request_count,
    ROUND(COUNT(CASE WHEN request_vre = 1 THEN 1 END) * 100.0 / COUNT(*), 1) as percentage
FROM mimicdata.mimic.callout
UNION ALL
SELECT 
    'C.Diff Isolation' as request_type,
    COUNT(CASE WHEN request_cdiff = 1 THEN 1 END) as request_count,
    ROUND(COUNT(CASE WHEN request_cdiff = 1 THEN 1 END) * 100.0 / COUNT(*), 1) as percentage
FROM mimicdata.mimic.callout
UNION ALL
SELECT 
    'Respiratory' as request_type,
    COUNT(CASE WHEN request_resp = 1 THEN 1 END) as request_count,
    ROUND(COUNT(CASE WHEN request_resp = 1 THEN 1 END) * 100.0 / COUNT(*), 1) as percentage
FROM mimicdata.mimic.callout
ORDER BY request_count DESC;

request_type,request_count,percentage
Telemetry,36,46.8
MRSA Isolation,17,22.1
VRE Isolation,9,11.7
C.Diff Isolation,6,7.8
Respiratory,0,0.0


In [0]:
%sql
SELECT 
    time_bucket,
    COUNT(*) as callout_count,
    ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM mimicdata.mimic.callout), 1) as percentage
FROM (
    SELECT 
        CASE 
            WHEN (unix_timestamp(outcometime) - unix_timestamp(createtime))/3600 < 4 THEN '0-4 hours'
            WHEN (unix_timestamp(outcometime) - unix_timestamp(createtime))/3600 < 8 THEN '4-8 hours'
            WHEN (unix_timestamp(outcometime) - unix_timestamp(createtime))/3600 < 12 THEN '8-12 hours'
            WHEN (unix_timestamp(outcometime) - unix_timestamp(createtime))/3600 < 24 THEN '12-24 hours'
            ELSE '24+ hours'
        END as time_bucket
    FROM mimicdata.mimic.callout
    WHERE outcometime IS NOT NULL AND createtime IS NOT NULL
) subquery
GROUP BY time_bucket
ORDER BY 
    CASE 
        WHEN time_bucket = '0-4 hours' THEN 1
        WHEN time_bucket = '4-8 hours' THEN 2
        WHEN time_bucket = '8-12 hours' THEN 3
        WHEN time_bucket = '12-24 hours' THEN 4
        ELSE 5
    END;

time_bucket,callout_count,percentage
0-4 hours,9,11.7
4-8 hours,40,51.9
8-12 hours,11,14.3
12-24 hours,7,9.1
24+ hours,10,13.0


In [0]:
%sql
SELECT 
    acknowledge_status,
    COUNT(*) as count,
    ROUND(AVG((UNIX_TIMESTAMP(acknowledgetime) - UNIX_TIMESTAMP(createtime)) / 3600), 2) as avg_ack_time_hours,
    ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM mimicdata.mimic.callout), 1) as percentage
FROM mimicdata.mimic.callout
WHERE acknowledge_status IS NOT NULL
GROUP BY acknowledge_status
ORDER BY count DESC;

acknowledge_status,count,avg_ack_time_hours,percentage
Acknowledged,74,1.33,96.1
Revised,3,,3.9


In [0]:
%sql
SELECT 
    curr_careunit as from_unit,
    callout_service as to_service,
    COUNT(*) as transfer_count,
    ROUND(AVG((UNIX_TIMESTAMP(outcometime) - UNIX_TIMESTAMP(createtime))/3600), 2) as avg_process_hours,
    ROUND(COUNT(CASE WHEN callout_outcome = 'Discharged' THEN 1 END) * 100.0 / COUNT(*), 1) as success_rate
FROM mimicdata.mimic.callout
WHERE curr_careunit IS NOT NULL AND callout_service IS NOT NULL
GROUP BY curr_careunit, callout_service
HAVING COUNT(*) > 1
ORDER BY transfer_count DESC;

from_unit,to_service,transfer_count,avg_process_hours,success_rate
MICU,MED,37,10.01,89.2
SICU,SURG,6,6.91,100.0
SICU,MED,5,17.35,100.0
CCU,MED,4,15.91,100.0
CCU,CCU,4,5.56,75.0
SICU,NSURG,3,11.4,100.0
SICU,NMED,3,13.29,100.0
CSRU,CSURG,3,9.18,100.0
TSICU,TSURG,2,7.62,100.0
MICU,OMED,2,4.89,100.0


In [0]:
%sql
SELECT 
    EXTRACT(HOUR FROM createtime) as hour_of_day,
    COUNT(*) as callout_count,
    ROUND(AVG((UNIX_TIMESTAMP(outcometime) - UNIX_TIMESTAMP(createtime))/3600), 2) as avg_process_hours
FROM mimicdata.mimic.callout
WHERE createtime IS NOT NULL
GROUP BY EXTRACT(HOUR FROM createtime)
ORDER BY hour_of_day;

SELECT 
    EXTRACT(DOW FROM createtime) as day_of_week,
    CASE EXTRACT(DOW FROM createtime)
        WHEN 0 THEN 'Sunday'
        WHEN 1 THEN 'Monday'
        WHEN 2 THEN 'Tuesday'
        WHEN 3 THEN 'Wednesday'
        WHEN 4 THEN 'Thursday'
        WHEN 5 THEN 'Friday'
        WHEN 6 THEN 'Saturday'
    END as day_name,
    COUNT(*) as callout_count,
    ROUND(AVG((UNIX_TIMESTAMP(outcometime) - UNIX_TIMESTAMP(createtime))/3600), 2) as avg_process_hours
FROM mimicdata.mimic.callout
WHERE createtime IS NOT NULL
GROUP BY EXTRACT(DOW FROM createtime)
ORDER BY day_of_week;

day_of_week,day_name,callout_count,avg_process_hours
1,Monday,6,8.06
2,Tuesday,7,11.01
3,Wednesday,15,8.29
4,Thursday,15,12.49
5,Friday,6,15.53
6,Saturday,16,9.87
7,,12,7.47


In [0]:
query = """
WITH processing_stats AS (
    SELECT 
        AVG((UNIX_TIMESTAMP(outcometime) - UNIX_TIMESTAMP(createtime))/3600) as mean_hours,
        STDDEV((UNIX_TIMESTAMP(outcometime) - UNIX_TIMESTAMP(createtime))/3600) as stddev_hours
    FROM mimicdata.mimic.callout
    WHERE outcometime IS NOT NULL AND createtime IS NOT NULL
)
SELECT 
    c.row_id,
    c.subject_id,
    c.curr_careunit,
    c.callout_service,
    c.callout_outcome,
    ROUND((UNIX_TIMESTAMP(c.outcometime) - UNIX_TIMESTAMP(c.createtime))/3600, 2) as process_hours,
    c.createtime,
    c.outcometime
FROM mimicdata.mimic.callout c, processing_stats ps
WHERE (UNIX_TIMESTAMP(c.outcometime) - UNIX_TIMESTAMP(c.createtime))/3600 > (ps.mean_hours + 2 * ps.stddev_hours)
ORDER BY (UNIX_TIMESTAMP(c.outcometime) - UNIX_TIMESTAMP(c.createtime))/3600 DESC
"""

df = spark.sql(query)
display(df)

row_id,subject_id,curr_careunit,callout_service,callout_outcome,process_hours,createtime,outcometime
3948,10124,CCU,MED,Discharged,37.03,2192-04-25T11:08:15Z,2192-04-27T00:10:03Z
3947,10124,MICU,MED,Discharged,34.65,2192-04-18T22:30:53Z,2192-04-20T09:10:04Z
3935,10083,MICU,MED,Discharged,33.7,2192-11-21T11:28:19Z,2192-11-22T21:10:02Z
3923,10038,SICU,MED,Discharged,33.51,2144-02-13T12:09:13Z,2144-02-14T21:40:03Z
3921,10029,SICU,MED,Discharged,33.27,2139-09-24T09:53:37Z,2139-09-25T19:10:01Z
3939,10094,MICU,MED,Discharged,30.82,2180-03-03T14:50:39Z,2180-03-04T21:40:03Z
3946,10124,MICU,MED,Cancelled,28.94,2192-04-17T15:32:11Z,2192-04-18T20:28:33Z


In [0]:
%sql
WITH unit_metrics AS (
    SELECT 
        curr_careunit,
        COUNT(*) as volume,
        AVG((UNIX_TIMESTAMP(outcometime) - UNIX_TIMESTAMP(createtime))/3600) as avg_hours,
        COUNT(CASE WHEN callout_outcome = 'Discharged' THEN 1 END) * 100.0 / COUNT(*) as success_rate
    FROM mimicdata.mimic.callout
    WHERE curr_careunit IS NOT NULL
    GROUP BY curr_careunit
)
SELECT 
    curr_careunit,
    volume,
    ROUND(avg_hours, 2) as avg_process_hours,
    ROUND(success_rate, 1) as success_rate_percent,
    -- Efficiency score: higher success rate and lower processing time = better
    ROUND((success_rate / avg_hours) * 10, 2) as efficiency_score
FROM unit_metrics
WHERE volume >= 3  -- Only units with sufficient volume
ORDER BY efficiency_score DESC;

curr_careunit,volume,avg_process_hours,success_rate_percent,efficiency_score
TSICU,4,6.8,100.0,146.98
CSRU,3,9.18,100.0,108.88
CCU,10,9.47,90.0,95.02
MICU,41,9.55,90.2,94.54
SICU,19,12.47,94.7,75.99
