In [0]:
%sql
CREATE SCHEMA IF NOT EXISTS mimic

In [0]:
admissions_df = spark.read.option("header", "true").option("inferSchema", "true").csv("dbfs:/mnt/mimic/ADMISSIONS.csv")

In [0]:
admissions_df.write.format("delta").mode("overwrite").saveAsTable("mimic.admissions")

In [0]:
%sql
SELECT 
    admission_type,
    COUNT(*) as count,
    ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM mimicdata_2567611159492892.mimic.admissions), 2) as percentage
FROM 
    mimicdata_2567611159492892.mimic.admissions
GROUP BY 
    admission_type
ORDER BY 
    count DESC;

admission_type,count,percentage
EMERGENCY,119,92.25
ELECTIVE,8,6.2
URGENT,2,1.55


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT COUNT(*) as total_mortality_count
FROM mimicdata_2567611159492892.mimic.ADMISSIONS 
WHERE deathtime IS NOT NULL;

total_mortality_count
40


In [0]:
%sql
SELECT COUNT(*) as total_mortality_count
FROM mimicdata_2567611159492892.mimic.ADMISSIONS 
WHERE hospital_expire_flag = 1;

SELECT 
    hospital_expire_flag,
    COUNT(*) as count,
    CASE 
        WHEN hospital_expire_flag = 1 THEN 'Deaths'
        WHEN hospital_expire_flag = 0 THEN 'Survivors'
        ELSE 'Unknown'
    END as status
FROM mimicdata_2567611159492892.mimic.ADMISSIONS 
GROUP BY hospital_expire_flag
ORDER BY hospital_expire_flag;

SELECT 
    COUNT(*) as total_admissions,
    SUM(hospital_expire_flag) as total_deaths,
    ROUND((SUM(hospital_expire_flag) * 100.0 / COUNT(*)), 2) as mortality_rate_percent
FROM mimicdata_2567611159492892.mimic.ADMISSIONS;

total_admissions,total_deaths,mortality_rate_percent
129,40,31.01


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
--Average length of stay
SELECT 
    ROUND(AVG(DATEDIFF(dischtime, admittime)), 2) as avg_length_of_stay_days
FROM 
    mimicdata_2567611159492892.mimic.admissions
WHERE 
    dischtime IS NOT NULL AND admittime IS NOT NULL;

-- Monthly trend
SELECT 
    DATE_FORMAT(admittime, 'yyyy-MM') as month_year,
    ROUND(AVG(DATEDIFF(dischtime, admittime)), 2) as avg_length_of_stay_days,
    COUNT(*) as admission_count
FROM 
    mimicdata_2567611159492892.mimic.admissions
WHERE 
    dischtime IS NOT NULL AND admittime IS NOT NULL
GROUP BY 
    month_year
ORDER BY 
    month_year;

month_year,avg_length_of_stay_days,admission_count
2102-08,8.0,1
2104-09,6.0,1
2104-10,8.0,1
2105-05,13.0,1
2106-08,1.0,1
2107-01,14.67,3
2107-03,9.0,1
2107-05,6.0,1
2110-12,16.0,1
2112-02,7.0,1


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    diagnosis,
    COUNT(*) as total_cases,
    SUM(hospital_expire_flag) as deaths,
    ROUND(SUM(hospital_expire_flag) * 100.0 / COUNT(*), 2) as mortality_rate
FROM 
    mimicdata_2567611159492892.mimic.admissions
GROUP BY 
    diagnosis
HAVING 
    total_cases > 1 -- Filter for statistical significance
ORDER BY 
    total_cases DESC, mortality_rate DESC;

diagnosis,total_cases,deaths,mortality_rate
SEPSIS,10,2,20.0
PNEUMONIA,8,0,0.0
FEVER,4,2,50.0
SHORTNESS OF BREATH,4,1,25.0
CONGESTIVE HEART FAILURE,3,2,66.67
FAILURE TO THRIVE,3,0,0.0
LIVER FAILURE,2,2,100.0
HYPOTENSION,2,1,50.0
UPPER GI BLEED,2,1,50.0
STROKE/TIA,2,1,50.0


Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    diagnosis,
    COUNT(*) as count,
    ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM mimicdata_2567611159492892.mimic.admissions), 2) as percentage
FROM 
    mimicdata_2567611159492892.mimic.admissions
GROUP BY 
    diagnosis
ORDER BY 
    count DESC
LIMIT 10;

diagnosis,count,percentage
SEPSIS,10,7.75
PNEUMONIA,8,6.2
SHORTNESS OF BREATH,4,3.1
FEVER,4,3.1
CONGESTIVE HEART FAILURE,3,2.33
FAILURE TO THRIVE,3,2.33
ASTHMA;CHRONIC OBST PULM DISEASE,2,1.55
UPPER GI BLEED,2,1.55
GASTROINTESTINAL BLEED,2,1.55
LIVER FAILURE,2,1.55


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    insurance,
    COUNT(*) as count,
    ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM mimicdata_2567611159492892.mimic.admissions), 2) as percentage
FROM 
    mimicdata_2567611159492892.mimic.admissions
GROUP BY 
    insurance
ORDER BY 
    count DESC;

insurance,count,percentage
Medicare,98,75.97
Private,24,18.6
Medicaid,6,4.65
Government,1,0.78


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    ROUND(AVG((DATEDIFF(admittime, edregtime)) * 24), 2) as avg_er_to_admission_hours,
    MIN((DATEDIFF(admittime, edregtime)) * 24) as min_hours,
    MAX((DATEDIFF(admittime, edregtime)) * 24) as max_hours,
    COUNT(*) as patient_count
FROM 
    mimicdata_2567611159492892.mimic.admissions
WHERE 
    edregtime IS NOT NULL AND admittime IS NOT NULL;

avg_er_to_admission_hours,min_hours,max_hours,patient_count
2.61,0,24,92


In [0]:
%sql
SELECT 
    CASE 
        WHEN (DATEDIFF(admittime, edregtime)) * 24 < 1 THEN 'Under 1 hour'
        WHEN (DATEDIFF(admittime, edregtime)) * 24 < 2 THEN '1-2 hours'
        WHEN (DATEDIFF(admittime, edregtime)) * 24 < 4 THEN '2-4 hours'
        WHEN (DATEDIFF(admittime, edregtime)) * 24 < 8 THEN '4-8 hours'
        ELSE 'Over 8 hours'
    END as time_bracket,
    COUNT(*) as patient_count
FROM 
    mimicdata_2567611159492892.mimic.admissions
WHERE 
    edregtime IS NOT NULL AND admittime IS NOT NULL
GROUP BY 
    time_bracket
ORDER BY 
    MIN((DATEDIFF(admittime, edregtime)) * 24);

time_bracket,patient_count
Under 1 hour,82
Over 8 hours,10


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    date_format(admittime, 'yyyy-MM') as month_year,
    COUNT(*) as admission_count
FROM 
    mimicdata_2567611159492892.mimic.admissions
GROUP BY 
    month_year
ORDER BY 
    month_year;

month_year,admission_count
2102-08,1
2104-09,1
2104-10,1
2105-05,1
2106-08,1
2107-01,3
2107-03,1
2107-05,1
2110-12,1
2112-02,1


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    discharge_location,
    COUNT(*) as count,
    ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM mimicdata_2567611159492892.mimic.admissions), 2) as percentage
FROM 
    mimicdata_2567611159492892.mimic.admissions
GROUP BY 
    discharge_location
ORDER BY 
    count DESC;

discharge_location,count,percentage
DEAD/EXPIRED,40,31.01
SNF,39,30.23
HOME,15,11.63
HOME HEALTH CARE,14,10.85
REHAB/DISTINCT PART HOSP,13,10.08
ICF,3,2.33
LONG TERM CARE HOSPITAL,2,1.55
DISCH-TRAN TO PSYCH HOSP,1,0.78
HOSPICE-HOME,1,0.78
HOME WITH HOME IV PROVIDR,1,0.78


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    ethnicity,
    COUNT(*) as count,
    ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM mimicdata_2567611159492892.mimic.admissions), 2) as percentage
FROM 
    mimicdata_2567611159492892.mimic.admissions
GROUP BY 
    ethnicity
ORDER BY 
    count DESC;

ethnicity,count,percentage
WHITE,86,66.67
HISPANIC/LATINO - PUERTO RICAN,15,11.63
UNKNOWN/NOT SPECIFIED,11,8.53
BLACK/AFRICAN AMERICAN,7,5.43
OTHER,3,2.33
HISPANIC OR LATINO,2,1.55
AMERICAN INDIAN/ALASKA NATIVE FEDERALLY RECOGNIZED TRIBE,2,1.55
ASIAN,2,1.55
UNABLE TO OBTAIN,1,0.78


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    CASE 
        WHEN date_format(admittime, 'E') IN ('Sat', 'Sun') THEN 'Weekend'
        ELSE 'Weekday' 
    END as day_type,
    COUNT(*) as admission_count,
    ROUND(AVG(DATEDIFF(dischtime, admittime)), 2) as avg_los_days,
    ROUND(SUM(hospital_expire_flag) * 100.0 / COUNT(*), 2) as mortality_rate
FROM 
    mimicdata_2567611159492892.mimic.admissions
GROUP BY 
    CASE 
        WHEN date_format(admittime, 'E') IN ('Sat', 'Sun') THEN 'Weekend'
        ELSE 'Weekday' 
    END

day_type,admission_count,avg_los_days,mortality_rate
Weekday,95,9.11,32.63
Weekend,34,10.0,26.47


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
    insurance,
    ethnicity,
    COUNT(*) as patient_count,
    ROUND(AVG(DATEDIFF(dischtime, admittime)), 2) as avg_los_days,
    ROUND(SUM(hospital_expire_flag) * 100.0 / COUNT(*), 2) as mortality_rate
FROM 
    mimicdata_2567611159492892.mimic.admissions
GROUP BY 
    insurance, ethnicity
HAVING 
    patient_count > 3 -- Filter small groups for statistical relevance
ORDER BY 
    patient_count DESC;

insurance,ethnicity,patient_count,avg_los_days,mortality_rate
Medicare,WHITE,66,7.7,34.85
Private,WHITE,17,17.88,11.76
Medicare,HISPANIC/LATINO - PUERTO RICAN,15,5.67,0.0
Medicare,BLACK/AFRICAN AMERICAN,7,12.43,28.57
Private,UNKNOWN/NOT SPECIFIED,5,11.2,40.0
Medicare,UNKNOWN/NOT SPECIFIED,4,5.0,25.0


Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.