In [19]:
import pandas as pd
import re
import os
from sqlalchemy import create_engine
from IPython.display import display

# === DB Connection ===
engine = create_engine("postgresql://jtaft:GunnersMate2003!@100.118.151.104:5432/datalake")

# === Load Data from Cleaned Table ===
print("📥 Loading data from PostgreSQL (ahaems_cleaned)...")
df = pd.read_sql("SELECT * FROM ahaems_cleaned", con=engine)

# === Rename for internal use ===
df = df.rename(columns={
    "UniqueIncidentKey": "incident_id",
    "Patient Age (ePatient.15)": "age",
    "Patient Age Units (ePatient.16)": "age_units",
    "Primary Impression": "primary_impression",
    "Secondary Impression": "secondary_impression",
    "Transport Disposition": "transport_disposition",
    "Response Type Of Service Requested With Code (eResponse.05)": "response_type",
    "Situation Last Known Well Date Time (eSituation.18)": "lkw_time",
    "Vitals Signs Taken Date Time (eVitals.01)": "vitals_time",
    "Patient Cincinnati Stroke Scale Used (eVitals.30)": "stroke_scale_type",
    "Patient Initial Stroke Scale Score (eVitals.29)": "stroke_scale_score",
    "Cardiac Arrest During EMS Event With Code (eArrest.01)": "cardiac_arrest",
    "Disposition Final Patient Acuity Code (eDisposition.19)": "final_acuity"
})

# === Convert datetime fields ===
df["lkw_time"] = pd.to_datetime(df["lkw_time"], format="%m/%d/%Y %I:%M:%S %p", errors="coerce")
df["vitals_time"] = pd.to_datetime(df["vitals_time"], format="%m/%d/%Y %I:%M:%S %p", errors="coerce")

# === Extract ICDs ===
def extract_icd_prefix(text):
    if isinstance(text, str):
        match = re.search(r"\(([A-Z]\d{2}(?:\.\d+)?)\)", text.upper())
        if match:
            return match.group(1)[:3]
    return ""

df["primary_icd"] = df["primary_impression"].apply(extract_icd_prefix)
df["secondary_icd"] = df["secondary_impression"].apply(extract_icd_prefix)

# === Aggregate to 1 row per incident ===
grouped = df.groupby("incident_id").agg({
    "age": "first",
    "age_units": "first",
    "primary_icd": "first",
    "secondary_icd": "first",
    "transport_disposition": "first",
    "response_type": "first",
    "vitals_time": "min",
    "lkw_time": "first",
    "stroke_scale_type": "first",
    "stroke_scale_score": "first",
    "cardiac_arrest": "first",
    "final_acuity": "first"
}).reset_index()

# === Quarter Extraction ===
grouped["quarter"] = grouped["vitals_time"].dt.to_period("Q")

# === Denominator Logic ===
grouped["age"] = pd.to_numeric(grouped["age"], errors="coerce")
age_valid = (grouped["age"] >= 18) & (grouped["age_units"].str.lower() == "years")
stroke_icds = ["I60", "I61", "I63", "G45", "G46"]
impression_valid = grouped["primary_icd"].isin(stroke_icds) | grouped["secondary_icd"].isin(stroke_icds)
transport_valid = grouped["transport_disposition"].str.contains("transport by", case=False, na=False)
response_valid = grouped["response_type"].str.contains("2205001|2205003|2205009", na=False)

# Exclusions
exclude_lkw = (
    grouped["lkw_time"].notna() & grouped["vitals_time"].notna() &
    ((grouped["vitals_time"] - grouped["lkw_time"]).dt.total_seconds() >= 86400)
)
exclude_arrest = grouped["cardiac_arrest"].astype(str).isin(["3001003", "3001005"])
exclude_acuity = grouped["final_acuity"].astype(str) == "4219909"

grouped["in_denominator"] = (
    age_valid & impression_valid & transport_valid & response_valid &
    ~exclude_lkw & ~exclude_arrest & ~exclude_acuity
)

# === Valid values for numerator ===
valid_stroke_scores = [
    "3329001 - Negative",
    "3329003 - Non-Conclusive",
    "3329005 - Positive"
]

# === Numerator Logic (simplified for local practice) ===
grouped["in_numerator"] = (
    grouped["in_denominator"] &
    (grouped["stroke_scale_type"].astype(str).str.lower() == "true") &
    grouped["stroke_scale_score"].isin(valid_stroke_scores)
)

# === Debug output: Unique values after normalization ===
print("✅ Stroke Scale Score Value Counts:")
print(grouped["stroke_scale_score"].value_counts(dropna=False))

print("\n🧠 Inclusion Check Summary:")
print("In Denominator:", grouped["in_denominator"].sum())
print("In Numerator:", grouped["in_numerator"].sum())

# === Summary Output ===
summary = (
    grouped[grouped["in_denominator"]]
    .groupby("quarter")
    .agg(
        AHAEMS4_Denominator=("in_denominator", "sum"),
        AHAEMS4_Numerator=("in_numerator", "sum")
    )
    .reset_index()
)
summary["AHAEMS4_Percentage"] = (summary["AHAEMS4_Numerator"] / summary["AHAEMS4_Denominator"] * 100).round(2)
display(summary)

# === Export Fallout CSV ===
fallouts = grouped[grouped["in_denominator"] & ~grouped["in_numerator"]]
fallout_path = "/Volumes/jupyter/EMS_QI_Projects/ahaems-2025-submission/output/fallouts/ahaems4_fallouts.csv"
os.makedirs(os.path.dirname(fallout_path), exist_ok=True)
fallouts.to_csv(fallout_path, index=False)

📥 Loading data from PostgreSQL (ahaems_cleaned)...
✅ Stroke Scale Score Value Counts:
stroke_scale_score
None                        28878
3329001 - Negative          15428
3329003 - Non-Conclusive      763
3329005 - Positive            525
Name: count, dtype: int64

🧠 Inclusion Check Summary:
In Denominator: 756
In Numerator: 658


Unnamed: 0,quarter,AHAEMS4_Denominator,AHAEMS4_Numerator,AHAEMS4_Percentage
0,2024Q1,151,141,93.38
1,2024Q2,149,136,91.28
2,2024Q3,64,57,89.06
3,2024Q4,155,148,95.48
4,2025Q1,139,108,77.7
5,2025Q2,97,68,70.1


In [6]:
print("Cardiac Arrest Codes:", grouped["cardiac_arrest"].value_counts(dropna=False))
print("Final Acuity Codes:", grouped["final_acuity"].value_counts(dropna=False))

Cardiac Arrest Codes: cardiac_arrest
No (3001001)                                                                                   19960
Not Recorded (7701003)                                                                         13168
None                                                                                            7426
Not Applicable (7701001)                                                                        4568
Yes, Prior to Any EMS Arrival (includes Transport EMS & Medical First Responders) (3001003)      431
Yes, After Any EMS Arrival (includes Transport EMS & Medical First Responders) (3001005)          41
Name: count, dtype: int64
Final Acuity Codes: final_acuity
None    45594
Name: count, dtype: int64


In [7]:
mask = grouped["lkw_time"].notna() & grouped["vitals_time"].notna()
print("LKW Time Delta >= 24hr:", ((grouped["vitals_time"] - grouped["lkw_time"]).dt.total_seconds() >= 86400).sum())
print("Total Valid Time Pairs:", mask.sum())

LKW Time Delta >= 24hr: 167
Total Valid Time Pairs: 793


In [8]:
print("Response Type Samples:", grouped["response_type"].value_counts(dropna=False).head(10))

Response Type Samples: response_type
911 Response (Scene) (2205001)                               44129
IFT (2205005)                                                 1347
None                                                            42
Crew Transport Only (2205025)                                   30
Standby (2205013)                                               26
Support Services (2205021)                                      10
Air/IFT (it2205.116)                                             5
Intercept (2205003)                                              2
Mutual Aid (2205009)                                             1
Evaluation for Special Referral/Intake Programs (2205033)        1
Name: count, dtype: int64


In [9]:
print("Transport Disposition Samples:", grouped["transport_disposition"].value_counts(dropna=False).head(10))

Transport Disposition Samples: transport_disposition
Transport by This EMS Unit (This Crew Only)                  34820
No Transport                                                  2764
Patient Refused Transport                                     2645
Not Applicable                                                2111
Not Recorded                                                  1489
None                                                          1457
Transport by Another EMS Unit                                  189
Transport by This EMS Unit, with a Member of Another Crew       70
Non-Patient Transport (Not Otherwise Listed)                    35
Transport by Another EMS Unit, with a Member of This Crew       13
Name: count, dtype: int64


In [10]:
print("Primary ICD Samples:", grouped["primary_icd"].value_counts(dropna=False).head(10))
print("Secondary ICD Samples:", grouped["secondary_icd"].value_counts(dropna=False).head(10))

Primary ICD Samples: primary_icd
T14    6310
Z00    4951
       4050
G89    3037
R10    2487
R53    2290
J80    1845
F99    1599
I20    1578
R41    1509
Name: count, dtype: int64
Secondary ICD Samples: secondary_icd
       38966
R53      631
R11      605
J80      397
T14      388
Z00      384
R42      313
F99      308
R41      294
G89      279
Name: count, dtype: int64


In [11]:
print("Age Valid Count:", grouped[grouped["age"] >= 18]["age_units"].value_counts(dropna=False))

Age Valid Count: age_units
Years     37886
Days         42
None         10
Months        8
Hours         5
Name: count, dtype: int64


In [12]:
print("Stroke Scale Type Sample:", grouped["stroke_scale_type"].value_counts(dropna=False))


Stroke Scale Type Sample: stroke_scale_type
True     22645
False    19923
None      3026
Name: count, dtype: int64


In [14]:
print("Stroke Scale Score (normalized):", grouped["stroke_scale_score"].value_counts(dropna=False))

Stroke Scale Score (normalized): stroke_scale_score
None                        28878
3329001 - Negative          15428
3329003 - Non-Conclusive      763
3329005 - Positive            525
Name: count, dtype: int64
