In [7]:
import pandas as pd
import re
import os
from sqlalchemy import create_engine
from IPython.display import display

# 📦 Project path setup
import sys
from pathlib import Path

# Directory that is expected to contain project_paths.py. It defaults to the
# original location under the home directory, but can be redirected with the
# EMS_QI_PROJECT_ROOT environment variable when the project is mounted
# somewhere else (the default location did not exist in the recorded run).
PROJECT_ROOT = Path(os.environ.get("EMS_QI_PROJECT_ROOT", Path.home() / "work" / "EMS_QI_Projects"))

# Only append once so that re-running this cell does not pile up duplicate
# sys.path entries.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

try:
    from project_paths import (
        BASE_DIR, FALLOUTS_DIR, DATA_RAW_DIR, DATA_CLEANED_DIR, REPORTS_DIR, print_project_paths
    )
except ModuleNotFoundError as exc:
    # Re-raise untouched if a *different* module (e.g. a dependency imported
    # inside project_paths) is the one that is missing.
    if exc.name != "project_paths":
        raise
    # Same exception type as before, but the message now says where the
    # module was expected and how to point the notebook at the right place.
    raise ModuleNotFoundError(
        f"Could not import 'project_paths'; expected it at {PROJECT_ROOT / 'project_paths.py'}. "
        "Set the EMS_QI_PROJECT_ROOT environment variable to the directory that contains project_paths.py.",
        name="project_paths",
    ) from exc

# Optional sanity check
print_project_paths()

# === DB Connection ===
# The connection URL (including user and password) is read from the
# EMS_DATALAKE_URL environment variable so that no credentials are stored in
# the notebook or its saved outputs. Expected form:
#   postgresql://<user>:<password>@<host>:<port>/<database>
# Special characters in the password must be percent-encoded in the URL.
# NOTE(review): the password that was previously committed here should be
# treated as exposed and rotated.
db_url = os.environ.get("EMS_DATALAKE_URL")
if not db_url:
    raise RuntimeError(
        "EMS_DATALAKE_URL is not set. Export the PostgreSQL connection URL "
        "(postgresql://user:password@host:port/database) before running this notebook."
    )
engine = create_engine(db_url)

# === Load Data from Cleaned Table ===
# Pull the whole cleaned table into memory; all filtering happens in pandas below.
print("📥 Loading data from PostgreSQL (ahaems_cleaned)...")
df = pd.read_sql("SELECT * FROM ahaems_cleaned", con=engine)

# === Rename for internal use ===
# Verbose export headers -> short snake_case names used by the rest of the notebook.
COLUMN_ALIASES = {
    "UniqueIncidentKey": "incident_id",
    "Patient Age (ePatient.15)": "age",
    "Patient Age Units (ePatient.16)": "age_units",
    "Primary Impression": "primary_impression",
    "Secondary Impression": "secondary_impression",
    "Transport Disposition": "transport_disposition",
    "Stroke Alert": "stroke_alert",
    "Situation Last Known Well Date Time (eSituation.18)": "lkw_time",
    "Situation Symptom Onset Date Time (eSituation.01)": "onset_time",
    "Vitals Signs Taken Date Time (eVitals.01)": "vitals_time",
    "Cardiac Arrest During EMS Event With Code (eArrest.01)": "cardiac_arrest",
    "Disposition Final Patient Acuity Code (eDisposition.19)": "final_acuity",
    "Response Type Of Service Requested With Code (eResponse.05)": "response_type",
}
df = df.rename(columns=COLUMN_ALIASES)

# === Convert time columns ===
# All three timestamps are parsed with the same 12-hour "MM/DD/YYYY HH:MM:SS AM/PM"
# layout; values that do not parse are coerced to NaT instead of raising.
TIMESTAMP_FORMAT = "%m/%d/%Y %I:%M:%S %p"
for time_col in ("lkw_time", "onset_time", "vitals_time"):
    df[time_col] = pd.to_datetime(df[time_col], format=TIMESTAMP_FORMAT, errors="coerce")

# === Extract ICDs ===
def extract_icd_prefix(text):
    """Return the three-character ICD category embedded in an impression string.

    The input is upper-cased and searched for the first parenthesised token
    made of one letter and two digits, optionally followed by a dot and more
    digits (e.g. ``"(I63.9)"``). Only the leading three characters of that
    token are returned.

    Args:
        text: Impression text; anything that is not a ``str`` (NaN, None,
            numbers) is treated as carrying no code.

    Returns:
        The three-character category such as ``"I63"``, or ``""`` when the
        input is not a string or contains no matching token.
    """
    # Missing values arrive as non-strings; they have nothing to extract.
    if not isinstance(text, str):
        return ""

    found = re.search(r"\(([A-Z]\d{2}(?:\.\d+)?)\)", text.upper())
    return found.group(1)[:3] if found else ""

# Derive the three-character ICD category for each impression column.
for impression_col, icd_col in (
    ("primary_impression", "primary_icd"),
    ("secondary_impression", "secondary_icd"),
):
    df[icd_col] = df[impression_col].apply(extract_icd_prefix)

# === Aggregate by incident ===
def _any_stroke_alert(values):
    """Return True if any value in the group reads "true" (ignoring case and surrounding whitespace)."""
    return any(str(val).strip().lower() == "true" for val in values)

# Collapse the rows of each incident to a single row: the earliest vitals
# timestamp, a stroke-alert flag that is True when any row was flagged, and
# the "first" aggregation for every other field.
incident_agg_spec = {
    "age": "first",
    "age_units": "first",
    "primary_icd": "first",
    "secondary_icd": "first",
    "transport_disposition": "first",
    "response_type": "first",
    "vitals_time": "min",
    "lkw_time": "first",
    "onset_time": "first",
    "cardiac_arrest": "first",
    "final_acuity": "first",
    "stroke_alert": _any_stroke_alert,
}
per_incident = df.groupby("incident_id")
grouped = per_incident.agg(incident_agg_spec).reset_index()

# === Quarter Extraction ===
# Reporting quarter is taken from the earliest vitals timestamp of the incident.
grouped["quarter"] = grouped["vitals_time"].dt.to_period("Q")

# === Logic filters ===
grouped["age"] = pd.to_numeric(grouped["age"], errors="coerce")

# Adults only. The numeric age is meaningful only together with its unit: an
# age of 20 recorded in months or days must not pass the ">= 18" check, so
# incidents whose unit is explicitly sub-year are rejected. Missing or
# unrecognised units are left to the numeric check alone, as before.
# NOTE(review): assumes the units column carries a label such as "Months"
# and/or the NEMSIS ePatient.16 codes (2516001 Days, 2516003 Hours,
# 2516005 Minutes, 2516007 Months) — confirm against ahaems_cleaned.
sub_year_units = grouped["age_units"].astype(str).str.contains(
    "day|hour|minute|month|2516001|2516003|2516005|2516007", case=False, regex=True
)
age_valid = (grouped["age"] >= 18) & ~sub_year_units

# Qualifying stroke/TIA ICD categories; either impression may carry one.
stroke_icds = ["I60", "I61", "I62", "I63", "G45", "G46"]
impression_valid = grouped["primary_icd"].isin(stroke_icds) | grouped["secondary_icd"].isin(stroke_icds)

# Patient must have been transported by this unit (case-insensitive substring match).
transport_valid = grouped["transport_disposition"].str.contains("transport by this ems unit", case=False, na=False)

# Response type must contain code 2205001; missing values do not qualify.
response_valid = grouped["response_type"].str.contains("2205001", na=False)

# === LKW vs onset fallback ===
# Prefer last-known-well; where it is missing, fall back to symptom onset
# (which may itself be missing, leaving the reference empty).
grouped["time_reference"] = grouped["lkw_time"].fillna(grouped["onset_time"])

# Flag incidents where vitals were taken 86400 seconds (24 hours) or more
# after the reference time. Incidents missing either timestamp are never flagged.
both_times_known = grouped["time_reference"].notna() & grouped["vitals_time"].notna()
elapsed_seconds = (grouped["vitals_time"] - grouped["time_reference"]).dt.total_seconds()
lkw_exclude = both_times_known & (elapsed_seconds >= 86400)

# Exclusions are matched by substring rather than exact equality, in line
# with how the response-type code is matched. An exact comparison silently
# never fires when the stored value is a label with the code appended
# (e.g. "... (3001003)") or a float-formatted code ("3001003.0"); a substring
# match still catches a bare code, so nothing previously excluded is lost.
# NOTE(review): confirm the stored format of these two columns.
arrest_exclude = grouped["cardiac_arrest"].astype(str).str.contains("3001003|3001005", regex=True)
acuity_exclude = grouped["final_acuity"].astype(str).str.contains("4219909", regex=False)

# Denominator: every inclusion criterion met and no exclusion triggered.
grouped["in_denominator"] = (
    age_valid & impression_valid & transport_valid & response_valid &
    ~lkw_exclude & ~arrest_exclude & ~acuity_exclude
)

# Numerator: denominator incidents that also had a stroke alert.
grouped["in_numerator"] = grouped["in_denominator"] & grouped["stroke_alert"]

# === Summary ===
# Per-quarter denominator/numerator counts, computed over denominator incidents only.
# NOTE(review): an incident with no vitals timestamp has a missing quarter, and
# groupby drops missing keys by default, so such incidents would not be counted
# here even though they can still appear in the fallout export — confirm intent.
denominator_rows = grouped.loc[grouped["in_denominator"]]
summary = denominator_rows.groupby("quarter").agg(
    AHAEMS1_Denominator=("in_denominator", "sum"),
    AHAEMS1_Numerator=("in_numerator", "sum"),
).reset_index()

# Share of denominator incidents that met the numerator, as a percentage with two decimals.
numerator_share = summary["AHAEMS1_Numerator"] / summary["AHAEMS1_Denominator"]
summary["AHAEMS1_Percentage"] = (numerator_share * 100).round(2)

display(summary)

# === Export Fallout CSV ===
# A fallout is an incident that counts toward the denominator but not the numerator.
is_fallout = grouped["in_denominator"] & ~grouped["in_numerator"]
fallouts = grouped.loc[is_fallout]
fallout_path = FALLOUTS_DIR / "ahaems1_fallouts.csv"

# Create the destination folder if needed (no error when it already exists),
# then write the fallout rows without the DataFrame index.
os.makedirs(fallout_path.parent, exist_ok=True)
fallouts.to_csv(fallout_path, index=False)

ModuleNotFoundError: No module named 'project_paths'

In [5]:
import os

# Diagnostic: show the directory the kernel is running from, which is what
# relative paths in this notebook resolve against.
current_dir = os.getcwd()
print("📂 Current working directory:", current_dir)

📂 Current working directory: /home/jovyan


In [1]:
from pathlib import Path

# Diagnostic: check whether project_paths.py is present at the location the
# path-setup cell adds to sys.path.
expected_path = Path.home().joinpath("work", "EMS_QI_Projects", "project_paths.py")
print("🔍 Looking for:", expected_path)
module_file_present = expected_path.exists()
print("📦 Exists?", module_file_present)

🔍 Looking for: /home/jovyan/work/EMS_QI_Projects/project_paths.py
📦 Exists? False
