In [None]:
#| output: false
%pip install sqlalchemy
# be sure to adjust this if the duckdb version in the dbt driver differs
%pip install duckdb==0.5.1
%pip install duckdb-engine
%pip install psycopg2
%pip install python-dotenv

import matplotlib.pylab as plt
plt.rcParams['figure.dpi'] = 100

from IPython.display import display, Markdown, HTML
from datetime import datetime, timezone
import pandas as pd
from dotenv import load_dotenv
import os
from pathlib import Path 

In [None]:
#| tags: [parameters]

# Read settings from a local .env file; override=True asks python-dotenv to let
# values from .env replace variables already present in the process environment.
load_dotenv(override=True)

# Database URL for the engine created in the next cell (None when the variable is unset).
connection_string = os.environ.get('DB_CONNECTION_STRING')

# The connection string is intentionally not echoed as cell output: it may embed
# credentials, and cell outputs are kept in the saved/rendered notebook.

In [None]:
from sqlalchemy import create_engine

# Fail early with an actionable message instead of letting create_engine()
# reject a missing or empty URL with a less obvious error.
if not connection_string:
    raise ValueError(
        "connection_string is empty; set DB_CONNECTION_STRING in the environment or a .env file"
    )

# Engine shared by every query in this notebook.
con = create_engine(connection_string)

In [None]:
from sqlalchemy import text as sql_text

# Start time of the most recent run recorded in quality_metrics.
# The query goes through an explicit connection and text(): Engine.execute() and
# raw-string statements were removed in SQLAlchemy 2.0, and sqlalchemy is installed
# unpinned at the top of this notebook.
with con.connect() as conn:
    metric_time = conn.execute(
        sql_text("SELECT MAX(run_started_at) FROM quality_metrics")
    ).fetchall()
as_of_time = metric_time[0][0]

if as_of_time:
    # Depending on the driver and column type the value may already be a datetime
    # rather than an ISO-8601 string; accept both.
    if isinstance(as_of_time, datetime):
        utc_dt = as_of_time
    else:
        utc_dt = datetime.fromisoformat(as_of_time)
    # Naive timestamps are interpreted as UTC; timestamps that already carry an
    # offset keep it instead of being relabelled.
    if utc_dt.tzinfo is None:
        utc_dt = utc_dt.replace(tzinfo=timezone.utc)
    # Convert to the local timezone of the machine rendering the report.
    utc_dt = utc_dt.astimezone(tz=None)
    # "%-I" (12-hour clock without zero padding) is a platform-specific strftime
    # extension, so the hour is computed by hand to keep the cell portable.
    hour_12 = utc_dt.hour % 12 or 12
    display(Markdown("## Run at " + utc_dt.strftime(f"%b %d, %Y at {hour_12}:%M:%S %p")))
else:
    display(Markdown("## No quality metrics found"))

## c_pt_count

In [None]:
#| output: false
# check if table exists
from sqlalchemy import text as sql_text

# Probe the table with a one-row query. Any failure is deliberately treated as
# "table not available" so the plotting cells below can show a placeholder.
# The probe uses an explicit connection and text() because Engine.execute() no
# longer exists in SQLAlchemy 2.0; calling it would raise inside the try block
# and make the flag False even when the table is present.
c_pt_count_exists = False
try:
    with con.connect() as conn:
        conn.execute(sql_text("SELECT * FROM c_pt_count LIMIT 1"))
    c_pt_count_exists = True
except Exception:
    # Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
    c_pt_count_exists = False

c_pt_count_exists

### Patient Count by Birth Decade and Sex

In [None]:
# Patient counts summed per decade of birth and per sex.
sql = """
    SELECT 
       (FLOOR(birth_year/10)*10)::INTEGER AS birth_decade,
       sex,
       SUM(cnt) AS cnt
    FROM c_pt_count
    GROUP BY 1,2
    ORDER BY birth_decade DESC, sex DESC
"""

if not c_pt_count_exists:
    # The table probe failed, so show a placeholder instead of a chart.
    display(Markdown("### No data found"))
else:
    patients = pd.read_sql(sql, con)
    # Reshape to one row per birth decade with one column (bar series) per sex.
    pivot = patients.pivot(
        index="birth_decade",
        columns=["sex"],
        values="cnt",
    )
    pivot.plot.bar(xlabel="birth decade", ylabel="count")

### Patient Count by Race and Ethnicity Codes

In [None]:
# Short display labels for the ethnicity codes handled in the query below;
# the "nan" entry is the label used for missing values.
ethnicity_codes = {
    "2135-2": "Hisp or Lat",
    "2186-5": "Not Hisp or Lat",
    "nan": "Missing",
}

# Short display labels for race codes, used to relabel the chart legend;
# the "nan" entry is the label used for missing values.
race_codes = {
    "1002-5": "Am Indian or Alaska Nat",
    "2028-9": "Asian",
    "2054-5": "Black or African Am",
    "2076-8": "Nat Hawaiian or Other Pac Isl",
    "2106-3": "White",
    "2131-1": "Other",
    "nan": "Missing",
}

# Counts per race code and ethnicity label. Known ethnicity codes and NULL are
# translated to their labels inside the query; any other code passes through as-is.
sql = """
    SELECT 
       omb_race_code,
       CASE 
           WHEN omb_ethnicity_code = '2135-2'
               THEN '{hispanic}'
            WHEN omb_ethnicity_code = '2186-5'
               THEN '{not_hispanic}'
            WHEN omb_ethnicity_code IS NULL
                THEN '{missing}'
            ELSE omb_ethnicity_code
        END AS omb_ethnicity_code,
       SUM(cnt) AS cnt
    FROM c_pt_count
    GROUP BY 1,2
""".format(
    hispanic=ethnicity_codes["2135-2"],
    not_hispanic=ethnicity_codes["2186-5"],
    missing=ethnicity_codes["nan"],
)

if not c_pt_count_exists:
    # The table probe failed, so show a placeholder instead of a chart.
    display(Markdown("### No data found"))
else:
    patients = pd.read_sql(sql, con)
    # One group of bars per ethnicity label, one bar series per race code.
    pivot = patients.pivot(
        index="omb_ethnicity_code",
        columns=["omb_race_code"],
        values="cnt",
    )
    plot = pivot.plot.bar(xlabel="", ylabel="count", rot=0)
    # Relabel the legend: substitute each race code with its display name.
    for legend_entry in plot.legend().get_texts():
        label = legend_entry.get_text()
        for code, display_name in race_codes.items():
            label = label.replace(code, display_name)
        legend_entry.set_text(label)

## c_pt_deceased_count

In [None]:
#| output: false
# check if table exists
from sqlalchemy import text as sql_text

# Initialise this cell's own flag only; c_pt_count_exists belongs to the
# c_pt_count section and must not be reset here, otherwise re-running those
# cells after this one would wrongly report "No data found".
c_pt_deceased_count_exists = False
try:
    # Explicit connection + text(): Engine.execute() and raw-string statements
    # were removed in SQLAlchemy 2.0.
    with con.connect() as conn:
        conn.execute(sql_text("SELECT * FROM c_pt_deceased_count LIMIT 1"))
    c_pt_deceased_count_exists = True
except Exception:
    # Any failure of the probe is treated as "table not available".
    # Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
    c_pt_deceased_count_exists = False

c_pt_deceased_count_exists

### Deceased Patients by Death Decade

In [None]:
# Deceased-patient counts summed per decade of death, oldest decade first.
sql = """
SELECT
    (FLOOR(death_year/10)*10)::INTEGER AS death_decade,
    SUM(cnt) AS cnt
FROM c_pt_deceased_count
GROUP BY 1
ORDER BY 1
"""

if not c_pt_deceased_count_exists:
    # The table probe failed, so show a placeholder instead of a chart.
    display(Markdown("### No data found"))
else:
    patients = pd.read_sql(sql, con)
    plot = patients.plot.bar(
        x="death_decade",
        y="cnt",
        xlabel="death decade",
        ylabel="count",
    )
    # Only one series is plotted, so the legend is dropped.
    plot.get_legend().remove()

In [None]:
#| output: false
# Clean up once the report's queries are finished: Engine.dispose() closes the
# database connections held in the engine's connection pool.
con.dispose()