In [None]:
#| output: false

from IPython.display import display, Markdown, HTML
from datetime import datetime, timezone
import pandas as pd
from dotenv import load_dotenv
import os
from pathlib import Path

In [None]:
#| tags: [parameters]
#| output: false

# Read settings from a local .env file. override=True asks python-dotenv to let
# values from the file replace variables already set in the process environment.
load_dotenv(override=True)

# Fallback database: fhir.duckdb in the parent of the current working directory.
default_connection_string = f"duckdb:////{os.path.abspath('../fhir.duckdb')}"

# DB_CONNECTION_STRING, when present, takes precedence over the fallback.
connection_string = os.getenv("DB_CONNECTION_STRING", default_connection_string)
connection_string

In [None]:
from sqlalchemy import create_engine
from sqlalchemy.pool import NullPool

# Shared engine used by every query cell below. NullPool is requested so that
# the engine does not keep pooled connections around between queries.
con = create_engine(
    connection_string,
    poolclass=NullPool,
)

## c_resource_count

In [None]:
#| output: false
# check if table exists
# Probe c_resource_count once so the report cells below can fall back to a
# "No data found" message instead of failing.
#
# The probe goes through pd.read_sql -- the same call the report cells use --
# rather than Engine.execute(), which SQLAlchemy 2.0 removed; combined with a
# bare `except:` that made the table look missing on every run. pd.read_sql
# also fully consumes and closes its result.
c_resource_count_exists = False
try:
    pd.read_sql("SELECT * FROM c_resource_count LIMIT 1", con)
    c_resource_count_exists = True
except Exception:
    # Best effort: any query failure (typically "table does not exist") means
    # the section is skipped. KeyboardInterrupt/SystemExit still propagate.
    c_resource_count_exists = False

c_resource_count_exists

### Resource Count by Category

In [None]:
# Total number of resources per resource type.
sql = """
    SELECT 
		resource_type AS "resource type",
		SUM(cnt) AS count
    FROM c_resource_count
    GROUP BY 1
    ORDER BY resource_type
"""

if c_resource_count_exists:
    resources = pd.read_sql(sql, con)
    # '{:,.0f}' (the same format resource_to_table uses) prints whole numbers
    # with thousands separators whether the backend hands SUM() back as an
    # int, a float or a Decimal; plain '{:,}' would render floats as "1,234.0".
    table = resources.to_html(
        formatters={"count": "{:,.0f}".format},
        index=False,
    )

    display(HTML(table))
else:
    display(Markdown("### No data found"))
  

### Resource Date Ranges

In [None]:
# Earliest and latest primary_year per resource type; rows without a year are
# excluded so MIN/MAX are never NULL.
sql = """
    SELECT 
		resource_type,
        MIN(primary_year)::INTEGER AS "earliest year",
        MAX(primary_year)::INTEGER AS "latest year"
    FROM c_resource_count
    WHERE primary_year IS NOT NULL
    GROUP BY 1
    ORDER BY resource_type
"""

if c_resource_count_exists:
    resources = pd.read_sql(sql, con)
    # Formatter keys must equal the DataFrame column names. The previous keys
    # ('latest' / 'earliest') matched no column, so the formatters were
    # silently ignored.
    table = resources.to_html(
        formatters={
            "latest year": "{:}".format,
            "earliest year": "{:}".format,
        },
        index=False,
    )

    display(HTML(table))
else:
    display(Markdown("### No data found"))

### Resource Count by Decade

In [None]:
import matplotlib.ticker as tick

# Resource count per decade of primary_year. The decade is cast to INTEGER
# before TEXT (matching the other decade queries in this notebook): where `/`
# is floating-point division the bare ::TEXT cast labels bars "1990.0"
# instead of "1990". TEXT keeps the x axis categorical.
sql = """
SELECT
    (FLOOR(primary_year/10)*10)::INTEGER::TEXT AS decade,
    SUM(cnt) AS cnt
FROM c_resource_count
GROUP BY 1
ORDER BY 1
"""

if c_resource_count_exists:
    # NOTE: the name `patients` is kept for compatibility; the frame holds
    # resource counts per decade, not patients.
    patients = pd.read_sql(sql, con)
    plot = patients.plot.bar(x="decade", y="cnt", xlabel="decade", ylabel="count")
    # Show y tick labels as integers with thousands separators.
    plot.get_yaxis().set_major_formatter(tick.FuncFormatter(lambda x, p: format(int(x), ',')))
    # Only one series is plotted, so the legend adds nothing.
    plot.get_legend().remove()
else:
    display(Markdown("### No data found"))

### Observation Count by Category

In [None]:
def resource_to_table(resource_type):
    """Render the per-category counts of one resource type as an HTML table.

    Queries c_resource_count through the notebook-level engine `con`.

    Args:
        resource_type: Value matched against the resource_type column.

    Returns:
        str: HTML table with the columns resource_type, category and count,
        one row per category, ordered by category.
    """
    # The value is embedded in a SQL string literal, so double any single
    # quote to keep it inside the literal.
    quoted_type = str(resource_type).replace("'", "''")
    # resource_type is constant under the WHERE filter, so ordering by it alone
    # left the category rows in arbitrary order; category is the tie-breaker.
    sql = f"""
        SELECT 
            resource_type,
            category,
            SUM(cnt) AS count
        FROM c_resource_count
        WHERE resource_type = '{quoted_type}'
        GROUP BY 1,2
        ORDER BY resource_type, category
    """
    detail = pd.read_sql(sql, con)
    # Whole numbers with thousands separators, whatever numeric type SUM() yields.
    return detail.to_html(formatters={
        'count': '{:,.0f}'.format,
    }, index=False)

# Resource types that carry at least one non-NULL category.
sql = """
    SELECT resource_type
    FROM c_resource_count
    WHERE category IS NOT NULL
    GROUP BY 1
    ORDER BY resource_type
"""

if not c_resource_count_exists:
    display(Markdown("### No data found"))
else:
    resources = pd.read_sql(sql, con)
    # One heading plus one category table per resource type.
    for resource_type in resources["resource_type"]:
        heading = "### " + resource_type + " by Category"
        display(Markdown(heading))
        display(HTML(resource_to_table(resource_type)))

## c_pt_count

In [None]:
#| output: false
# check if table exists
# Probe c_pt_count once so the report cells below can fall back to a
# "No data found" message instead of failing.
#
# The probe goes through pd.read_sql -- the same call the report cells use --
# rather than Engine.execute(), which SQLAlchemy 2.0 removed; combined with a
# bare `except:` that made the table look missing on every run. pd.read_sql
# also fully consumes and closes its result.
c_pt_count_exists = False
try:
    pd.read_sql("SELECT * FROM c_pt_count LIMIT 1", con)
    c_pt_count_exists = True
except Exception:
    # Best effort: any query failure (typically "table does not exist") means
    # the section is skipped. KeyboardInterrupt/SystemExit still propagate.
    c_pt_count_exists = False

c_pt_count_exists

#### Patient Count by Birth Decade and Sex

In [None]:
# One row per (birth decade, sex) pair with the summed patient count.
sql = """
    SELECT 
       (FLOOR(birth_year/10)*10)::INTEGER AS birth_decade,
       sex,
       SUM(cnt) AS cnt
    FROM c_pt_count
    GROUP BY 1,2
    ORDER BY birth_decade DESC, sex DESC
"""

if not c_pt_count_exists:
    display(Markdown("### No data found"))
else:
    patients = pd.read_sql(sql, con)
    # Long -> wide: one row per birth decade, one column (bar series) per sex.
    pivot = patients.pivot(index="birth_decade", columns=["sex"], values="cnt")
    pivot.plot.bar(xlabel="birth decade", ylabel="count")

#### Patient Count by Race and Ethnicity Codes

In [None]:
# Display labels for the ethnicity codes; "nan" is the label used for NULL.
ethnicity_codes = {
    "2135-2": "Hisp or Lat",
    "2186-5": "Not Hisp or Lat",
    "nan": "Missing",
}

# Display labels for the race codes; "nan" relabels a missing-value legend entry.
race_codes = {
    "1002-5": "Am Indian or Alaska Nat",
    "2028-9": "Asian",
    "2054-5": "Black or African Am",
    "2076-8": "Nat Hawaiian or Other Pac Isl",
    "2106-3": "White",
    "2131-1": "Other",
    "nan": "Missing",
}

# Ethnicity codes are translated to labels inside the query (unknown codes pass
# through unchanged); race codes are translated later, in the chart legend.
sql = f"""
    SELECT 
       omb_race_code,
       CASE 
           WHEN omb_ethnicity_code = '2135-2'
               THEN '{ethnicity_codes["2135-2"]}'
            WHEN omb_ethnicity_code = '2186-5'
               THEN '{ethnicity_codes["2186-5"]}'
            WHEN omb_ethnicity_code IS NULL
                THEN '{ethnicity_codes["nan"]}'
            ELSE omb_ethnicity_code
        END AS omb_ethnicity_code,
       SUM(cnt) AS cnt
    FROM c_pt_count
    GROUP BY 1,2
"""

if not c_pt_count_exists:
    display(Markdown("### No data found"))
else:
    patients = pd.read_sql(sql, con)
    # Long -> wide: one row per ethnicity label, one column (bar series) per race code.
    pivot = patients.pivot(
        index="omb_ethnicity_code",
        columns=["omb_race_code"],
        values="cnt",
    )
    plot = pivot.plot.bar(xlabel="", ylabel="count", rot=0)
    # Swap each race code appearing in a legend entry for its readable label.
    for legend_entry in plot.legend().get_texts():
        label = legend_entry.get_text()
        for code, description in race_codes.items():
            label = label.replace(code, description)
        legend_entry.set_text(label)

## c_pt_deceased_count

In [None]:
#| output: false
# check if table exists
# Probe c_pt_deceased_count once so the report cell below can fall back to a
# "No data found" message instead of failing.
#
# This cell initialises its own flag. It used to reset c_pt_count_exists (a
# copy-paste slip), which clobbered the result of the c_pt_count probe.
#
# The probe goes through pd.read_sql -- the same call the report cells use --
# rather than Engine.execute(), which SQLAlchemy 2.0 removed; combined with a
# bare `except:` that made the table look missing on every run. pd.read_sql
# also fully consumes and closes its result.
c_pt_deceased_count_exists = False
try:
    pd.read_sql("SELECT * FROM c_pt_deceased_count LIMIT 1", con)
    c_pt_deceased_count_exists = True
except Exception:
    # Best effort: any query failure (typically "table does not exist") means
    # the section is skipped. KeyboardInterrupt/SystemExit still propagate.
    c_pt_deceased_count_exists = False

c_pt_deceased_count_exists

### Deceased Patients by Death Decade

In [None]:
# Deceased-patient count per decade of death_year.
sql = """
SELECT
    (FLOOR(death_year/10)*10)::INTEGER AS death_decade,
    SUM(cnt) AS cnt
FROM c_pt_deceased_count
GROUP BY 1
ORDER BY 1
"""

if not c_pt_deceased_count_exists:
    display(Markdown("### No data found"))
else:
    patients = pd.read_sql(sql, con)
    plot = patients.plot.bar(
        x="death_decade",
        y="cnt",
        xlabel="death decade",
        ylabel="count",
    )
    # Only one series is plotted, so the legend adds nothing.
    plot.get_legend().remove()

In [None]:
#| output: false
# Close the database: dispose of the engine `con` now that every query cell
# above has run, so it does not keep hold of any database resources.
con.dispose()