In [None]:

# Load duckdb, which lets us efficiently load large files
import duckdb

# Load pandas, which lets us manipulate dataframes
import pandas as pd

# Import jupysql Jupyter extension to create SQL cells
%load_ext sql

# Set configurations on jupysql to directly output data to Pandas and to simplify the output that is printed to the notebook.
# autopandas: %%sql cells hand their result back as a pandas DataFrame.
%config SqlMagic.autopandas = True

# feedback / displaycon: turn off the extra status and connection text jupysql would otherwise print.
%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False

# Connect jupysql to DuckDB using a SQLAlchemy-style connection string. Either connect to an in memory DuckDB, or a file backed db.
# This notebook uses the in-memory database, so nothing persists in DuckDB between kernel restarts.
%sql duckdb:///:memory:

In [None]:
# Peek at the raw CSV (header row plus the first data rows) before parsing it.
# NOTE(review): absolute, user-specific path - this cell only runs on the author's machine as written.
!head '/Users/reethamgubba/Programming Projects/OneFact NonProfit/datathinking.org-codespace/notebooks/01-0804233_Select_Specialty_Hospital-Quad_Cities_standardcharges.csv'

CHARGE_CODE,CHARGE_DESCRIPTION,CHARGE,REVENUE_CODE,CPT_CODE,PHARMACY_NDC
3505006,ROOM & BOARD ICU,6668.56,200,,
3505009,ROOM & BOARD ICU,6668.56,202,,
3506000,ROOM & BOARD ICU,6668.56,201,,
3506001,ROOM & BOARD ICU,6668.56,203,,
3506002,ROOM & BOARD ICU,6668.56,204,,
3510007,ROOM & BOARD,2963.81,120,,
3515014,ROOM & BOARD,2963.81,110,,
3515017,ROOM & BOARD,2963.81,194,,
3515018,ROOM & BOARD,2963.81,213,,


In [None]:
%%sql
SELECT *
FROM read_csv(
  '/Users/reethamgubba/Programming Projects/OneFact NonProfit/datathinking.org-codespace/notebooks/01-0804233_Select_Specialty_Hospital-Quad_Cities_standardcharges.csv',
  -- The first line of the file holds the column names
  header=True,
  -- Fields containing n/a are read as NULL
  nullstr='n/a',
  -- Explicit schema so the code columns stay text and only CHARGE is numeric
  columns={
    'CHARGE_CODE': 'VARCHAR',
    'CHARGE_DESCRIPTION': 'VARCHAR',
    'CHARGE': 'DOUBLE',
    'REVENUE_CODE': 'VARCHAR',
    'CPT_CODE': 'VARCHAR',
    'PHARMACY_NDC': 'VARCHAR'
  }
)
-- Preview only the first ten rows
LIMIT 10;

Unnamed: 0,CHARGE_CODE,CHARGE_DESCRIPTION,CHARGE,REVENUE_CODE,CPT_CODE,PHARMACY_NDC
0,3505006,ROOM & BOARD ICU,6668.56,200,,
1,3505009,ROOM & BOARD ICU,6668.56,202,,
2,3506000,ROOM & BOARD ICU,6668.56,201,,
3,3506001,ROOM & BOARD ICU,6668.56,203,,
4,3506002,ROOM & BOARD ICU,6668.56,204,,
5,3510007,ROOM & BOARD,2963.81,120,,
6,3515014,ROOM & BOARD,2963.81,110,,
7,3515017,ROOM & BOARD,2963.81,194,,
8,3515018,ROOM & BOARD,2963.81,213,,
9,3516001,ROOM & BOARD,2963.81,191,,


# Parquet File Conversion

In [None]:
%%sql
COPY (
  -- Typed read of the full CSV (same options as the preview query, without a LIMIT)
  SELECT *
  FROM read_csv('/Users/reethamgubba/Programming Projects/OneFact NonProfit/datathinking.org-codespace/notebooks/01-0804233_Select_Specialty_Hospital-Quad_Cities_standardcharges.csv',
    header=True,
    nullstr='n/a',
    columns={'CHARGE_CODE': 'VARCHAR',
             'CHARGE_DESCRIPTION': 'VARCHAR',
             'CHARGE': 'DOUBLE',
             'REVENUE_CODE': 'VARCHAR',
             'CPT_CODE': 'VARCHAR',
             'PHARMACY_NDC': 'VARCHAR'})
  -- FORMAT is stated explicitly below so the output type does not depend on the file extension
) TO '/Users/reethamgubba/Programming Projects/OneFact NonProfit/datathinking.org-codespace/notebooks/01-0804233_Select_Specialty_Hospital-Quad_Cities_standardcharges.parquet' (FORMAT PARQUET, COMPRESSION ZSTD);


Unnamed: 0,Success


In [None]:
# Charting setup: Altair for the chart grammar, VegaFusion to evaluate chart transforms, Polars for the DataFrame.
# NOTE(review): these imports sit mid-notebook; consider moving them to the first cell with the others.
import vegafusion as vf
import polars as pl
import altair as alt

# Turn off Altair's max-rows check and select the HTML renderer.
# NOTE(review): vf.enable() below runs after these two calls and presumably takes over the
# data transformer / renderer, which would make them redundant - confirm before removing.
alt.data_transformers.disable_max_rows()
alt.renderers.enable('html')

# Configure DuckDB connection
vf.runtime.set_connection("duckdb")

# Enable Mime Renderer
# The cell output shows this resolves to mimetype='html' with the row limit given here.
vf.enable(row_limit=100000000)

vegafusion.enable(mimetype='html', row_limit=100000000, embed_options=None)

In [None]:
# Load the Parquet file written by the COPY cell into a Polars DataFrame for charting.
charges = pl.read_parquet('/Users/reethamgubba/Programming Projects/OneFact NonProfit/datathinking.org-codespace/notebooks/01-0804233_Select_Specialty_Hospital-Quad_Cities_standardcharges.parquet')

In [None]:
# Check the column dtypes after the CSV -> Parquet round trip (code columns as strings, CHARGE as float).
print(charges.schema)

{'CHARGE_CODE': Utf8, 'CHARGE_DESCRIPTION': Utf8, 'CHARGE': Float64, 'REVENUE_CODE': Utf8, 'CPT_CODE': Utf8, 'PHARMACY_NDC': Utf8}


# Altair Visualizations

In [None]:
# Bar chart: number of charge items per revenue code.
# REVENUE_CODE is a string identifier (Utf8 in the printed schema), so it is encoded as
# nominal (:N) - one bar per code - rather than as a quantitative axis.
alt.Chart(charges).mark_bar().encode(
    x=alt.X('REVENUE_CODE:N', title='Revenue code'),
    y=alt.Y('count()', title='Number of charge items'),
)

In [None]:
# Bar chart: number of rows per CHARGE_CODE value.
# NOTE(review): CHARGE_CODE is a string column in the printed schema but is encoded as
# quantitative (:Q) here - confirm whether nominal (:N) was intended. With one bar per
# distinct code a nominal axis could get very wide, so the encoding is left unchanged.
alt.Chart(charges).mark_bar().encode(
    x='CHARGE_CODE:Q',
    y='count()',
)

In [None]:
# Histogram of charge amounts.
# CHARGE is continuous, so it is binned before counting; without bin=True every distinct
# price would get its own sliver-wide bar and the distribution would be unreadable.
alt.Chart(charges).mark_bar().encode(
    x=alt.X('CHARGE:Q', bin=True, title='Charge (binned)'),
    y=alt.Y('count()', title='Number of charge items'),
)

# Subset


In [None]:
%%sql
COPY (
  SELECT CHARGE_CODE, CHARGE_DESCRIPTION, CHARGE, REVENUE_CODE
  -- Only these four columns are carried into the subset file
  FROM '/Users/reethamgubba/Programming Projects/OneFact NonProfit/datathinking.org-codespace/notebooks/01-0804233_Select_Specialty_Hospital-Quad_Cities_standardcharges.parquet'
  -- LIMIT 1000 -- uncomment to cap the subset while experimenting
)
TO '/Users/reethamgubba/Programming Projects/OneFact NonProfit/datathinking.org-codespace/notebooks/01-0804233_Select_Specialty_Hospital-Quad_Cities_standardcharges_subset.parquet'
(FORMAT 'parquet')

Unnamed: 0,Success


# Mosaic Prompt From Claude


meta:
  title: Hospital Charges
  description: An interactive dashboard of hospital charge codes, descriptions, and amounts.

data:
  charges:
    file: /Users/reethamgubba/Programming Projects/OneFact NonProfit/datathinking.org-codespace/notebooks/01-0804233_Select_Specialty_Hospital-Quad_Cities_standardcharges_subset.parquet

hconcat:

- vconcat:

  - hconcat:
    - input: menu
      label: Revenue Code
      as: $query  # menu, search box and brush all write to the same selection
      from: charges
      column: REVENUE_CODE

    - input: search
      label: Description
      as: $query
      from: charges
      column: CHARGE_DESCRIPTION
      type: contains

  - plot:
    - mark: barY  # Mosaic marks are directional (barX / barY); there is no plain "bar" mark
      data: {from: charges, filterBy: $query}
      x: CHARGE_CODE
      y: CHARGE
      fill: REVENUE_CODE

    - select: intervalY  # interactors are intervalX / intervalY / intervalXY; y (CHARGE) is the continuous channel. NOTE(review) - confirm a charge-range brush is the intended interaction
      as: $query
      brush: {fillOpacity: 0.2}

    xyDomain: Fixed

  - input: table
    from: charges
    maxRows: 10
    filterBy: $query
    columns:
    - CHARGE_CODE
    - CHARGE_DESCRIPTION
    - CHARGE
    - REVENUE_CODE