In [1]:
import sys
import json
import pickle
import snowflake.connector
from snowflake.connector.pandas_tools import write_pandas, pd_writer
import getpass as gt
import pandas as pd
import numpy as np

from snowflake_conn import *

# Disable truncation when DataFrames are displayed: show every column and every row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None


In [2]:
# Open a connection via get_connection(); presumably provided by the star-import
# from snowflake_conn in the imports cell -- TODO confirm.
# NOTE(review): `conn` is not referenced by any later cell visible here (queries go
# through read_sql(sql) without passing it) -- confirm it is still needed.
conn = get_connection()

## Figure out why only outpatient claims are populating in cost, even though there are inpatient/SNF claims in the Tuva medical claims model

#### Start with raw Tuva medical claim
`dbt_packages/the_tuva_project/models/claims_preprocessing/service_category/staging/service_category__stg_medical_claim.sql`

In [3]:
# Pull the code columns (DRG, bill type, HCPCS, place of service, revenue center)
# plus claim identifiers from the raw medical claim table, then show the shape
# and the first row.
# NOTE(review): `limit 1000000` has no `order by`, so which rows come back is
# presumably arbitrary and may differ between runs -- confirm that a
# non-repeatable sample is acceptable for the checks that follow.
sql='''
select
APR_DRG_CODE,
BILL_TYPE_CODE,
CLAIM_ID,
CLAIM_LINE_NUMBER,
CLAIM_TYPE,
HCPCS_CODE,
MS_DRG_CODE,
PLACE_OF_SERVICE_CODE,
REVENUE_CENTER_CODE
from SANDBOX_FFS._tuva_claims.medical_claim
limit 1000000
'''

raw_claim = read_sql(sql)
print(raw_claim.shape)
raw_claim.head(1)

(1000000, 9)


Unnamed: 0,apr_drg_code,bill_type_code,claim_id,claim_line_number,claim_type,hcpcs_code,ms_drg_code,place_of_service_code,revenue_center_code
0,,111,-10000931441070202160,5,institutional,93229,951,,1


In [5]:
# Distinct MS-DRG codes present in the sampled claim lines.
raw_claim['ms_drg_code'].unique()

array(['951', '965', '976', '019', '964', '102', '947', '563', '950',
       '559', '949', '791', '599', None, '152', '914', '202', '564',
       '190', '793', '887', '203', '281', '100', '683', '865', '605',
       '963', '057', '884', '922', '056', '307', '316', '314', '315',
       '923', '008', '682', '222', '583', '597', '157', '604', '093',
       '607', '562', '302', '566', '561', '560', '013', '913', '544',
       '543', '582', '374', '090', '204', '153', '282', '283', '684',
       '555', '556', '280', '376', '598', '284', '303', '565', '089',
       '542', '195', '975', '103', '101', '011', '223'], dtype=object)

#### Go through each type of service grouper
`dbt_packages/the_tuva_project/models/claims_preprocessing/service_category/intermediate/*.sql`
- *Note: the source of truth for these apparently comes from `SANDBOX_FFS._tuva_claims.medical_claim` -- I don't know how this differs from `SANDBOX_FFS.tuva_claims.medical_claim`*

Data:
- acute inpatient professional -- (294, 3)
    - place_of_service_code = '21'
- dme professional -- (6303, 3)
    - hcpcs_code between 'E0100' and 'E8002'
- emergency department professional -- (282, 3)
    - place_of_service_code = '23'
- home health institutional -- (493, 2)
    - left(bill_type_code,2) in ('31','32','33')
- home health professional -- (4225, 3)
    - join on dme professional (claim number, claim line number)
    - medical claim place_of_service_code in ('12')
    - (dme claim_id is null and dme claim_line_number is null)
- hospice institutional -- (1086, 2)
    - left(bill_type_code,2) in ('81','82')
- hospice professional -- (2631, 3)
    - place_of_service_code in ('34')
- office visit professional -- (951163, 3)
    - place_of_service_code in ('11','02')
- outpatient hospital or clinic institutional -- (402653, 3)
    - join on emergency dept institutional --> no data
    - join on urgent care institutional --> no data
    - left(a.bill_type_code,2) in ('13','71','73') --> populates
    - *I don't see what the point of joining on two null tables is, even if we did have data there*
- skilled nursing institutional -- (1632, 2)
    - left(bill_type_code,2) in ('21','22')
- skilled nursing professional -- (5174, 3)
    - join on dme professional (on claim id and claim line number)
    - place_of_service_code in ('31','32') -- not sure which one
    - (dme claim_id is null and dme claim_line_number is null)
- urgent care professional -- (175672, 3)
    - place_of_service_code in ('20')

No rows:
- acute inpatient institutional
    - room and board requirement --> populates ('0100','0101','0110','0111','0112','0113','0114','0116','0117','0118','0119','0120','0121','0122','0123','0124','0126','0127','0128','0129','0130','0131','0132','0133','0134','0136','0137','0138','0139','0140','0141','0142','0143','0144','0146','0147','0148','0149','0150','0151','0152','0153','0154','0156','0157','0158','0159','0160','0164','0167','0169','0170','0171','0172','0173','0174','0179','0190','0191','0192','0193','0194','0199','0200','0201','0202','0203','0204','0206','0207','0208','0209','0210','0211','0212','0213','0214','0219','1000','1001','1002')
    - drg requirement --> does not populate (join medical claim diagnosis with `SANDBOX_FFS.terminology.ms_drg` and `SANDBOX_FFS.terminology.apr_drg`)
    - bill type requirement --> populates left(bill_type_code,2) in ('11','12') 
- ambulance professional
    - (hcpcs_code between 'A0425' and 'A0436' or place_of_service_code in ('41','42'))
- ambulatory surgery professional
    - join on dme professional (on claim id and claim line number) --> this populates
    - medical claim place_of_service_code in ('24') --> this doesn't populate
    - (dme prof claim_id is null and dme prof claim_line_number is null) --> this populates
- dialysis institutional
    - left(bill_type_code,2) in ('72')
- dialysis professional
    - place_of_service_code in ('65')
- emergency department institutional
    - revenue_center_code in ('0450','0451','0452','0459','0981')
    - left(bill_type_code,2) in ('13','71','73') 
    - *Note: 0456, urgent care, is included in most published definitions*
- inpatient psychiatric professional
    - place_of_service_code in ('51','55','56')
- inpatient rehab professional
    - place_of_service_code in ('61')
- lab institutional
    - left(bill_type_code,2) in ('14')
- lab professional
    - place_of_service_code = '81'
- outpatient hospital or clinic professional
    - place_of_service_code in ('15','17','19','22','49','50','60','71','72')
- outpatient psychiatric institutional
    - left(bill_type_code,2) in ('52')
- outpatient psychiatric professional
    - place_of_service_code in ('52','53','57','58')
- outpatient rehab professional
    - place_of_service_code in ('62')
- urgent care institutional
    - revenue_center_code = '0456'
    - left(bill_type_code,2) in ('13','71','73')

Professional combined -- (1164504, 4)
- YES: 
    - acute inpatient professional -- 294
    - dme professional -- 6303
    - emergency dept professional -- 282
    - home health professional -- 4225
    - hospice professional -- 2631
    - outpatient hospital or clinic -- 18760
    - office visit professional -- 951163
    - skilled nursing professional -- 5174
    - urgent care professional -- 175672
- No:  
    - ambulance professional
    - ambulatory surgery professional
    - dialysis professional
    - inpatient psychiatric professional
    - inpatient rehab professional
    - lab professional
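    - outpatient hospital or clinic professional *(TODO: conflicts with the 18760 "outpatient hospital or clinic" rows listed under YES above -- recheck)*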
    - outpatient psychiatric professional
    - outpatient rehab professional

Institutional combined -- (405864, 3)
- YES:
    - home health institutional -- 493
    - hospice institutional -- 1086
    - outpatient hospital or clinic -- 402653
    - skilled nursing institutional -- 1632
- No:
    - acute inpatient institutional
    - dialysis institutional
    - emergency department institutional
    - lab institutional
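    - outpatient hospital or clinic institutional *(TODO: conflicts with the 402653 "outpatient hospital or clinic" rows listed under YES above -- recheck)*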
    - outpatient psychiatric institutional
    - urgent care institutional

In [6]:
# Fully qualified name of the medical claim table, interpolated into a later query.
loc = "SANDBOX_FFS._tuva_claims.medical_claim"

In [7]:
# Load the whole service_category_grouper table, then show its shape and first row.
sql = """
select * from SANDBOX_FFS.claims_preprocessing.service_category_grouper
"""

scg = read_sql(sql)
print(scg.shape)
scg.head(1)

(1822814, 6)


Unnamed: 0,claim_id,claim_line_number,claim_type,service_category_1,service_category_2,tuva_last_run
0,-10000930438735201771,10,professional,Office Visit,Office Visit,2023-10-06 13:01:55.037514+00:00


In [8]:
# Load every row of the combined professional service-category table and report its shape.
sql = """
select * from SANDBOX_FFS.claims_preprocessing._int_combined_professional
"""

prof = read_sql(sql)
print(prof.shape)

(1158786, 4)


In [9]:
# Load every row of the combined institutional service-category table.
sql = """
select * from SANDBOX_FFS.claims_preprocessing._int_combined_institutional
"""

inst = read_sql(sql)
print(inst.shape)

# Number of rows in each service category.
inst.groupby('service_category_2').size()

(405864, 2)


service_category_2
Home Health                         493
Hospice                            1086
Outpatient Hospital or Clinic    402653
Skilled Nursing                    1632
dtype: int64

In [10]:
# Rebuild the institutional half of the service category grouper: left-join every
# institutional row of the medical claim table ({loc}) to the combined institutional
# categories on claim_id, labelling claims with no match as 'Other'.
# This returns more rows than `inst` above. The unmatched ('Other') rows are one
# reason. NOTE(review): the matched categories are also larger in the saved outputs
# (e.g. Home Health 6215 here vs 493 in `inst`), so nulls cannot be the whole
# explanation -- presumably this query is per claim line (it selects
# a.claim_line_number) while `inst` is per claim. TODO confirm.
# NOTE(review): tuva_last_run is a hardcoded literal here, not a value read from a table.
sql=f'''
select distinct 
    a.claim_id
    , a.claim_line_number
    , a.claim_type
    , case
        when service_category_2 = 'Acute Inpatient'               then 'Inpatient'
        when service_category_2 = 'Ambulatory Surgery'            then 'Outpatient'
        when service_category_2 = 'Dialysis'                      then 'Outpatient'
        when service_category_2 = 'Emergency Department'          then 'Outpatient'
        when service_category_2 = 'Home Health'                   then 'Outpatient'
        when service_category_2 = 'Hospice'                       then 'Outpatient'
        when service_category_2 = 'Inpatient Psychiatric'         then 'Inpatient'
        when service_category_2 = 'Inpatient Rehabilitation'      then 'Inpatient'
        when service_category_2 = 'Lab'                           then 'Ancillary'
        when service_category_2 = 'Office Visit'                  then 'Office Visit'
        when service_category_2 = 'Outpatient Hospital or Clinic' then 'Outpatient'
        when service_category_2 = 'Outpatient Psychiatric'        then 'Outpatient'
        when service_category_2 = 'Skilled Nursing'               then 'Inpatient'
        when service_category_2 = 'Urgent Care'                   then 'Outpatient'
        when service_category_2 is null                           then 'Other'
    end service_category_1
    , case
        when service_category_2 is null then 'Other'
        else service_category_2
    end service_category_2
    , '2023-10-05 19:23:07.304048+00:00' as tuva_last_run
from {loc} a
left join SANDBOX_FFS.claims_preprocessing._int_combined_institutional b
on a.claim_id = b.claim_id
where a.claim_type = 'institutional'
'''
# NOTE(review): `tmp` is a throwaway name; it is only used for the group counts below.
tmp = read_sql(sql)
print(tmp.shape)
tmp.groupby('service_category_2').size()

(664028, 6)


service_category_2
Home Health                        6215
Hospice                           12107
Other                             58066
Outpatient Hospital or Clinic    575092
Skilled Nursing                   12548
dtype: int64

In [12]:
# Row counts per service_category_2 within each claim type of the grouper output.
# In the saved outputs the institutional counts equal those of the left-join
# query above (e.g. 575092 Outpatient Hospital or Clinic, 58066 Other).
(
    scg
    .groupby('claim_type')['service_category_2']
    .value_counts()
)

claim_type     service_category_2           
institutional  Outpatient Hospital or Clinic    575092
               Other                             58066
               Skilled Nursing                   12548
               Hospice                           12107
               Home Health                        6215
professional   Office Visit                     948424
               Urgent Care                      175333
               Outpatient Hospital or Clinic     17085
               Durable Medical Equipment          6303
               Skilled Nursing                    5174
               Home Health                        4225
               Hospice                            2041
               Emergency Department                201
Name: count, dtype: int64

### Acute Inpatient
No claims meet these criteria (consistent with what we saw above)

In [13]:
# Sample of institutional medical claim lines with their date, facility, DRG,
# admission/discharge and amount fields; show the shape and first row.
# Despite appearing under the "Acute Inpatient" heading, the query does not
# filter to acute inpatient claims: its only predicate is
# claim_type = 'institutional'.
# NOTE(review): `limit 100000` has no `order by` -- presumably an arbitrary,
# non-repeatable subset; confirm that is acceptable.
sql=f'''
select 
  claim_id
, claim_line_number
, patient_id
, claim_type
, claim_start_date
, claim_end_date
, admission_date
, discharge_date
, facility_npi
, ms_drg_code
, apr_drg_code
, admit_source_code
, admit_type_code
, discharge_disposition_code
, paid_amount
, allowed_amount
, charge_amount
from SANDBOX_FFS._tuva_claims.medical_claim
where claim_type = 'institutional'
limit 100000
'''
mc = read_sql(sql)
print(mc.shape)
mc.head(1)

(100000, 17)


Unnamed: 0,claim_id,claim_line_number,patient_id,claim_type,claim_start_date,claim_end_date,admission_date,discharge_date,facility_npi,ms_drg_code,apr_drg_code,admit_source_code,admit_type_code,discharge_disposition_code,paid_amount,allowed_amount,charge_amount
0,-10000930413889201640,6,-10000010262984,institutional,,2016-11-10,,,1487662961,,,,,1,625.25,,625.25


In [14]:
# Look up the service category that the grouper assigned to each claim sampled
# in `mc`, to see whether any of them landed in an inpatient category.

# Drop missing IDs so a null is never rendered into the SQL text as `None`.
claim_ids = mc.claim_id.dropna().unique().tolist()

# Render the IN-list explicitly instead of interpolating a Python tuple:
#   * a one-element tuple renders as ('x',) -- the trailing comma is invalid SQL
#   * an empty tuple renders as ()          -- also invalid SQL
# `in (null)` matches no rows, so an empty sample produces an empty result
# instead of a syntax error. With two or more non-null IDs the generated text
# is identical to the tuple rendering.
claim_id_list = ', '.join(repr(claim_id) for claim_id in claim_ids) or 'null'

# NOTE(review): tuva_last_run is a hardcoded literal here, not the table's column.
sql = f'''
SELECT
  claim_id
, claim_type
, service_category_2
, '2023-10-05 16:59:34.487348+00:00' as tuva_last_run
from SANDBOX_FFS.claims_preprocessing.service_category_grouper
where claim_id in ({claim_id_list})
'''
sc = read_sql(sql)
print(sc.shape)
sc.head(1)

(5674, 4)


Unnamed: 0,claim_id,claim_type,service_category_2,tuva_last_run
0,-10000930581790201840,institutional,Outpatient Hospital or Clinic,2023-10-05 16:59:34.487348+00:00


In [15]:
# Distinct service categories assigned to the sampled claims; the saved output
# contains no inpatient category.
sc['service_category_2'].unique()

array(['Outpatient Hospital or Clinic'], dtype=object)

In [16]:
# Terminology check, part 1: load the MS-DRG reference table and preview it.
sql = """
select * from SANDBOX_FFS.terminology.ms_drg
"""

msdrg = read_sql(sql)
print(msdrg.shape)
msdrg.head(1)

(797, 6)


Unnamed: 0,ms_drg_code,mdc_code,medical_surgical,ms_drg_description,deprecated,deprecated_date
0,1,,Surgical,Heart transplant or implant of heart assist sy...,0,2023-04-01


In [17]:
# Terminology check, part 2: load the APR-DRG reference table and preview it.
sql = """
select * from SANDBOX_FFS.terminology.apr_drg
"""

aprdrg = read_sql(sql)
print(aprdrg.shape)
aprdrg.head(1)

(1272, 3)


Unnamed: 0,apr_drg_code,severity,apr_drg_description
0,1,1,Liver transplant &/or intestinal transplant


In [26]:
# Room-and-board requirement check: find institutional claims that have at least
# one line whose revenue_center_code is in the list below, then return the
# distinct code columns for all lines of those claims, labelled 'Acute Inpatient'.
# NOTE(review): the room_and_board_requirement CTE also filters on
# claim_id like '-100009300380312020%', so only claim IDs with that prefix are
# examined. An empty result therefore says nothing about the table as a whole --
# looks like a leftover debugging filter; confirm before drawing conclusions.
# The tuva_last_run literal in the first CTE is not used by the final select.
sql = f'''
with  __dbt__cte__service_category__stg_medical_claim as (
select
APR_DRG_CODE,
BILL_TYPE_CODE,
CLAIM_ID,
CLAIM_LINE_NUMBER,
CLAIM_TYPE,
HCPCS_CODE,
MS_DRG_CODE,
PLACE_OF_SERVICE_CODE,
REVENUE_CENTER_CODE,
'2023-10-05 16:59:34.487348+00:00' as tuva_last_run
from SANDBOX_FFS._tuva_claims.medical_claim
), 

room_and_board_requirement as (
select distinct 
  claim_id
from __dbt__cte__service_category__stg_medical_claim
where claim_type = 'institutional'
  and revenue_center_code in
  ('0100','0101',
   '0110','0111','0112','0113','0114','0116','0117','0118','0119',
   '0120','0121','0122','0123','0124','0126','0127','0128','0129',
   '0130','0131','0132','0133','0134','0136','0137','0138','0139',
   '0140','0141','0142','0143','0144','0146','0147','0148','0149',
   '0150','0151','0152','0153','0154','0156','0157','0158','0159',
   '0160','0164','0167','0169',
   '0170','0171','0172','0173','0174','0179',
   '0190','0191','0192','0193','0194','0199',
   '0200','0201','0202','0203','0204','0206','0207','0208','0209',
   '0210','0211','0212','0213','0214','0219',
   '1000','1001','1002')
  and claim_id like '-100009300380312020%'

)

select distinct 
  a.claim_id,
  APR_DRG_CODE,
  BILL_TYPE_CODE,
  CLAIM_TYPE,
  HCPCS_CODE,
  MS_DRG_CODE,
  PLACE_OF_SERVICE_CODE,
  REVENUE_CENTER_CODE,
  'Acute Inpatient' as service_category_2
from __dbt__cte__service_category__stg_medical_claim a
inner join room_and_board_requirement b
  on a.claim_id = b.claim_id
'''
room_board = read_sql(sql)
print(room_board.shape)
room_board.head(1)

(0, 9)


Unnamed: 0,claim_id,apr_drg_code,bill_type_code,claim_type,hcpcs_code,ms_drg_code,place_of_service_code,revenue_center_code,service_category_2


In [27]:
# Bill-type requirement check: find institutional claims whose bill_type_code
# starts with '11' or '12', then return the distinct code columns for all lines
# of those claims, labelled 'Acute Inpatient'.
# NOTE(review): the bill_type_requirement CTE also filters on
# claim_id like '-100009300380312020%', so only claim IDs with that prefix are
# examined. An empty result therefore says nothing about the table as a whole --
# looks like a leftover debugging filter; confirm before drawing conclusions.
# The tuva_last_run literal in the first CTE is not used by the final select.
sql = f'''
with  __dbt__cte__service_category__stg_medical_claim as (
select
APR_DRG_CODE,
BILL_TYPE_CODE,
CLAIM_ID,
CLAIM_LINE_NUMBER,
CLAIM_TYPE,
HCPCS_CODE,
MS_DRG_CODE,
PLACE_OF_SERVICE_CODE,
REVENUE_CENTER_CODE,
'2023-10-05 16:59:34.487348+00:00' as tuva_last_run
from SANDBOX_FFS._tuva_claims.medical_claim
)

, bill_type_requirement as (
select distinct 
  claim_id
from __dbt__cte__service_category__stg_medical_claim
where claim_type = 'institutional'
  and left(bill_type_code,2) in ('11','12') 
  and claim_id like '-100009300380312020%'

)

select distinct 
  a.claim_id,
  APR_DRG_CODE,
  BILL_TYPE_CODE,
  CLAIM_TYPE,
  HCPCS_CODE,
  MS_DRG_CODE,
  PLACE_OF_SERVICE_CODE,
  REVENUE_CENTER_CODE,
  'Acute Inpatient' as service_category_2
from __dbt__cte__service_category__stg_medical_claim a
inner join bill_type_requirement b
  on a.claim_id = b.claim_id
'''
bill = read_sql(sql)
print(bill.shape)
bill.head(1)

(0, 9)


Unnamed: 0,claim_id,apr_drg_code,bill_type_code,claim_type,hcpcs_code,ms_drg_code,place_of_service_code,revenue_center_code,service_category_2


In [22]:
# Unique claim IDs returned by the room-and-board check (r) and the bill-type check (b).
r, b = (
    frame.claim_id.unique().tolist()
    for frame in (room_board, bill)
)

# Claim IDs that satisfy both requirements.
set(r).intersection(b)

set()