In [3]:
#@title ## Base imports
import os
import sys

import geopandas
import numpy as np
import pandas as pd
import plotly.express as px
import scipy
import scipy.stats
import sklearn
import sklearn.linear_model
import statsmodels.api as sm

## Load ENT county df (specifically wide-type df) from csv file
To understand what is meant by long-type and wide-type dataframes, see https://towardsdatascience.com/visualization-with-plotly-express-comprehensive-guide-eb5ee4b50b57

In [4]:
# Load the per-HCPCS-code billing table (one row per healthcare code).
# The HCPCS code is kept as a string; the overall service count and each
# yearly service count (2015-2019) are read as 64-bit integers.
df_bill_orig = pd.read_csv(
    "data/2022_05_05 sums and slopes ent with HCPCS descriptions.csv",
    dtype={
        "HCPCS Code": str,
        **dict.fromkeys(
            [
                "Total Number of Services",
                *(f"Total Number of Services: {year}" for year in range(2015, 2019 + 1)),
            ],
            np.int64,
        ),
    },
)

In [5]:
# Peek at the first two rows of the freshly loaded frame to check the column layout
df_bill_orig.head(2)

Unnamed: 0,HCPCS Code,HCPCS Description,Total Number of Services,Total Medicare Payment Amount,Total Number of Services: 2019,Total Medicare Payment Amount: 2019,Total Number of Services: 2018,Total Medicare Payment Amount: 2018,Total Number of Services: 2017,Total Medicare Payment Amount: 2017,Total Number of Services: 2016,Total Medicare Payment Amount: 2016,Total Number of Services: 2015,Total Medicare Payment Amount: 2015,Total Medicare Payment Amount: Slope,Total Medicare Payment Amount: Pearson Coef,Total Number of Services: Slope,Total Number of Services: Pearson Coef
0,11042,Removal of skin and tissue first 20 sq cm or less,46073,4237372.2,6537,644733.1,11483,1317431.9,14608,1165644.85,8266,600263.11,5179,509299.24,2e-06,0.425707,0.000101,0.245111
1,11642,Removal of malignant growth (1.1 to 2.0 centim...,14749,1201135.31,2634,214496.56,2585,213254.24,2923,238474.17,3055,245483.24,3552,289427.1,-4.7e-05,-0.929111,-0.003798,-0.935792


In [12]:
def _split_bill_column(col):
    """Turn a flat CSV column name into a (Category, Stat, Year) tuple.

    "<category>"          -> (category, "", "Sum")
    "<category>: <year>"  -> (category, "Annual", year)   (year is all digits, kept as a string)
    "<category>: <stat>"  -> (category, "", stat)         (e.g. "Slope", "Pearson Coef")
    """
    category, sep, suffix = col.partition(": ")
    if not sep:
        # No suffix: this is the overall total for the category
        return (category, "", "Sum")
    if suffix.isdigit():
        return (category, "Annual", suffix)
    return (category, "", suffix)


df_bill_wide = df_bill_orig.set_index(["HCPCS Code", "HCPCS Description"])
# Replace the flat column names with a three-level MultiIndex
df_bill_wide.columns = pd.MultiIndex.from_tuples(
    [_split_bill_column(col) for col in df_bill_wide.columns],
    names=["Category", "Stat", "Year"],
)
df_bill_wide = df_bill_wide[sorted(df_bill_wide.columns)]  # rearrange cols alphabetically
df_bill_wide = df_bill_wide.sort_values(
    by=("Total Number of Services", "", "Sum"), ascending=False
)  # sort rows by volume
categories = df_bill_wide.columns.levels[0]  # ["Total Medicare Payment Amount", "Total Number of Services"]

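The slope given in the csv file is actually the slope of the inverse regression (x and y swapped), not the regular slope. Taking its reciprocal is not enough, because the inverse linear regression does not necessarily have 1/slope of the regular linear regression, so we recalculate the slope from the annual values below.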

In [63]:
def calc_slope(y, x, invert=False):
    """Fit a simple linear regression and summarise it.

    Parameters
    ----------
    y : array-like
        Dependent values (e.g. one row of annual values).
    x : array-like
        Independent values (e.g. the years); same length as ``y``.
    invert : bool, default False
        If True, swap the roles of ``x`` and ``y`` before fitting, i.e. compute
        the inverse linear regression.

    Returns
    -------
    dict
        ``{"Slope": ..., "Pearson Coef": ..., "P": ...}`` taken from
        ``scipy.stats.linregress`` (slope, rvalue, pvalue).
    """
    if invert:
        # The inverse linear regression does not necessarily have 1/slope of the
        # regular linear regression. The file
        # "2022_05_05 sums and slopes ent with HCPCS descriptions.csv" contains
        # the inverse linear regression.
        x, y = y, x
    regress = scipy.stats.linregress(x, y)
    return {"Slope": regress.slope, "Pearson Coef": regress.rvalue, "P": regress.pvalue}

In [64]:
# Recompute slope / Pearson coefficient / p-value per HCPCS code for every
# category, working on a copy so df_bill_wide itself is left untouched.
df_bill_wide_stats = df_bill_wide.copy()
for category in categories:
    annual = df_bill_wide[(category, "Annual")]
    # Take the x values from the year column labels instead of hard-coding the
    # range, so they always line up with the columns that are actually present.
    years = annual.columns.astype(int).to_numpy()
    stats = annual.apply(calc_slope, axis=1, result_type="expand", args=(years,))
    # Overwrites the Slope / Pearson Coef columns loaded from the CSV and adds P
    for stat_name in stats.columns:
        df_bill_wide_stats[(category, "", stat_name)] = stats[stat_name]
df_bill_wide_stats = df_bill_wide_stats[sorted(df_bill_wide_stats.columns)]  # rearrange cols alphabetically

In [65]:
# Spot-check the first two rows: each category should now have P, Pearson Coef and Slope columns
df_bill_wide_stats.head(2)

Unnamed: 0_level_0,Category,Total Medicare Payment Amount,Total Medicare Payment Amount,Total Medicare Payment Amount,Total Medicare Payment Amount,Total Medicare Payment Amount,Total Medicare Payment Amount,Total Medicare Payment Amount,Total Medicare Payment Amount,Total Medicare Payment Amount,Total Number of Services,Total Number of Services,Total Number of Services,Total Number of Services,Total Number of Services,Total Number of Services,Total Number of Services,Total Number of Services,Total Number of Services
Unnamed: 0_level_1,Stat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Annual,Annual,Annual,Annual,Annual,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Annual,Annual,Annual,Annual,Annual
Unnamed: 0_level_2,Year,P,Pearson Coef,Slope,Sum,2015,2016,2017,2018,2019,P,Pearson Coef,Slope,Sum,2015,2016,2017,2018,2019
HCPCS Code,HCPCS Description,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3
31575,Diagnostic examination of voice box using flexible endoscope,0.135205,-0.760895,-222163.937,16598776.03,4095655.56,3367114.38,2963951.91,3007322.23,3164731.95,0.456102,-0.442025,-1342.5,307623,69604,58104,58140,59663,62112
31231,Diagnostic examination of nasal passages using an endoscope,0.093238,0.814438,86219.337,8122459.37,1561172.64,1473524.08,1495479.72,1726503.13,1865779.8,0.057107,0.866909,2394.0,179197,33396,32245,32767,38601,42188


In [54]:
# Scratch check: reciprocal of the recomputed payment slope for HCPCS 31575.
# The result differs from the slope stored in the CSV for that code (-2.61e-06),
# i.e. the CSV's inverse-regression slope is not simply 1/slope.
1/-222163.937	

-4.50118058539807e-06

In [45]:
# Show only the ten highest-volume codes (rows are sorted by total number of
# services, descending) rather than dumping the whole frame.
df_bill_wide.head(10)

Unnamed: 0_level_0,Category,Total Medicare Payment Amount,Total Medicare Payment Amount,Total Medicare Payment Amount,Total Medicare Payment Amount,Total Medicare Payment Amount,Total Medicare Payment Amount,Total Medicare Payment Amount,Total Medicare Payment Amount,Total Number of Services,Total Number of Services,Total Number of Services,Total Number of Services,Total Number of Services,Total Number of Services,Total Number of Services,Total Number of Services
Unnamed: 0_level_1,Stat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Annual,Annual,Annual,Annual,Annual,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Annual,Annual,Annual,Annual,Annual
Unnamed: 0_level_2,Year,Pearson Coef,Slope,Sum,2015,2016,2017,2018,2019,Pearson Coef,Slope,Sum,2015,2016,2017,2018,2019
HCPCS Code,HCPCS Description,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3
31575,Diagnostic examination of voice box using flexible endoscope,-0.7608948,-2.61e-06,16598776.03,4095655.56,3367114.38,2963951.91,3007322.23,3164731.95,-0.442025,-0.000145539,307623,69604,58104,58140,59663,62112
31231,Diagnostic examination of nasal passages using an endoscope,0.8144384,7.69e-06,8122459.37,1561172.64,1473524.08,1495479.72,1726503.13,1865779.8,0.8669094,0.000313923,179197,33396,32245,32767,38601,42188
14060,"Tissue transfer repair of wound (10 sq centimeters or less) of eyelids, nose, ears, and/or lips",-0.6440928,-2.68e-06,35388675.22,7253164.21,7562395.26,6718042.68,7190368.43,6664704.64,-0.9312263,-0.002590151,66964,13950,14043,13157,13033,12781
30140,Removal of nasal air passage,-0.4595438,-1.38e-06,20173276.59,4090571.97,4163402.15,4804703.77,3414962.46,3699636.24,0.9902758,0.000887141,61413,9942,11063,12663,13489,14256
31579,Examination to assess movement of vocal cord flaps using an endoscope,0.2649743,2.26e-06,5998496.16,1329457.32,1080066.04,957141.66,1213382.4,1418448.74,0.7083424,0.000547581,60509,11995,9957,10470,13064,15023
30520,Reshaping of nasal cartilage,0.8823509,2.46e-06,20608078.41,3822391.75,3510467.18,3887689.27,4454847.45,4932682.76,0.9331389,0.002378443,50240,9409,9508,10270,10119,10934
11042,Removal of skin and tissue first 20 sq cm or less,0.4257069,1.83e-06,4237372.2,509299.24,600263.11,1165644.85,1317431.9,644733.1,0.2451112,0.000101263,46073,5179,8266,14608,11483,6537
31267,Removal of nasal sinus tissue using an endoscope,0.9239828,1.25e-06,21870283.2,2454520.71,4313264.94,4546272.86,5081500.91,5474723.78,0.9764155,0.001598838,44474,7561,8394,9166,9227,10126
14040,"Tissue transfer repair of wound (10 sq centimeters or less) of the forehead, cheeks, chin, mouth, neck, underarms, genitals, hands, and/or feet",-0.9218206,-5.24e-06,18862149.7,4175118.98,3907715.69,3725634.72,3471107.63,3582572.68,-0.9076939,-0.001957026,36699,8379,7635,7356,6475,6854
15260,"Relocation of patient skin to nose, ears, eyelids, and/or lips (20 sq centimeters or less)",-0.5065926,-2.57e-06,19492504.15,3816163.3,4358891.55,3827316.88,3987570.86,3502561.56,-0.8851952,-0.003129276,35022,7255,7518,7164,6646,6439
