In [1]:
import os
import shutil
import urllib.request

import pandas as pd

In [2]:
# Root folder for every data file used below: "<current working dir>/dataFiles/".
# The trailing slash is kept so later cells can append relative paths directly.
dataFol = f"{os.getcwd()}/dataFiles/"

In [3]:
# Download URL (CMS) for the Inpatient Prospective Payment System (IPPS)
# Provider Summary for All Diagnosis-Related Groups (DRG) - FY2016.
#
# About the dataset (Inpatient Utilization and Payment Public Use File, "Inpatient PUF"):
#   * covers inpatient discharges for Medicare fee-for-service beneficiaries;
#   * reports utilization, payment (total payment and Medicare payment) and
#     hospital-specific charges for the more than 3,000 U.S. hospitals that
#     receive Medicare IPPS payments;
#   * is organized by hospital and Medicare Severity Diagnosis Related Group (MS-DRG)
#     and covers Fiscal Year (FY) 2016;
#   * the MS-DRGs included represent more than 7 million discharges, or 75 percent
#     of total Medicare IPPS discharges.
ippsUrl = (
    "https://data.cms.gov/api/views/fm2n-hjj6/rows.csv?accessType=DOWNLOAD"
)

In [4]:
### Make sure the IPPS provider summary CSV is available under dataFol + "cms/".
# Resolution order:
#   1. the file is already in dataFol + "cms/"  -> nothing to do
#   2. the file sits in the current directory   -> move it into place
#   3. otherwise                                -> download it from ippsUrl
ippsFileName = "ippsProviderSummary.csv"
ippsDir = dataFol + "cms/"
ippsPath = ippsDir + ippsFileName

if not os.path.isfile(ippsPath):
    # Moving or downloading into a folder that does not exist would fail,
    # so create the target folder first.
    os.makedirs(ippsDir, exist_ok=True)

    if os.path.isfile(ippsFileName):
        # if in the current dir just move it
        # (shutil.move instead of a shell `mv`: no shell quoting problems with
        # spaces in the path, works on every OS, and raises if the move fails)
        shutil.move(ippsFileName, ippsPath)
        print("Moving file!, Please check!")
    else:
        # Download under a temporary name inside the target folder and rename
        # only once the transfer has finished, so an interrupted download is
        # never mistaken for the complete file on the next run.
        ippsPartPath = ippsPath + ".part"
        try:
            urllib.request.urlretrieve(ippsUrl, ippsPartPath)
            os.replace(ippsPartPath, ippsPath)
        finally:
            # Only still present if the download or the rename failed;
            # the original exception keeps propagating.
            if os.path.isfile(ippsPartPath):
                os.remove(ippsPartPath)

### One final check:
if not os.path.isfile(ippsPath):
    print("WARNING!!! something is wrong: the file is not there!")
else:
    print("file in place, you can continue")

file in place, you can continue


In [6]:
# Load the IPPS provider summary CSV stored under dataFol + "cms/" into a DataFrame.
# NOTE(review): the dtypes listing in this notebook shows 'Provider Id' and
# 'Provider Zip Code' parsed as int64, so any leading zeros in those identifiers
# would be dropped - confirm this is acceptable for later joins.
ippsRaw = pd.read_csv(f"{dataFol}cms/ippsProviderSummary.csv")

In [11]:
# Preview the first five rows of the raw IPPS table.
ippsRaw.head(n=5)

Unnamed: 0,DRG Definition,Provider Id,Provider Name,Provider Street Address,Provider City,Provider State,Provider Zip Code,Hospital Referral Region (HRR) Description,Total Discharges,Average Covered Charges,Average Total Payments,Average Medicare Payments,drg,drgDesc
0,001 - HEART TRANSPLANT OR IMPLANT OF HEART ASS...,10033,UNIVERSITY OF ALABAMA HOSPITAL,619 SOUTH 19TH STREET,BIRMINGHAM,AL,35233,AL - Birmingham,13,1016806.46,296937.0,150139.69,1,HEART TRANSPLANT OR IMPLANT OF HEART ASSIST SY...
1,001 - HEART TRANSPLANT OR IMPLANT OF HEART ASS...,30103,MAYO CLINIC HOSPITAL,5777 EAST MAYO BOULEVARD,PHOENIX,AZ,85054,AZ - Phoenix,26,443387.54,215059.54,163889.31,1,HEART TRANSPLANT OR IMPLANT OF HEART ASSIST SY...
2,001 - HEART TRANSPLANT OR IMPLANT OF HEART ASS...,40114,BAPTIST HEALTH MEDICAL CENTER-LITTLE ROCK,"9601 INTERSTATE 630, EXIT 7",LITTLE ROCK,AR,72205,AR - Little Rock,33,711472.0,180315.55,145192.61,1,HEART TRANSPLANT OR IMPLANT OF HEART ASSIST SY...
3,001 - HEART TRANSPLANT OR IMPLANT OF HEART ASS...,50025,UC SAN DIEGO HEALTH HILLCREST - HILLCREST MED CTR,200 WEST ARBOR DRIVE,SAN DIEGO,CA,92103,CA - San Diego,17,796343.82,299244.41,270131.59,1,HEART TRANSPLANT OR IMPLANT OF HEART ASSIST SY...
4,001 - HEART TRANSPLANT OR IMPLANT OF HEART ASS...,50100,SHARP MEMORIAL HOSPITAL,7901 FROST ST,SAN DIEGO,CA,92123,CA - San Diego,13,1434651.46,239537.46,215205.0,1,HEART TRANSPLANT OR IMPLANT OF HEART ASSIST SY...


In [9]:
# Show the column labels of the raw IPPS table.
ippsRaw.columns

Index(['DRG Definition', 'Provider Id', 'Provider Name',
       'Provider Street Address', 'Provider City', 'Provider State',
       'Provider Zip Code', 'Hospital Referral Region (HRR) Description',
       'Total Discharges', 'Average Covered Charges', 'Average Total Payments',
       'Average Medicare Payments'],
      dtype='object')

In [12]:
# Show the dtype pandas assigned to each column of the raw IPPS table.
ippsRaw.dtypes

DRG Definition                                 object
Provider Id                                     int64
Provider Name                                  object
Provider Street Address                        object
Provider City                                  object
Provider State                                 object
Provider Zip Code                               int64
Hospital Referral Region (HRR) Description     object
Total Discharges                                int64
Average Covered Charges                       float64
Average Total Payments                        float64
Average Medicare Payments                     float64
drg                                            object
drgDesc                                        object
dtype: object

In [10]:
# Split the 'DRG Definition' column (e.g. "001 - HEART TRANSPLANT ...") into
# the DRG code ('drg') and its description ('drgDesc').
#   * n=1 splits on the first ' - ' only, so dashes inside the description survive.
#   * expand=True returns one column per part; unpacking the `.str` accessor as a
#     tuple is not supported by current pandas, and `n` must be passed by keyword.
#   * reindex guarantees both parts exist (filled with NaN) even if no row
#     contains the separator.
# This cell must run before any cell that reads 'drg' or 'drgDesc'.
drgParts = (
    ippsRaw['DRG Definition']
    .str.split(' - ', n=1, expand=True)
    .reindex(columns=[0, 1])
)
ippsRaw['drg'] = drgParts[0]
ippsRaw['drgDesc'] = drgParts[1]

In [17]:
# Keep only the New York providers, and only the columns used from here on:
# provider id, DRG code/description, discharge count and the three average amounts.
ipps = ippsRaw[ippsRaw['Provider State'].eq("NY")][[
    'Provider Id',
    'drg',
    'drgDesc',
    'Total Discharges',
    'Average Covered Charges',
    'Average Total Payments',
    'Average Medicare Payments',
]]

In [20]:
# Preview the first five rows of the New York subset.
ipps.head(n=5)

Unnamed: 0,Provider Id,drg,drgDesc,Total Discharges,Average Covered Charges,Average Total Payments,Average Medicare Payments
44,330024,1,HEART TRANSPLANT OR IMPLANT OF HEART ASSIST SY...,23,505030.22,279821.43,236846.35
45,330059,1,HEART TRANSPLANT OR IMPLANT OF HEART ASSIST SY...,16,771758.13,283074.31,269391.38
46,330101,1,HEART TRANSPLANT OR IMPLANT OF HEART ASSIST SY...,44,1091512.32,334808.75,297501.18
47,330234,1,HEART TRANSPLANT OR IMPLANT OF HEART ASSIST SY...,14,1781830.21,358592.36,328005.64
48,330285,1,HEART TRANSPLANT OR IMPLANT OF HEART ASSIST SY...,23,560964.13,267191.74,196727.52


In [26]:
# Load the compiled NYC hospital master-charges CSV from dataFol.
# NOTE(review): the preview of this frame shows an 'Unnamed: 0' column, which
# looks like an index that was written out with the CSV - confirm whether it
# should be dropped or read with index_col.
nycHosp = pd.read_csv(f"{dataFol}compiledMasterchargesNycHospitals.csv")

In [27]:
# Preview the first five rows of the NYC hospital master-charges table.
nycHosp.head(n=5)

Unnamed: 0.1,Unnamed: 0,drgDesc,avgCharges,providerId,drg,msDrg,numDischarges,Charge No,Charge Description,Standard Charges,...,Cases,SITE,DEPT,SITE CHRG,SITE D DESCRIPTION,HCPCS CD,SITE RATE,REV,OMCR,COUNT
0,0,ISOLATION ROOM,1244.0,330204.0,,,,,,,...,,,,,,,,,,
1,1,MS 1ST HR OBS ISOLATION,713.0,330204.0,,,,,,,...,,,,,,,,,,
2,2,MS 1ST HR OBS TELEMETRY,809.0,330204.0,,,,,,,...,,,,,,,,,,
3,3,MS ADD HR OBS ISOLATION,21.0,330204.0,,,,,,,...,,,,,,,,,,
4,4,MS ADD OBS TELEMETRY,24.0,330204.0,,,,,,,...,,,,,,,,,,
