In [1]:
#diabetes df
import pandas as pd
import numpy as np
import pandas_profiling

directory = 'C:/githubrepo/7331_Project/data/'
df_zg = pd.read_csv(directory + 'diabetic_data.csv')
df_zg.replace('?', np.nan, inplace = True)

In [2]:
df_zg.shape

(101766, 50)

In [3]:
# Data Distribution
df_zg.describe()

Unnamed: 0,encounter_id,patient_nbr,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,num_lab_procedures,num_procedures,num_medications,number_outpatient,number_emergency,number_inpatient,number_diagnoses
count,101766.0,101766.0,101766.0,101766.0,101766.0,101766.0,101766.0,101766.0,101766.0,101766.0,101766.0,101766.0,101766.0
mean,165201600.0,54330400.0,2.675904,4.388362,4.372443,4.589696,34.047383,1.456577,16.560492,0.369357,0.212232,0.635566,7.422607
std,102640300.0,38696360.0,1.530799,6.013795,4.687549,3.160314,22.386944,1.792201,9.10598,1.267265,0.917842,1.262863,1.9336
min,12522.0,135.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0
25%,84961190.0,23413220.0,1.0,1.0,1.0,2.0,16.0,0.0,10.0,0.0,0.0,0.0,6.0
50%,152389000.0,45505140.0,3.0,1.0,2.0,4.0,30.0,1.0,15.0,0.0,0.0,0.0,8.0
75%,230270900.0,87545950.0,4.0,6.0,7.0,6.0,49.0,2.0,20.0,0.0,0.0,1.0,9.0
max,443867200.0,189502600.0,8.0,28.0,25.0,14.0,129.0,6.0,81.0,42.0,76.0,21.0,16.0


In [4]:
missing = pd.DataFrame({'column':df_zg.columns ,'na_percent':df_zg.isnull().sum()/len(df_zg)*100, 'na_count':df_zg.isnull().sum()})
missing[missing.na_percent > 0]

Unnamed: 0,column,na_percent,na_count
race,race,2.233555,2273
weight,weight,96.858479,98569
payer_code,payer_code,39.557416,40256
medical_specialty,medical_specialty,49.082208,49949
diag_1,diag_1,0.020636,21
diag_2,diag_2,0.351787,358
diag_3,diag_3,1.398306,1423


### Fixing race, weight, payer_code, medical_speciality, examide, and citoglipton

In [5]:
#Remove Weight and payer code because they have too many NA's
#Remove examide and citoglipton because they are categorical with only 1 category
df_zg.drop(['weight', 'payer_code', 'examide', 'citoglipton'], axis=1, inplace = True)

#Fix NA's in medical speciality
df_zg.loc[df_zg['medical_specialty'].isnull(), 'medical_specialty'] = "Unspecified"
#Fix race NA's
df_zg.loc[df_zg['race'].isnull(), 'race'] = "Unspecified"

### Checking the number of NA's again

In [6]:
missing = pd.DataFrame({'column':df_zg.columns ,'na_percent':df_zg.isnull().sum()/len(df_zg)*100, 'na_count':df_zg.isnull().sum()})
missing[missing.na_percent > 0]

Unnamed: 0,column,na_percent,na_count
diag_1,diag_1,0.020636,21
diag_2,diag_2,0.351787,358
diag_3,diag_3,1.398306,1423


##### Prepping to fix diag_1,2,3. Because the codes starting with E and V's relate to something specific just like the others that are numeric, we are changing them to be numeric so that we can replace them with meaningful categories

In [7]:
#The 3 columns to fix
d_nums = ['diag_1', 'diag_2', 'diag_3']

#Removing the Letter at the start with a number
df_zg[d_nums] = df_zg[d_nums].replace({'E':'-1'}, regex = True)
df_zg[d_nums] = df_zg[d_nums].replace({'V':'-2'}, regex = True)

#Changing it to be numeric
df_zg[d_nums] = df_zg[d_nums].astype(np.float)

##### Fixing diag_1

In [8]:
#Supress all warnings
pd.options.mode.chained_assignment = None

#Function to fix the three diag columns
def fixdiags(name, newname):
    #Website with the codes
    #http://www.icd9data.com/2012/Volume1/780-799/780-789/default.htm
    #First creating an empty column so that we don't mess with the original
    df_zg[newname] = np.nan #empty column

    #Making the categories based on the numerical value
    df_zg[newname][(df_zg[name] >= -2000) & (df_zg[name] < 1)] = "Other"
    df_zg[newname][(df_zg[name] >= 1) & (df_zg[name] < 250)] = "Neoplasms"
    df_zg[newname][(df_zg[name] >= 250) & (df_zg[name] < 251)] = "Diabeties"
    df_zg[newname][(df_zg[name] >= 251) & (df_zg[name] < 280)] = "Neoplasms"
    df_zg[newname][(df_zg[name] >= 280) & (df_zg[name] <= 389)] = "Other"
    df_zg[newname][((df_zg[name] >= 390) & (df_zg[name] <= 459)) | (df_zg[name] == 785)] = "Circulatory"
    df_zg[newname][((df_zg[name] >= 460) & (df_zg[name] <= 519)) | (df_zg[name] == 786)] = "Respiratory"
    df_zg[newname][((df_zg[name] >= 520) & (df_zg[name] <= 579)) | (df_zg[name] == 787)] = "Digestive"
    df_zg[newname][((df_zg[name] >= 580) & (df_zg[name] <= 629)) | (df_zg[name] == 788)] = "Genitourinary"
    df_zg[newname][(df_zg[name] >= 630) & (df_zg[name] <= 679)] = "Other"
    df_zg[newname][((df_zg[name] >= 680) & (df_zg[name] <= 709)) | (df_zg[name] == 782)] = "Neoplasms"
    df_zg[newname][(df_zg[name] >= 710) & (df_zg[name] <= 739)] = "Musculoskeletal"
    df_zg[newname][(df_zg[name] >= 740) & (df_zg[name] <= 759)] = "Other"
    df_zg[newname][(df_zg[name] == 783)] = "Digestive"
    df_zg[newname][(df_zg[name] == 789)] = "Other"
    df_zg[newname][((df_zg[name] >= 790) & (df_zg[name] <= 799)) | (df_zg[name] == 780) | (df_zg[name] == 781) | (df_zg[name] == 784)] = "Neoplasms"
    df_zg[newname][(df_zg[name] >= 800) & (df_zg[name] <= 999)] = "Injury"

    #Sets the value of the null diagnosis to None
    df_zg[newname][df_zg[name].isnull()] = "None"
    return df_zg

#Actually running the code to fix diag_1, 2, and 3
df_zg = fixdiags('diag_1', 'diag_1_val')
df_zg = fixdiags('diag_2', 'diag_2_val')
df_zg = fixdiags('diag_3', 'diag_3_val')

#Drop the three columns that are now unneeded
d_nums = ['diag_1', 'diag_2', 'diag_3']
df_zg.drop(d_nums, axis=1, inplace = True)

### Checking NA's again

In [9]:
missing = pd.DataFrame({'column':df_zg.columns ,'na_percent':df_zg.isnull().sum()/len(df_zg)*100, 'na_count':df_zg.isnull().sum()})
missing[missing.na_percent > 0]

Unnamed: 0,column,na_percent,na_count


### Adding the Readmitted Yes/No (1/0) column

In [10]:
#Supress all warnings
pd.options.mode.chained_assignment = None

#Make a new column for this value
df_zg['readmitted_tf'] = np.nan

#Set the values
df_zg.loc[df_zg['readmitted'] == "NO", 'readmitted_tf'] = "0"
df_zg.loc[df_zg['readmitted'] == "<30", 'readmitted_tf'] = "1"
df_zg.loc[df_zg['readmitted'] == ">30", 'readmitted_tf'] = "1"

#Set the type that it needs to be (integer)
df_zg["readmitted_tf"]=df_zg["readmitted_tf"].astype('int64')

### Gender has only 3 set to unassigned, so we need to impute it. Most common is Female

In [11]:
df_zg['gender'] = df_zg['gender'].replace(to_replace = 'Unknown/Invalid', value = 'Female')

### Making the values 1/0 so that we can easily get a count of how many meds each row is taking

In [12]:
df_zg['metformin-rosiglitazone_tf'] = 1
df_zg['metformin-rosiglitazone_tf'][df_zg["metformin-rosiglitazone"] == "No"] = 0
df_zg['metformin-pioglitazone_tf'] = 1
df_zg['metformin-pioglitazone_tf'][df_zg["metformin-pioglitazone"] == "No"] = 0
df_zg['glimepiride-pioglitazone_tf'] = 1
df_zg['glimepiride-pioglitazone_tf'][df_zg["glimepiride-pioglitazone"] == "No"] = 0
df_zg['glipizide-metformin_tf'] = 1
df_zg['glipizide-metformin_tf'][df_zg["glipizide-metformin"] == "No"] = 0
df_zg['glyburide-metformin_tf'] = 1
df_zg['glyburide-metformin_tf'][df_zg["glyburide-metformin"] == "No"] = 0
df_zg['insulin_tf'] = 1
df_zg['insulin_tf'][df_zg["insulin"] == "No"] = 0
df_zg['tolazamide_tf'] = 1
df_zg['tolazamide_tf'][df_zg["tolazamide"] == "No"] = 0
df_zg['troglitazone_tf'] = 1
df_zg['troglitazone_tf'][df_zg["troglitazone"] == "No"] = 0
df_zg['miglitol_tf'] = 1
df_zg['miglitol_tf'][df_zg["miglitol"] == "No"] = 0
df_zg['acarbose_tf'] = 1
df_zg['acarbose_tf'][df_zg["acarbose"] == "No"] = 0
df_zg['rosiglitazone_tf'] = 1
df_zg['rosiglitazone_tf'][df_zg["rosiglitazone"] == "No"] = 0
df_zg['pioglitazone_tf'] = 1
df_zg['pioglitazone_tf'][df_zg["pioglitazone"] == "No"] = 0
df_zg['metformin_tf'] = 1
df_zg['metformin_tf'][df_zg["metformin"] == "No"] = 0
df_zg['repaglinide_tf'] = 1
df_zg['repaglinide_tf'][df_zg["repaglinide"] == "No"] = 0
df_zg['nateglinide_tf'] = 1
df_zg['nateglinide_tf'][df_zg["nateglinide"] == "No"] = 0
df_zg['chlorpropamide_tf'] = 1
df_zg['chlorpropamide_tf'][df_zg["chlorpropamide"] == "No"] = 0
df_zg['glimepiride_tf'] = 1
df_zg['glimepiride_tf'][df_zg["glimepiride"] == "No"] = 0
df_zg['acetohexamide_tf'] = 1
df_zg['acetohexamide_tf'][df_zg["acetohexamide"] == "No"] = 0
df_zg['glipizide_tf'] = 1
df_zg['glipizide_tf'][df_zg["glipizide"] == "No"] = 0
df_zg['glyburide_tf'] = 1
df_zg['glyburide_tf'][df_zg["glyburide"] == "No"] = 0
df_zg['tolbutamide_tf'] = 1
df_zg['tolbutamide_tf'][df_zg["tolbutamide"] == "No"] = 0

### Make a new column containing the number of meds a person is taking then delete the 1/0 meds columns

In [13]:
medCount = ['metformin_tf','repaglinide_tf','nateglinide_tf','chlorpropamide_tf',
                  'glimepiride_tf','acetohexamide_tf','glipizide_tf','glyburide_tf',
                  'tolbutamide_tf','pioglitazone_tf','rosiglitazone_tf','acarbose_tf',
                  'miglitol_tf','troglitazone_tf', 'tolazamide_tf','insulin_tf',
                  'glyburide-metformin_tf','glipizide-metformin_tf',
                  'glimepiride-pioglitazone','metformin-rosiglitazone_tf',
                  'metformin-pioglitazone_tf',"glimepiride-pioglitazone_tf",
                  'metformin-rosiglitazone_tf','metformin-pioglitazone_tf']
df_zg["medication_count"] = np.nan
df_zg["medication_count"] = df_zg[medCount].sum(axis=1)
df_zg.drop(medCount, axis=1, inplace = True)

### Setting the object types to the correct ones

In [14]:
df_zg['encounter_id'] = df_zg['encounter_id'].astype(object)
df_zg['patient_nbr'] = df_zg['patient_nbr'].astype(object)
df_zg['admission_type_id'] = df_zg['admission_type_id'].astype(object)
df_zg['discharge_disposition_id'] = df_zg['discharge_disposition_id'].astype(object)
df_zg['admission_source_id'] = df_zg['admission_source_id'].astype(object)
df_zg['race'] = df_zg['race'].astype(object)
df_zg["A1Cresult"] = df_zg["A1Cresult"].astype(object)
df_zg["age"] = df_zg["age"].astype(object)
df_zg['max_glu_serum'] = df_zg['max_glu_serum'].astype(object)
df_zg['gender'] = df_zg['gender'].astype(object)
df_zg['diabetesMed'] = df_zg['diabetesMed'].astype(object)
df_zg['change'] = df_zg['change'].astype(object)

### Makes a new column that indicates what the admission_source_id means

In [15]:
df_zg['admission_source'] = np.nan
df_zg.loc[df_zg['admission_source_id'] == 1, "admission_source"] = "Physician Referral"
df_zg.loc[df_zg['admission_source_id'] == 2, "admission_source"] = "Clinic Referral"
df_zg.loc[df_zg['admission_source_id'] == 3, "admission_source"] = "HMO Referral"
df_zg.loc[df_zg['admission_source_id'] == 4, "admission_source"] = "Transfer from a hospital"
df_zg.loc[df_zg['admission_source_id'] == 5, 'admission_source'] = "Transfer from a Skilled Nursing Facility (SNF)"
df_zg.loc[df_zg['admission_source_id'] == 6, 'admission_source'] = "Transfer from another health care facility"
df_zg.loc[df_zg['admission_source_id'] == 7, 'admission_source'] = "Emergency Room"
df_zg.loc[df_zg['admission_source_id'] == 8, 'admission_source'] = "Court/Law Enforcement"
df_zg.loc[df_zg['admission_source_id'] == 9, 'admission_source'] = "Not Available"
df_zg.loc[df_zg['admission_source_id'] == 10, 'admission_source'] = "Transfer from critical access hospital"
df_zg.loc[df_zg['admission_source_id'] == 11, 'admission_source'] = "Normal Delivery"
df_zg.loc[df_zg['admission_source_id'] == 12, 'admission_source'] = "Premature Delivery"
df_zg.loc[df_zg['admission_source_id'] == 13, 'admission_source'] = "Sick Baby"
df_zg.loc[df_zg['admission_source_id'] == 14, 'admission_source'] = "Extramural Birth"
df_zg.loc[df_zg['admission_source_id'] == 15, 'admission_source'] = "Not Available"
df_zg.loc[df_zg['admission_source_id'] == 17, 'admission_source'] = "NULL"
df_zg.loc[df_zg['admission_source_id'] == 18, 'admission_source'] = "Transfer From Another Home Health Agency"
df_zg.loc[df_zg['admission_source_id'] == 19, 'admission_source'] = "Readmission to Same Home Health Agency"
df_zg.loc[df_zg['admission_source_id'] == 20, 'admission_source'] = "Not Mapped"
df_zg.loc[df_zg['admission_source_id'] == 21, 'admission_source'] = "Unknown/Invalid"
df_zg.loc[df_zg['admission_source_id'] == 22, 'admission_source'] = "Transfer from hospital inpt/same fac reslt in a sep claim"
df_zg.loc[df_zg['admission_source_id'] == 23, 'admission_source'] = "Born inside this hospital"
df_zg.loc[df_zg['admission_source_id'] == 24, 'admission_source'] = "Born outside this hospital"
df_zg.loc[df_zg['admission_source_id'] == 25, 'admission_source'] = "Transfer from Ambulatory Surgery Center"
df_zg.loc[df_zg['admission_source_id'] == 26, 'admission_source'] = "Transfer from Hospice"

### Makes a new column that indicates what the discharge_disposition_id means

In [16]:
df_zg['discharge_disposition'] = np.nan
df_zg.loc[df_zg['discharge_disposition_id'] == 1, 'discharge_disposition'] = "Discharged to home"
df_zg.loc[df_zg['discharge_disposition_id'] == 2, 'discharge_disposition'] = "Discharged/transferred to another short term hospital"
df_zg.loc[df_zg['discharge_disposition_id'] == 3, 'discharge_disposition'] = "Discharged/transferred to SNF"
df_zg.loc[df_zg['discharge_disposition_id'] == 4, 'discharge_disposition'] = "Discharged/transferred to ICF"
df_zg.loc[df_zg['discharge_disposition_id'] == 5, 'discharge_disposition'] = "Discharged/transferred to another type of inpatient care institution"
df_zg.loc[df_zg['discharge_disposition_id'] == 6, 'discharge_disposition'] = "Discharged/transferred to home with home health service"
df_zg.loc[df_zg['discharge_disposition_id'] == 7, 'discharge_disposition'] = "Left AMA"
df_zg.loc[df_zg['discharge_disposition_id'] == 8, 'discharge_disposition'] = "Discharged/transferred to home under care of Home IV provider"
df_zg.loc[df_zg['discharge_disposition_id'] == 9, 'discharge_disposition'] = "Admitted as an inpatient to this hospital"
df_zg.loc[df_zg['discharge_disposition_id'] == 10, 'discharge_disposition'] = "Neonate discharged to another hospital for neonatal aftercare"
df_zg.loc[df_zg['discharge_disposition_id'] == 11, 'discharge_disposition'] = "Expired"
df_zg.loc[df_zg['discharge_disposition_id'] == 12, 'discharge_disposition'] = "Still patient or expected to return for outpatient services"
df_zg.loc[df_zg['discharge_disposition_id'] == 13, 'discharge_disposition'] = "Hospice / home"
df_zg.loc[df_zg['discharge_disposition_id'] == 14, 'discharge_disposition'] = "Hospice / medical facility"
df_zg.loc[df_zg['discharge_disposition_id'] == 15, 'discharge_disposition'] = "Discharged/transferred within this institution to Medicare approved swing bed"
df_zg.loc[df_zg['discharge_disposition_id'] == 16, 'discharge_disposition'] = "Discharged/transferred/referred another institution for outpatient services"
df_zg.loc[df_zg['discharge_disposition_id'] == 17, 'discharge_disposition'] = "Discharged/transferred/referred to this institution for outpatient services"
df_zg.loc[df_zg['discharge_disposition_id'] == 18, 'discharge_disposition'] = "NULL"
df_zg.loc[df_zg['discharge_disposition_id'] == 19, 'discharge_disposition'] = "Expired at home. Medicaid only, hospice."
df_zg.loc[df_zg['discharge_disposition_id'] == 20, 'discharge_disposition'] = "Expired in a medical facility. Medicaid only, hospice."
df_zg.loc[df_zg['discharge_disposition_id'] == 21, 'discharge_disposition'] = "Expired, place unknown. Medicaid only, hospice."
df_zg.loc[df_zg['discharge_disposition_id'] == 22, 'discharge_disposition'] = "Discharged/transferred to another rehab fac including rehab units of a hospital."
df_zg.loc[df_zg['discharge_disposition_id'] == 23, 'discharge_disposition'] = "Discharged/transferred to a long term care hospital."
df_zg.loc[df_zg['discharge_disposition_id'] == 24, 'discharge_disposition'] = "Discharged/transferred to a nursing facility certified under Medicaid but not certified under Medicare."
df_zg.loc[df_zg['discharge_disposition_id'] == 25, 'discharge_disposition'] = "Not Mapped"
df_zg.loc[df_zg['discharge_disposition_id'] == 26, 'discharge_disposition'] = "Unknown/Invalid"
df_zg.loc[df_zg['discharge_disposition_id'] == 27, 'discharge_disposition'] = "Discharged/transferred to a federal health care facility."
df_zg.loc[df_zg['discharge_disposition_id'] == 28, 'discharge_disposition'] = "Discharged/transferred/referred to a psychiatric hospital of psychiatric distinct part unit of a hospital"
df_zg.loc[df_zg['discharge_disposition_id'] == 29, 'discharge_disposition'] = "Discharged/transferred to a Critical Access Hospital (CAH)."
df_zg.loc[df_zg['discharge_disposition_id'] == 30, 'discharge_disposition'] = "Discharged/transferred to another Type of Health Care Institution not Defined Elsewhere"

### Makes a new column that indicates what the admission_type_id means

In [17]:
df_zg['admission_type'] = np.nan
df_zg.loc[df_zg['admission_type_id'] == 1, 'admission_type'] = "Emergency"
df_zg.loc[df_zg['admission_type_id'] == 2, 'admission_type'] = "Urgent"
df_zg.loc[df_zg['admission_type_id'] == 3, 'admission_type'] = "Elective"
df_zg.loc[df_zg['admission_type_id'] == 4, 'admission_type'] = "Newborn"
df_zg.loc[df_zg['admission_type_id'] == 5, 'admission_type'] = "Not Available"
df_zg.loc[df_zg['admission_type_id'] == 6, 'admission_type'] = "NULL"
df_zg.loc[df_zg['admission_type_id'] == 7, 'admission_type'] = "Trauma Center"
df_zg.loc[df_zg['admission_type_id'] == 8, 'admission_type'] = "Not Mapped"

### Make a new column that indicates if the amount of medication increased

In [18]:
df_zg['meds_increased'] = 0
df_zg['meds_increased'][(df_zg['metformin'] == "Up") | (df_zg['repaglinide'] == "Up") | 
                      (df_zg['nateglinide'] == "Up") | (df_zg['metformin-rosiglitazone'] == "Up") |
                      (df_zg['metformin-pioglitazone'] == "Up") | (df_zg['chlorpropamide'] == "Up") |
                      (df_zg['glimepiride'] == "Up") | (df_zg['acetohexamide'] == "Up") | 
                      (df_zg['glipizide'] == "Up") | (df_zg['glyburide'] == "Up") | 
                      (df_zg['tolbutamide'] == "Up") | (df_zg['pioglitazone'] == "Up") | 
                      (df_zg['rosiglitazone'] == "Up") | (df_zg['acarbose'] == "Up") | 
                      (df_zg['miglitol'] == "Up") | (df_zg['troglitazone'] == "Up") | 
                      (df_zg['tolazamide'] == "Up") | (df_zg['insulin'] == "Up") | 
                      (df_zg['glyburide-metformin'] == "Up")|(df_zg['glipizide-metformin'] == "Up")] = 1
df_zg['meds_increased'] = df_zg['meds_increased'].astype(object)

In [19]:
pandas_profiling.ProfileReport(df_zg)

0,1
Number of variables,51
Number of observations,101766
Total Missing (%),0.0%
Total size in memory,39.6 MiB
Average record size in memory,408.0 B

0,1
Numeric,14
Categorical,35
Boolean,2
Date,0
Text (Unique),0
Rejected,0
Unsupported,0

0,1
Distinct count,4
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
,84748
>8,8216
Norm,4990

Value,Count,Frequency (%),Unnamed: 3
,84748,83.3%,
>8,8216,8.1%,
Norm,4990,4.9%,
>7,3812,3.7%,

0,1
Distinct count,4
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,101458
Steady,295
Up,10

Value,Count,Frequency (%),Unnamed: 3
No,101458,99.7%,
Steady,295,0.3%,
Up,10,0.0%,
Down,3,0.0%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,101765
Steady,1

Value,Count,Frequency (%),Unnamed: 3
No,101765,100.0%,
Steady,1,0.0%,

0,1
Distinct count,15
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
Physician Referral,36269
Emergency Room,25607
Clinic Referral,23989
Other values (12),15901

Value,Count,Frequency (%),Unnamed: 3
Physician Referral,36269,35.6%,
Emergency Room,25607,25.2%,
Clinic Referral,23989,23.6%,
,5983,5.9%,
Transfer from a hospital,3433,3.4%,
Transfer from another health care facility,3153,3.1%,
Transfer from critical access hospital,1158,1.1%,
Transfer from Ambulatory Surgery Center,1103,1.1%,
Transfer from a Skilled Nursing Facility (SNF),618,0.6%,
HMO Referral,166,0.2%,

0,1
Distinct count,15
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,4.3724
Minimum,1
Maximum,25
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,1
Q1,1
Median,2
Q3,7
95-th percentile,17
Maximum,25
Range,24
Interquartile range,6

0,1
Standard deviation,4.6875
Coef of variation,1.0721
Kurtosis,4.5201
Mean,4.3724
MAD,3.552
Skewness,2.0355
Sum,444966
Variance,21.973
Memory size,795.1 KiB

Value,Count,Frequency (%),Unnamed: 3
1,36269,35.6%,
7,25607,25.2%,
2,23989,23.6%,
17,5983,5.9%,
4,3433,3.4%,
6,3153,3.1%,
10,1158,1.1%,
25,1103,1.1%,
5,618,0.6%,
3,166,0.2%,

Value,Count,Frequency (%),Unnamed: 3
1,36269,35.6%,
2,23989,23.6%,
3,166,0.2%,
4,3433,3.4%,
5,618,0.6%,

Value,Count,Frequency (%),Unnamed: 3
14,1,0.0%,
17,5983,5.9%,
20,160,0.2%,
22,2,0.0%,
25,1103,1.1%,

0,1
Distinct count,8
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
Emergency,32219
Elective,27609
Newborn,17288
Other values (5),24650

Value,Count,Frequency (%),Unnamed: 3
Emergency,32219,31.7%,
Elective,27609,27.1%,
Newborn,17288,17.0%,
Urgent,14582,14.3%,
,4082,4.0%,
Not Available,3810,3.7%,
Trauma Center,1996,2.0%,
Not Mapped,180,0.2%,

0,1
Distinct count,8
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,2.6759
Minimum,1
Maximum,8
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,1
Q1,1
Median,3
Q3,4
95-th percentile,6
Maximum,8
Range,7
Interquartile range,3

0,1
Standard deviation,1.5308
Coef of variation,0.57207
Kurtosis,0.15432
Mean,2.6759
MAD,1.2549
Skewness,0.74298
Sum,272316
Variance,2.3433
Memory size,795.1 KiB

Value,Count,Frequency (%),Unnamed: 3
1,32219,31.7%,
3,27609,27.1%,
4,17288,17.0%,
2,14582,14.3%,
6,4082,4.0%,
5,3810,3.7%,
7,1996,2.0%,
8,180,0.2%,

Value,Count,Frequency (%),Unnamed: 3
1,32219,31.7%,
2,14582,14.3%,
3,27609,27.1%,
4,17288,17.0%,
5,3810,3.7%,

Value,Count,Frequency (%),Unnamed: 3
4,17288,17.0%,
5,3810,3.7%,
6,4082,4.0%,
7,1996,2.0%,
8,180,0.2%,

0,1
Distinct count,10
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
[70-80),26068
[60-70),22483
[50-60),17256
Other values (7),35959

Value,Count,Frequency (%),Unnamed: 3
[70-80),26068,25.6%,
[60-70),22483,22.1%,
[50-60),17256,17.0%,
[80-90),17197,16.9%,
[40-50),9685,9.5%,
[30-40),3775,3.7%,
[90-100),2793,2.7%,
[20-30),1657,1.6%,
[10-20),691,0.7%,
[0-10),161,0.2%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,54755
Ch,47011

Value,Count,Frequency (%),Unnamed: 3
No,54755,53.8%,
Ch,47011,46.2%,

0,1
Distinct count,4
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,101680
Steady,79
Up,6

Value,Count,Frequency (%),Unnamed: 3
No,101680,99.9%,
Steady,79,0.1%,
Up,6,0.0%,
Down,1,0.0%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
Yes,72669
No,29097

Value,Count,Frequency (%),Unnamed: 3
Yes,72669,71.4%,
No,29097,28.6%,

0,1
Distinct count,10
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
Circulatory,30437
Respiratory,14423
Neoplasms,14056
Other values (7),42850

Value,Count,Frequency (%),Unnamed: 3
Circulatory,30437,29.9%,
Respiratory,14423,14.2%,
Neoplasms,14056,13.8%,
Digestive,9504,9.3%,
Diabeties,8757,8.6%,
Other,7520,7.4%,
Injury,6974,6.9%,
Genitourinary,5117,5.0%,
Musculoskeletal,4957,4.9%,
,21,0.0%,

0,1
Distinct count,10
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
Circulatory,31881
Neoplasms,18805
Diabeties,12794
Other values (7),38286

Value,Count,Frequency (%),Unnamed: 3
Circulatory,31881,31.3%,
Neoplasms,18805,18.5%,
Diabeties,12794,12.6%,
Respiratory,10895,10.7%,
Other,10271,10.1%,
Genitourinary,8376,8.2%,
Digestive,4194,4.1%,
Injury,2428,2.4%,
Musculoskeletal,1764,1.7%,
,358,0.4%,

0,1
Distinct count,10
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
Circulatory,30306
Neoplasms,17849
Diabeties,17157
Other values (7),36454

Value,Count,Frequency (%),Unnamed: 3
Circulatory,30306,29.8%,
Neoplasms,17849,17.5%,
Diabeties,17157,16.9%,
Other,13145,12.9%,
Respiratory,7358,7.2%,
Genitourinary,6680,6.6%,
Digestive,3987,3.9%,
Injury,1946,1.9%,
Musculoskeletal,1915,1.9%,
,1423,1.4%,

0,1
Distinct count,26
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
Discharged to home,55967
Discharged/transferred to SNF,12688
Discharged/transferred to home with home health service,11943
Other values (23),21168

Value,Count,Frequency (%),Unnamed: 3
Discharged to home,55967,55.0%,
Discharged/transferred to SNF,12688,12.5%,
Discharged/transferred to home with home health service,11943,11.7%,
,3680,3.6%,
Left AMA,3521,3.5%,
Discharged/transferred to a long term care hospital.,3330,3.3%,
Discharged/transferred to another short term hospital,2195,2.2%,
Expired,1642,1.6%,
Discharged/transferred to another rehab fac including rehab units of a hospital.,1508,1.5%,
Discharged/transferred to another type of inpatient care institution,1451,1.4%,

0,1
Distinct count,26
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,4.3884
Minimum,1
Maximum,28
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,1
Q1,1
Median,1
Q3,6
95-th percentile,22
Maximum,28
Range,27
Interquartile range,5

0,1
Standard deviation,6.0138
Coef of variation,1.3704
Kurtosis,3.7699
Mean,4.3884
MAD,4.1846
Skewness,2.1784
Sum,446586
Variance,36.166
Memory size,795.1 KiB

Value,Count,Frequency (%),Unnamed: 3
1,55967,55.0%,
3,12688,12.5%,
6,11943,11.7%,
18,3680,3.6%,
7,3521,3.5%,
23,3330,3.3%,
2,2195,2.2%,
11,1642,1.6%,
22,1508,1.5%,
5,1451,1.4%,

Value,Count,Frequency (%),Unnamed: 3
1,55967,55.0%,
2,2195,2.2%,
3,12688,12.5%,
4,1107,1.1%,
5,1451,1.4%,

Value,Count,Frequency (%),Unnamed: 3
23,3330,3.3%,
24,41,0.0%,
25,984,1.0%,
27,5,0.0%,
28,88,0.1%,

0,1
Distinct count,101766
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,165200000
Minimum,12522
Maximum,443867222
Zeros (%),0.0%

0,1
Minimum,12522
5-th percentile,27171000
Q1,84961000
Median,152390000
Q3,230270000
95-th percentile,378960000
Maximum,443867222
Range,443854700
Interquartile range,145310000

0,1
Standard deviation,102640000
Coef of variation,0.6213
Kurtosis,-0.10207
Mean,165200000
MAD,81937000
Skewness,0.69914
Sum,16811910668468
Variance,1.0535e+16
Memory size,795.1 KiB

Value,Count,Frequency (%),Unnamed: 3
246677502,1,0.0%,
191454822,1,0.0%,
127434234,1,0.0%,
46489200,1,0.0%,
160364346,1,0.0%,
92364402,1,0.0%,
109195680,1,0.0%,
107830902,1,0.0%,
68240916,1,0.0%,
152919672,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
12522,1,0.0%,
15738,1,0.0%,
16680,1,0.0%,
28236,1,0.0%,
35754,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
443847548,1,0.0%,
443847782,1,0.0%,
443854148,1,0.0%,
443857166,1,0.0%,
443867222,1,0.0%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
Female,54711
Male,47055

Value,Count,Frequency (%),Unnamed: 3
Female,54711,53.8%,
Male,47055,46.2%,

0,1
Distinct count,4
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,96575
Steady,4670
Up,327

Value,Count,Frequency (%),Unnamed: 3
No,96575,94.9%,
Steady,4670,4.6%,
Up,327,0.3%,
Down,194,0.2%,

0,1
Distinct count,4
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,89080
Steady,11356
Up,770

Value,Count,Frequency (%),Unnamed: 3
No,89080,87.5%,
Steady,11356,11.2%,
Up,770,0.8%,
Down,560,0.6%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,101753
Steady,13

Value,Count,Frequency (%),Unnamed: 3
No,101753,100.0%,
Steady,13,0.0%,

0,1
Distinct count,4
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,91116
Steady,9274
Up,812

Value,Count,Frequency (%),Unnamed: 3
No,91116,89.5%,
Steady,9274,9.1%,
Up,812,0.8%,
Down,564,0.6%,

0,1
Distinct count,4
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,101060
Steady,692
Up,8

Value,Count,Frequency (%),Unnamed: 3
No,101060,99.3%,
Steady,692,0.7%,
Up,8,0.0%,
Down,6,0.0%,

0,1
Distinct count,4
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,47497
Steady,23910
Down,21163

Value,Count,Frequency (%),Unnamed: 3
No,47497,46.7%,
Steady,23910,23.5%,
Down,21163,20.8%,
Up,9196,9.0%,

0,1
Distinct count,4
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
,96420
Norm,2597
>200,1485

Value,Count,Frequency (%),Unnamed: 3
,96420,94.7%,
Norm,2597,2.6%,
>200,1485,1.5%,
>300,1264,1.2%,

0,1
Distinct count,73
Unique (%),0.1%
Missing (%),0.0%
Missing (n),0

0,1
Unspecified,49949
InternalMedicine,14635
Emergency/Trauma,7565
Other values (70),29617

Value,Count,Frequency (%),Unnamed: 3
Unspecified,49949,49.1%,
InternalMedicine,14635,14.4%,
Emergency/Trauma,7565,7.4%,
Family/GeneralPractice,7440,7.3%,
Cardiology,5352,5.3%,
Surgery-General,3099,3.0%,
Nephrology,1613,1.6%,
Orthopedics,1400,1.4%,
Orthopedics-Reconstructive,1233,1.2%,
Radiologist,1140,1.1%,

0,1
Distinct count,7
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,1.1786
Minimum,0
Maximum,6
Zeros (%),23.5%

0,1
Minimum,0
5-th percentile,0
Q1,1
Median,1
Q3,2
95-th percentile,3
Maximum,6
Range,6
Interquartile range,1

0,1
Standard deviation,0.92498
Coef of variation,0.7848
Kurtosis,0.23475
Mean,1.1786
MAD,0.71647
Skewness,0.66582
Sum,119943
Variance,0.85558
Memory size,795.1 KiB

Value,Count,Frequency (%),Unnamed: 3
1,46463,45.7%,
0,23890,23.5%,
2,22213,21.8%,
3,7812,7.7%,
4,1327,1.3%,
5,56,0.1%,
6,5,0.0%,

Value,Count,Frequency (%),Unnamed: 3
0,23890,23.5%,
1,46463,45.7%,
2,22213,21.8%,
3,7812,7.7%,
4,1327,1.3%,

Value,Count,Frequency (%),Unnamed: 3
2,22213,21.8%,
3,7812,7.7%,
4,1327,1.3%,
5,56,0.1%,
6,5,0.0%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
Mean,0.12009

0,1
0,89545
1,12221

Value,Count,Frequency (%),Unnamed: 3
0,89545,88.0%,
1,12221,12.0%,

0,1
Distinct count,4
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,81778
Steady,18346
Up,1067

Value,Count,Frequency (%),Unnamed: 3
No,81778,80.4%,
Steady,18346,18.0%,
Up,1067,1.0%,
Down,575,0.6%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,101765
Steady,1

Value,Count,Frequency (%),Unnamed: 3
No,101765,100.0%,
Steady,1,0.0%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,101764
Steady,2

Value,Count,Frequency (%),Unnamed: 3
No,101764,100.0%,
Steady,2,0.0%,

0,1
Distinct count,4
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,101728
Steady,31
Down,5

Value,Count,Frequency (%),Unnamed: 3
No,101728,100.0%,
Steady,31,0.0%,
Down,5,0.0%,
Up,2,0.0%,

0,1
Distinct count,4
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,101063
Steady,668
Up,24

Value,Count,Frequency (%),Unnamed: 3
No,101063,99.3%,
Steady,668,0.7%,
Up,24,0.0%,
Down,11,0.0%,

0,1
Distinct count,121
Unique (%),0.1%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,34.047
Minimum,1
Maximum,129
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,3
Q1,16
Median,30
Q3,49
95-th percentile,73
Maximum,129
Range,128
Interquartile range,33

0,1
Standard deviation,22.387
Coef of variation,0.65752
Kurtosis,0.097047
Mean,34.047
MAD,18.526
Skewness,0.62446
Sum,3464866
Variance,501.18
Memory size,795.1 KiB

Value,Count,Frequency (%),Unnamed: 3
1,2813,2.8%,
25,2009,2.0%,
26,1987,2.0%,
30,1981,1.9%,
29,1959,1.9%,
43,1935,1.9%,
23,1903,1.9%,
19,1891,1.9%,
9,1884,1.9%,
17,1858,1.8%,

Value,Count,Frequency (%),Unnamed: 3
1,2813,2.8%,
2,1775,1.7%,
3,1560,1.5%,
4,1394,1.4%,
5,1380,1.4%,

Value,Count,Frequency (%),Unnamed: 3
117,50,0.0%,
118,39,0.0%,
119,41,0.0%,
120,35,0.0%,
129,1,0.0%,

0,1
Distinct count,75
Unique (%),0.1%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,16.56
Minimum,1
Maximum,81
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,6
Q1,10
Median,15
Q3,20
95-th percentile,33
Maximum,81
Range,80
Interquartile range,10

0,1
Standard deviation,9.106
Coef of variation,0.54986
Kurtosis,4.3994
Mean,16.56
MAD,6.6411
Skewness,1.6337
Sum,1685295
Variance,82.919
Memory size,795.1 KiB

Value,Count,Frequency (%),Unnamed: 3
13,5966,5.9%,
12,5897,5.8%,
11,5691,5.6%,
15,5634,5.5%,
14,5592,5.5%,
16,5299,5.2%,
10,5240,5.1%,
9,4831,4.7%,
17,4799,4.7%,
18,4424,4.3%,

Value,Count,Frequency (%),Unnamed: 3
1,260,0.3%,
2,464,0.5%,
3,892,0.9%,
4,1399,1.4%,
5,1998,2.0%,

Value,Count,Frequency (%),Unnamed: 3
72,3,0.0%,
74,1,0.0%,
75,2,0.0%,
79,1,0.0%,
81,1,0.0%,

0,1
Distinct count,7
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,1.4566
Minimum,0
Maximum,6
Zeros (%),44.2%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,1
Q3,2
95-th percentile,6
Maximum,6
Range,6
Interquartile range,2

0,1
Standard deviation,1.7922
Coef of variation,1.2304
Kurtosis,0.36278
Mean,1.4566
MAD,1.4675
Skewness,1.1806
Sum,148230
Variance,3.212
Memory size,795.1 KiB

Value,Count,Frequency (%),Unnamed: 3
0,45030,44.2%,
1,19893,19.5%,
2,12200,12.0%,
3,10022,9.8%,
6,5733,5.6%,
4,4967,4.9%,
5,3921,3.9%,

Value,Count,Frequency (%),Unnamed: 3
0,45030,44.2%,
1,19893,19.5%,
2,12200,12.0%,
3,10022,9.8%,
4,4967,4.9%,

Value,Count,Frequency (%),Unnamed: 3
2,12200,12.0%,
3,10022,9.8%,
4,4967,4.9%,
5,3921,3.9%,
6,5733,5.6%,

0,1
Distinct count,16
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,7.4226
Minimum,1
Maximum,16
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,4
Q1,6
Median,8
Q3,9
95-th percentile,9
Maximum,16
Range,15
Interquartile range,3

0,1
Standard deviation,1.9336
Coef of variation,0.2605
Kurtosis,-0.079056
Mean,7.4226
MAD,1.6684
Skewness,-0.87675
Sum,755369
Variance,3.7388
Memory size,795.1 KiB

Value,Count,Frequency (%),Unnamed: 3
9,49474,48.6%,
5,11393,11.2%,
8,10616,10.4%,
7,10393,10.2%,
6,10161,10.0%,
4,5537,5.4%,
3,2835,2.8%,
2,1023,1.0%,
1,219,0.2%,
16,45,0.0%,

Value,Count,Frequency (%),Unnamed: 3
1,219,0.2%,
2,1023,1.0%,
3,2835,2.8%,
4,5537,5.4%,
5,11393,11.2%,

Value,Count,Frequency (%),Unnamed: 3
12,9,0.0%,
13,16,0.0%,
14,7,0.0%,
15,10,0.0%,
16,45,0.0%,

0,1
Distinct count,32
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.21223
Minimum,0
Maximum,76
Zeros (%),88.1%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,0
Q3,0
95-th percentile,1
Maximum,76
Range,76
Interquartile range,0

0,1
Standard deviation,0.91784
Coef of variation,4.3247
Kurtosis,1025.3
Mean,0.21223
MAD,0.37381
Skewness,20.327
Sum,21598
Variance,0.84243
Memory size,795.1 KiB

Value,Count,Frequency (%),Unnamed: 3
0,89622,88.1%,
1,7854,7.7%,
2,2292,2.3%,
3,1105,1.1%,
4,359,0.4%,
5,180,0.2%,
6,88,0.1%,
7,72,0.1%,
8,49,0.0%,
9,33,0.0%,

Value,Count,Frequency (%),Unnamed: 3
0,89622,88.1%,
1,7854,7.7%,
2,2292,2.3%,
3,1105,1.1%,
4,359,0.4%,

Value,Count,Frequency (%),Unnamed: 3
42,1,0.0%,
46,1,0.0%,
54,1,0.0%,
63,1,0.0%,
76,1,0.0%,

0,1
Distinct count,21
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.63557
Minimum,0
Maximum,21
Zeros (%),66.5%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,0
Q3,1
95-th percentile,3
Maximum,21
Range,21
Interquartile range,1

0,1
Standard deviation,1.2629
Coef of variation,1.987
Kurtosis,20.719
Mean,0.63557
MAD,0.84475
Skewness,3.6141
Sum,64679
Variance,1.5948
Memory size,795.1 KiB

Value,Count,Frequency (%),Unnamed: 3
0,67630,66.5%,
1,19521,19.2%,
2,7566,7.4%,
3,3411,3.4%,
4,1622,1.6%,
5,812,0.8%,
6,480,0.5%,
7,268,0.3%,
8,151,0.1%,
9,111,0.1%,

Value,Count,Frequency (%),Unnamed: 3
0,67630,66.5%,
1,19521,19.2%,
2,7566,7.4%,
3,3411,3.4%,
4,1622,1.6%,

Value,Count,Frequency (%),Unnamed: 3
16,6,0.0%,
17,1,0.0%,
18,1,0.0%,
19,2,0.0%,
21,1,0.0%,

0,1
Distinct count,39
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.36936
Minimum,0
Maximum,42
Zeros (%),83.6%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,0
Q3,0
95-th percentile,2
Maximum,42
Range,42
Interquartile range,0

0,1
Standard deviation,1.2673
Coef of variation,3.431
Kurtosis,147.91
Mean,0.36936
MAD,0.61721
Skewness,8.833
Sum,37588
Variance,1.606
Memory size,795.1 KiB

Value,Count,Frequency (%),Unnamed: 3
0,85027,83.6%,
1,8547,8.4%,
2,3594,3.5%,
3,2042,2.0%,
4,1099,1.1%,
5,533,0.5%,
6,303,0.3%,
7,155,0.2%,
8,98,0.1%,
9,83,0.1%,

Value,Count,Frequency (%),Unnamed: 3
0,85027,83.6%,
1,8547,8.4%,
2,3594,3.5%,
3,2042,2.0%,
4,1099,1.1%,

Value,Count,Frequency (%),Unnamed: 3
37,1,0.0%,
38,1,0.0%,
39,1,0.0%,
40,1,0.0%,
42,1,0.0%,

0,1
Distinct count,71518
Unique (%),70.3%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,54330000
Minimum,135
Maximum,189502619
Zeros (%),0.0%

0,1
Minimum,135
5-th percentile,1457000
Q1,23413000
Median,45505000
Q3,87546000
95-th percentile,111480000
Maximum,189502619
Range,189502484
Interquartile range,64133000

0,1
Standard deviation,38696000
Coef of variation,0.71224
Kurtosis,-0.34737
Mean,54330000
MAD,33217000
Skewness,0.47128
Sum,5528987557122
Variance,1497400000000000
Memory size,795.1 KiB

Value,Count,Frequency (%),Unnamed: 3
88785891,40,0.0%,
43140906,28,0.0%,
23199021,23,0.0%,
88227540,23,0.0%,
1660293,23,0.0%,
23643405,22,0.0%,
84428613,22,0.0%,
92709351,21,0.0%,
90609804,20,0.0%,
37096866,20,0.0%,

Value,Count,Frequency (%),Unnamed: 3
135,2,0.0%,
378,1,0.0%,
729,1,0.0%,
774,1,0.0%,
927,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
189351095,1,0.0%,
189365864,1,0.0%,
189445127,1,0.0%,
189481478,1,0.0%,
189502619,1,0.0%,

0,1
Distinct count,4
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,94438
Steady,6976
Up,234

Value,Count,Frequency (%),Unnamed: 3
No,94438,92.8%,
Steady,6976,6.9%,
Up,234,0.2%,
Down,118,0.1%,

0,1
Distinct count,6
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
Caucasian,76099
AfricanAmerican,19210
Unspecified,2273
Other values (3),4184

Value,Count,Frequency (%),Unnamed: 3
Caucasian,76099,74.8%,
AfricanAmerican,19210,18.9%,
Unspecified,2273,2.2%,
Hispanic,2037,2.0%,
Other,1506,1.5%,
Asian,641,0.6%,

0,1
Distinct count,3
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
NO,54864
>30,35545
<30,11357

Value,Count,Frequency (%),Unnamed: 3
NO,54864,53.9%,
>30,35545,34.9%,
<30,11357,11.2%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
Mean,0.46088

0,1
0,54864
1,46902

Value,Count,Frequency (%),Unnamed: 3
0,54864,53.9%,
1,46902,46.1%,

0,1
Distinct count,4
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,100227
Steady,1384
Up,110

Value,Count,Frequency (%),Unnamed: 3
No,100227,98.5%,
Steady,1384,1.4%,
Up,110,0.1%,
Down,45,0.0%,

0,1
Distinct count,4
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,95401
Steady,6100
Up,178

Value,Count,Frequency (%),Unnamed: 3
No,95401,93.7%,
Steady,6100,6.0%,
Up,178,0.2%,
Down,87,0.1%,

0,1
Distinct count,14
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,4.5897
Minimum,1
Maximum,14
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,1
Q1,2
Median,4
Q3,6
95-th percentile,11
Maximum,14
Range,13
Interquartile range,4

0,1
Standard deviation,3.1603
Coef of variation,0.68857
Kurtosis,0.54156
Mean,4.5897
MAD,2.5193
Skewness,1.0755
Sum,467075
Variance,9.9876
Memory size,795.1 KiB

Value,Count,Frequency (%),Unnamed: 3
3,17170,16.9%,
2,16683,16.4%,
1,13864,13.6%,
4,13440,13.2%,
5,9600,9.4%,
6,7271,7.1%,
7,6038,5.9%,
8,4677,4.6%,
9,3304,3.2%,
10,2662,2.6%,

Value,Count,Frequency (%),Unnamed: 3
1,13864,13.6%,
2,16683,16.4%,
3,17170,16.9%,
4,13440,13.2%,
5,9600,9.4%,

Value,Count,Frequency (%),Unnamed: 3
10,2662,2.6%,
11,2222,2.2%,
12,1814,1.8%,
13,1593,1.6%,
14,1428,1.4%,

0,1
Distinct count,3
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,101727
Steady,38
Up,1

Value,Count,Frequency (%),Unnamed: 3
No,101727,100.0%,
Steady,38,0.0%,
Up,1,0.0%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,101743
Steady,23

Value,Count,Frequency (%),Unnamed: 3
No,101743,100.0%,
Steady,23,0.0%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
No,101763
Steady,3

Value,Count,Frequency (%),Unnamed: 3
No,101763,100.0%,
Steady,3,0.0%,

Unnamed: 0,encounter_id,patient_nbr,race,gender,age,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,medical_specialty,num_lab_procedures,num_procedures,num_medications,number_outpatient,number_emergency,number_inpatient,number_diagnoses,max_glu_serum,A1Cresult,metformin,repaglinide,nateglinide,chlorpropamide,glimepiride,acetohexamide,glipizide,glyburide,tolbutamide,pioglitazone,rosiglitazone,acarbose,miglitol,troglitazone,tolazamide,insulin,glyburide-metformin,glipizide-metformin,metformin-rosiglitazone,metformin-pioglitazone,change,diabetesMed,readmitted,diag_1_val,diag_2_val,diag_3_val,readmitted_tf,medication_count,admission_source,discharge_disposition,admission_type,meds_increased
0,2278392,8222157,Caucasian,Female,[0-10),6,25,1,1,Pediatrics-Endocrinology,41,0,1,0,0,0,1,,,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,NO,Diabeties,,,0,0,Physician Referral,Not Mapped,,0
1,149190,55629189,Caucasian,Female,[10-20),1,1,7,3,Unspecified,59,0,18,0,0,0,9,,,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Up,No,No,No,No,Ch,Yes,>30,Neoplasms,Diabeties,Neoplasms,1,1,Emergency Room,Discharged to home,Emergency,1
2,64410,86047875,AfricanAmerican,Female,[20-30),1,1,7,2,Unspecified,11,5,13,2,0,1,6,,,No,No,No,No,No,No,Steady,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Yes,NO,Other,Diabeties,Other,0,1,Emergency Room,Discharged to home,Emergency,0
3,500364,82442376,Caucasian,Male,[30-40),1,1,7,2,Unspecified,44,1,16,0,0,0,7,,,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Up,No,No,No,No,Ch,Yes,NO,Neoplasms,Diabeties,Circulatory,0,1,Emergency Room,Discharged to home,Emergency,1
4,16680,42519267,Caucasian,Male,[40-50),1,1,7,1,Unspecified,51,0,8,0,0,0,5,,,No,No,No,No,No,No,Steady,No,No,No,No,No,No,No,No,Steady,No,No,No,No,Ch,Yes,NO,Neoplasms,Neoplasms,Diabeties,0,2,Emergency Room,Discharged to home,Emergency,0


In [20]:
df_zg.to_csv("Diabetes_tmp_Cleaned.csv", index = False, sep=',')
print("Done Saving File")

Done Saving File
