In [100]:
import pandas as pd

# Load data

In [101]:
data_df = pd.read_csv("diabetic_data.csv")

# Final dataset

Create final dataset for analysis

1. Replace missing medical specialty values with "missing"
2. Select only the first encounter for each patient
3. Remove patient encounters resulting in dischange to hospice or expiration

In [102]:
data_df["medical_specialty"] = data_df["medical_specialty"].replace(["?"],"missing")
#print(data_df["medical_specialty"].head(5))

In [103]:
filterpt_df = data_df.groupby("patient_nbr").first()
#print(filterpt_df)

Hospice and expiration discharge codes:

11, 13-14, 19-21

In [104]:
final_df = filterpt_df[filterpt_df.discharge_disposition_id !=11] 
final_df = final_df[final_df.discharge_disposition_id  != 13]
final_df = final_df[final_df.discharge_disposition_id  != 14]
final_df = final_df[final_df.discharge_disposition_id  != 19]
final_df = final_df[final_df.discharge_disposition_id  != 20]
final_df = final_df[final_df.discharge_disposition_id  != 21]


total_encounters =len(final_df['encounter_id'])
#print(total_encounters)
#Differences in total may have been due to different selection of categorical codes for filtering 

# A1C

Create dataframe:
1. Find the total number of encounters meeting the criteria for each category
2. Find the total number of encounters for each category for readmissions (<30)
3. Calculate the percent total of each category
4. Calculate the percent of encounters for each category that were readmissions 

Codes for each criteria:


No test performed = None

Result was high and the medication was changed = >8/Ch

Result was high and no medication change = >8/No

Normal result = Norm & >7

In [105]:
def total_count(x):
    """" Counts number of encounters for each category"""
    if x["A1Cresult"] == "None":
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False
     
no_test = final_df.apply(total_count, axis=1)
no_test_total = len(no_test[no_test == True].index)+len(no_test[no_test == "re"].index)
no_test_re = len(no_test[no_test == "re"].index)
# print(no_test_total)
# print(no_test_re)

In [106]:
def total_count(x):
    """" Counts number of encounters for each category"""
    if x["A1Cresult"] == ">8" and x["change"] == "Ch":
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False


high_ch = final_df.apply(total_count, axis=1)
high_ch_total= len(high_ch[high_ch == True].index)+ len(high_ch[high_ch == "re"].index)
high_ch_re = len(high_ch[high_ch == "re"].index)
# print(high_ch_total)
# print(high_ch_re)

In [107]:
def total_count(x):
    """" Counts number of encounters for each category"""
    if x["A1Cresult"] == ">8" and x["change"] == "No":
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False


high_no = final_df.apply(total_count, axis=1)
high_no_total = len(high_no[high_no == True].index)+len(high_no[high_no == "re"].index)
high_no_re = len(high_no[high_no == "re"].index)
# print(high_no_total)
# print(high_no_re)

In [108]:
def total_count(x):
    """" Counts number of encounters for each category"""
    if x["A1Cresult"] == ">7" or x["A1Cresult"] == "Norm":
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False
      

normal = final_df.apply(total_count, axis=1)
normal_total = len(normal[normal == True].index)+len(normal[normal == "re"].index)
normal_re = len(normal[normal == "re"].index)
# print(normal_total)
# print(normal_re)


In [109]:
#Create a list from all the values to add to the final dataframe
A1C_encounters = [no_test_total, high_ch_total, high_no_total, normal_total]
A1C_re = [no_test_re, high_ch_re, high_no_re, normal_re]

### Percent of population

In [110]:
def population_percent(portion):
    """"Divides encounters per category by total number of encounters"""
    result = round((portion/total_encounters)*100,1)
    return f'{result}%'

no_test_totalper = population_percent(no_test_total)

high_ch_totalper = population_percent(high_ch_total)

high_no_totalper = population_percent(high_no_total)

normal_totalper = population_percent(normal_total)


#Create list to add results to final dataframe
total_percent = [no_test_totalper,high_ch_totalper,high_no_totalper,normal_totalper]
#print(total_percent)

### Readmitted population percent

In [111]:
def population_percent(portion,total):
    """"Divides readmission encounters per category by total encounters per category"""
    result = round((portion/total)*100,1)
    return f'{result}%'
    
    
no_test_reper = population_percent(no_test_re,no_test_total)

high_ch_reper = population_percent(high_ch_re,high_ch_total)

high_no_reper = population_percent(high_no_re, high_no_total)

normal_reper = population_percent(normal_re, normal_total)

#Create list to add results to final dataframe
readmitted_percent = [no_test_reper,high_ch_reper,high_no_reper,normal_reper]
#print(readmitted_percent)

### Put results together to create dataframe

1. Create 2 dictionaries, one for total encounters and another for readmissions
2. Create list of rows with category names
3. Create 2 dataframes with respective results and column names
4. Merge dataframes on columns and indicate "Readmitted" columns
5. Format dataframe (add title, align)

In [112]:
HbA1c = {"Number of encounters" : A1C_encounters, "% of the population": total_percent}
HbA1c_re = {"Number of encounters": A1C_re, "% in group" : readmitted_percent}

rows= ["No test was performed", "Result was high and the diabetic medication was changed", 
       "Result was high but the diabetic medication was not changed", "Normal result of the test"]

total = pd.DataFrame(HbA1c, columns = ["Number of encounters", "% of the population"], index = rows)
re = pd.DataFrame(HbA1c_re,columns = ["Number of encounters", "% in group" ], index = rows)

HbA1c_df = pd.concat([total,re], axis=1, keys = ['','Readmitted'])
index = HbA1c_df.index
index.name = "HbA1c"
HbA1c_df .style.set_properties(**{'text-align': 'center'})

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Readmitted,Readmitted
Unnamed: 0_level_1,Number of encounters,% of the population,Number of encounters,% in group
HbA1c,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
No test was performed,57128,81.6%,5199,9.1%
Result was high and the diabetic medication was changed,4058,5.8%,348,8.6%
Result was high but the diabetic medication was not changed,2181,3.1%,161,7.4%
Normal result of the test,6606,9.4%,569,8.6%


Differences in results may have been due in part to the ambiguity of the label >7% which could technically either be >8% or =7%
It may also be due to the sorting of the encounters. We were not provided dates for these encounters so it is difficult to know whether the first encounter was selected.
It is also known that for certain visits (0.1%) encounters previous encounters were referenced for test results.

## Admission Source

Create dataframe:
1. Find the total number of encounters meeting the criteria for each category
2. Find the total number of encounters for each category for readmissions (<30)
3. Calculate the percent total of each category
4. Calculate the percent of encounters for each category that were readmissions 

Category codes:

Admitted from emergency room = 7

Admitted because of physician/clinic referral = 1 & 2

Otherwise = 3-6,8-26

In [113]:
def total_count(x):
    """" Counts number of encounters for each category"""
    if x["admission_source_id"] == 7:
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False
     
ER = final_df.apply(total_count, axis=1)
ER_total = len(ER[ER == True].index)+len(ER[ER == "re"].index)
ER_re = len(ER[ER == "re"].index)

# print(ER_total)
# print(ER_re)

In [114]:
def total_count(x):
    """" Counts number of encounters for each category"""
    if x["admission_source_id"] == 1 or x["admission_source_id"] == 2:
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False
     
pc = final_df.apply(total_count, axis=1)
pc_total = len(pc[pc == True].index)+len(pc[pc == "re"].index)
pc_re = len(pc[pc == "re"].index)

# print(pc_total)
# print(pc_re)

In [115]:
def total_count(x):
    """" Counts number of encounters for each category"""
    if x["admission_source_id"] != 1 and x["admission_source_id"] != 2 and x["admission_source_id"] != 7:
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False
     
other = final_df.apply(total_count, axis=1)
other_total = len(other[other == True].index)+len(other[other == "re"].index)
other_re = len(other[other == "re"].index)

# print(other_total)
# print(other_re)

In [116]:
#Create a list from all the values to add to the final dataframe
ad_source_total = [ER_total,pc_total, other_total]
ad_source_re = [ER_re,pc_re, other_re]

### Percent population

In [117]:
def population_percent(portion):
    """"Divides encounters per category by total number of encounters"""
    result = round((portion/total_encounters)*100,1)
    return f'{result}%'

ER_totalper = population_percent(ER_total)

pc_totalper = population_percent(pc_total)

other_totalper = population_percent(other_total)

#Create list to add results to final dataframe
total_percent = [ER_totalper, pc_totalper,other_totalper]
#print(total_percent)

### Readmitted population percent

In [118]:
def population_percent(portion,total):
    """"Divides readmission encounters per category by total encounters per category"""
    result = round((portion/total)*100,1)
    return f'{result}%'

ER_reper = population_percent(ER_re,ER_total)

pc_reper = population_percent(pc_re,pc_total)

other_reper = population_percent(other_re,other_total)

#Create list to add results to final dataframe
readmitted_percent = [ER_reper, pc_reper,other_reper]
#print(readmitted_percent)

### Put results together to create dataframe

1. Create 2 dictionaries, one for total encounters and another for readmissions
2. Create list of rows with category names
3. Create 2 dataframes with respective results and column names
4. Merge dataframes on columns and indicate "Readmitted" columns
5. Format dataframe (add title, align)

In [119]:
adsource = {"Number of encounters" : ad_source_total, "% of the population": total_percent}
adsource_re = {"Number of encounters": ad_source_re, "% in group" : readmitted_percent}
rows= ["Admitted from emergency room", "Admitted because of physician/clinic referral", "Otherwise"] 
total = pd.DataFrame(adsource, columns = ["Number of encounters", "% of the population"], index = rows)
re = pd.DataFrame(adsource_re,columns = ["Number of encounters", "% in group" ], index = rows)

adsource_df = pd.concat([total,re], axis=1, keys = ['','Readmitted'])
index = adsource_df.index
index.name = "Admission source"
adsource_df.style.set_properties(**{'text-align': 'center'})

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Readmitted,Readmitted
Unnamed: 0_level_1,Number of encounters,% of the population,Number of encounters,% in group
Admission source,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Admitted from emergency room,37260,53.2%,3446,9.2%
Admitted because of physician/clinic referral,22654,32.4%,1952,8.6%
Otherwise,10059,14.4%,879,8.7%


DIfferences in results may have been due to the sorting of the encounters. We were not provided dates for these encounters so it is difficult to know whether the first encounter was selected.

# Specialty of an admitting physician

Create dataframe:
1. Find the total number of encounters meeting the criteria for each category
2. Find the total number of encounters for each category for readmissions (<30)
3. Calculate the percent total of each category
4. Calculate the percent of encounters for each category that were readmissions 

Codes for each category:

Internal medicine = InternalMedicine

Cardiology = Cardiology/ Cardiology-Pediatric

Surgery = Surgeon, Surgery-Cardiovascular, Surgery-Cardiovascular/Thoracic, Surgery-Colon&Rectal

Family/general practice = Family/GeneralPractice

Missing or unknown = missing

Other

In [120]:
def total_count(x):
    """" Counts number of encounters for each category"""
    if x["medical_specialty"] == "InternalMedicine":
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False
     
IM = final_df.apply(total_count, axis=1)
IM_total = len(IM[IM == True].index)+len(IM[IM == "re"].index)
IM_re = len(IM[IM == "re"].index)
#print(no_test_total)
#print(no_test_re)

In [121]:
def total_count(x):
    """" Counts number of encounters for each category"""
    if x["medical_specialty"] == "Cardiology" or x["medical_specialty"] == "Cardiology-Pediatric":
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False
     
car = final_df.apply(total_count, axis=1)
car_total = len(car[car == True].index)+len(car[car == "re"].index)
car_re = len(car[car == "re"].index)
#print(car_total)
#print(car_re)

In [122]:
def total_count(x):
    """" Counts number of encounters for each category"""
    if x["medical_specialty"] == "Surgeon" or x["medical_specialty"].startswith("Surgery"):
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False
     
sur = final_df.apply(total_count, axis=1)
sur_total = len(sur[sur == True].index)+len(sur[sur == "re"].index)
sur_re = len(sur[sur == "re"].index)
#print(sur_total)
#print(sur_re)

In [123]:
def total_count(x):
    """" Counts number of encounters for each category"""
    if x["medical_specialty"] == "Family/GeneralPractice":
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False
     
fam = final_df.apply(total_count, axis=1)
fam_total = len(fam[fam == True].index)+len(fam[fam == "re"].index)
fam_re = len(fam[fam == "re"].index)
# print(fam_total)
# print(fam_re)

In [124]:
def total_count(x):
    """" Counts number of encounters for each category"""
    if x["medical_specialty"] == "missing":
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False
     
mis = final_df.apply(total_count, axis=1)
mis_total = len(mis[mis == True].index)+len(mis[mis == "re"].index)
mis_re = len(mis[mis == "re"].index)
# print(mis_total)
# print(mis_re)

In [125]:
def total_count(x):
    """" Counts number of encounters for each category"""
    if x["medical_specialty"] != "Cardiology" and x["medical_specialty"] != "Cardiology-Pediatric" and x["medical_specialty"] != "Family/GeneralPractice" and x["medical_specialty"] != "InternalMedicine" and x["medical_specialty"] != "Surgeon" and not x["medical_specialty"].startswith("Surgery") and x["medical_specialty"] != "missing":
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False
     
other = final_df.apply(total_count, axis=1)
other_total = len(other[other == True].index)+len(other[other == "re"].index)
other_re = len(other[other == "re"].index)
# print(other_total)
# print(other_re)

In [126]:
#Create a list from all the values to add to the final dataframe
specialty_total = [IM_total,car_total, sur_total, fam_total, mis_total, other_total]
specialty_re = [IM_re,car_re, sur_re, fam_re, mis_re, other_re]

### Percent population

In [127]:
def population_percent(portion):
    """"Divides encounters per category by total number of encounters"""
    result = round((portion/total_encounters)*100,1)
    return f'{result}%'

IM_totalper = population_percent(IM_total)

car_totalper = population_percent(car_total)

sur_totalper = population_percent(sur_total)

fam_totalper = population_percent(fam_total)

mis_totalper = population_percent(mis_total)

other_totalper = population_percent(other_total)

#Create list to add results to final dataframe
total_percent = [IM_totalper, car_totalper, sur_totalper, fam_totalper, mis_totalper, other_totalper]
#print(total_percent)

### Readmitted population percent

In [128]:
def population_percent(portion, total):
    """"Divides readmission encounters per category by total encounters per category"""
    result = round((portion/total)*100,1)
    return f'{result}%'

IM_reper = population_percent(IM_re, IM_total)

car_reper = population_percent(car_re, car_total)

sur_reper = population_percent(sur_re, sur_total)

fam_reper = population_percent(fam_re, fam_total)

mis_reper = population_percent(mis_re, mis_total)

other_reper = population_percent(other_re, other_total)

#Create list to add results to final dataframe
readmitted_percent = [IM_reper, car_reper, sur_reper, fam_reper, mis_reper, other_reper]
#print(readmitted_percent)

### Put results together to create dataframe

1. Create 2 dictionaries, one for total encounters and another for readmissions
2. Create list of rows with category names
3. Create 2 dataframes with respective results and column names
4. Merge dataframes on columns and indicate "Readmitted" columns
5. Format dataframe (add title, align)

In [129]:
docspecialty = {"Number of encounters" : specialty_total, "% of the population": total_percent}
docspecialty_re = {"Number of encounters": specialty_re, "% in group" : readmitted_percent}
rows= ["Internal Medicine", "Cardiology", "Surgery", "Family/general practice", "Missing or unknown", "Other"] 
total = pd.DataFrame(docspecialty, columns = ["Number of encounters", "% of the population"], index = rows)
re = pd.DataFrame(docspecialty_re,columns = ["Number of encounters", "% in group" ], index = rows)

docspecialty_df = pd.concat([total,re], axis=1, keys = ['','Readmitted'])
index = docspecialty_df.index
index.name = "Specialty of the admitting physician"
docspecialty_df.style.set_properties(**{'text-align': 'center'})

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Readmitted,Readmitted
Unnamed: 0_level_1,Number of encounters,% of the population,Number of encounters,% in group
Specialty of the admitting physician,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Internal Medicine,10641,15.2%,1039,9.8%
Cardiology,4214,6.0%,303,7.2%
Surgery,3725,5.3%,297,8.0%
Family/general practice,4978,7.1%,485,9.7%
Missing or unknown,33639,48.1%,3104,9.2%
Other,12776,18.3%,1049,8.2%


Differences in results may have been due to the sorting of the encounters. We were not provided dates for these encounters so it is difficult to know whether the first encounter was selected.

# Primary diagnosis

Create dataframe:
1. Convert non-numerical values
2. Find the total number of encounters meeting the criteria for each category
2. Find the total number of encounters for each category for readmissions (<30)
3. Calculate the percent total of each category
4. Calculate the percent of encounters for each category that were readmissions 

Codes for each catergory:

Circulary system = 390-459, 785
 
Diabetes = 250.xx
 
Respiratory system = 460-519, 786
 
Digestive system = 520-579, 787
 
Injury and poisoning = 800-999
 
Musculoskeletal and connective tissue = 710-739
 
Genitourinary system = 580-629, 788
 
Neoplasms = 140-239

In [130]:
def total_count(x):
    """" Counts number of encounters for each category"""
    if x["diag_1"].startswith('250'):
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False

diabetes = final_df.apply(total_count, axis=1)
diabetes_total = len(diabetes[diabetes == True].index)+len(diabetes[diabetes == "re"].index)
diabetes_re = len(diabetes[diabetes == "re"].index)
# print(diabetes_total)
# print(diabetes_re)

In [131]:
#Replace non-numerical values to convert values to integers for range evaluation
final_df["diag_1"] = final_df.diag_1.str.replace('V','999').str.replace('E','999').str.replace('?','999')

def total_count(x):
    """" Counts number of encounters for each category"""
    # Convert values in to numbers to evaluate ranges 
    x['diag_1'] = pd.to_numeric(x['diag_1'], downcast='integer')
    if x["diag_1"] in range(390,459) or x["diag_1"] == 785:
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False

circulatory = final_df.apply(total_count, axis=1)
circulatory_total = len(circulatory[circulatory == True].index)+len(circulatory[circulatory == "re"].index)
circulatory_re = len(circulatory[circulatory == "re"].index)
# print(circulatory_total)
# print(circulatory_re)

In [132]:
def total_count(x):
    """" Counts number of encounters for each category"""
    # Convert values in to numbers to evaluate ranges
    x['diag_1'] = pd.to_numeric(x['diag_1'], downcast='integer')
    if x["diag_1"] in range(460,519) or x["diag_1"] == 786:
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False

respiratory = final_df.apply(total_count, axis=1)
respiratory_total = len(respiratory[respiratory == True].index)+len(respiratory[respiratory == "re"].index)
respiratory_re = len(respiratory[respiratory == "re"].index)
# print(respiratory_total)
# print(respiratory_re)

In [133]:
def total_count(x):
    """" Counts number of encounters for each category"""
    # Convert values in to numbers to evaluate ranges
    x['diag_1'] = pd.to_numeric(x['diag_1'], downcast='integer')
    if x["diag_1"] in range(520,579) or x["diag_1"] == 787:
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False

digestive = final_df.apply(total_count, axis=1)
digestive_total = len(digestive[digestive == True].index)+len(digestive[digestive == "re"].index)
digestive_re = len(digestive[digestive == "re"].index)
# print(digestive_total)
# print(digestive_re)

In [134]:
def total_count(x):
    """" Counts number of encounters for each category"""
    # Convert values in to numbers to evaluate ranges
    x['diag_1'] = pd.to_numeric(x['diag_1'], downcast='integer')
    if x["diag_1"] in range(800,999):
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False

ip = final_df.apply(total_count, axis=1)
ip_total = len(ip[ip == True].index)+len(ip[ip == "re"].index)
ip_re = len(ip[ip == "re"].index)
# print(ip_total)
# print(ip_re)

In [135]:
def total_count(x):
    """" Counts number of encounters for each category"""
    # Convert values in to numbers to evaluate ranges
    x['diag_1'] = pd.to_numeric(x['diag_1'], downcast='integer')
    if x["diag_1"] in range(710,739):
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False

muscle = final_df.apply(total_count, axis=1)
muscle_total = len(muscle[muscle == True].index)+len(muscle[muscle == "re"].index)
muscle_re = len(muscle[muscle == "re"].index)
# print(muscle_total)
# print(muscle_re)

In [136]:
def total_count(x):
    """" Counts number of encounters for each category"""
    # Convert values in to numbers to evaluate ranges
    x['diag_1'] = pd.to_numeric(x['diag_1'], downcast='integer')
    if x["diag_1"] in range(580,629) or x["diag_1"] == 788:
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False

geni = final_df.apply(total_count, axis=1)
geni_total = len(geni[geni == True].index)+len(geni[geni == "re"].index)
geni_re = len(geni[geni == "re"].index)
# print(geni_total)
# print(geni_re)

In [137]:
def total_count(x):
    """" Counts number of encounters for each category"""
    # Convert values in to numbers to evaluate ranges
    x['diag_1'] = pd.to_numeric(x['diag_1'], downcast='integer')
    if x["diag_1"] in range(140,239):
        if x["readmitted"] == "<30":
            return "re"
        return True
    else:
        return False

neoplasm = final_df.apply(total_count, axis=1)
neoplasm_total = len(neoplasm[neoplasm == True].index)+len(neoplasm[neoplasm == "re"].index)
neoplasm_re = len(neoplasm[neoplasm == "re"].index)
# print(neoplasm_total)
# print(neoplasm_re)

In [138]:
#Calculate percent of other by subtracting sum of all categories from total encounters

total_re = len(final_df[(final_df["readmitted"] == "<30")])
other_total = total_encounters - (circulatory_total + diabetes_total + respiratory_total + digestive_total 
                                  + ip_total +  muscle_total + geni_total + neoplasm_total)
other_re = total_re - (circulatory_re + diabetes_re + respiratory_re + digestive_re + ip_re +  muscle_re 
                       + geni_re + neoplasm_re)
# print(total_re)
# print(other_total)
# print(other_re)

In [139]:
#Create a list from all the values to add to the final dataframe
diag_total = [circulatory_total, diabetes_total, respiratory_total, digestive_total, 
                                  ip_total, muscle_total, geni_total, neoplasm_total, other_total]
diag_re = [circulatory_re, diabetes_re, respiratory_re, digestive_re, ip_re, muscle_re,
                       geni_re, neoplasm_re, other_re]

### Percent population

In [140]:
def population_percent(portion):
    """"Divides encounters per category by total number of encounters"""
    result = round((portion/total_encounters)*100,1)
    return f'{result}%'

circulatory_totalper = population_percent(circulatory_total)

diabetes_totalper = population_percent(diabetes_total)

respiratory_totalper = population_percent(respiratory_total)

digestive_totalper = population_percent(digestive_total)

ip_totalper = population_percent(ip_total)

muscle_totalper = population_percent(muscle_total)

geni_totalper = population_percent(geni_total)

neoplasm_totalper = population_percent(neoplasm_total)

other_totalper = population_percent(other_total)

#Create list to add results to final dataframe
total_percent = [circulatory_totalper, diabetes_totalper, respiratory_totalper, digestive_totalper, 
                                  ip_totalper, muscle_totalper, geni_totalper, neoplasm_totalper, other_totalper]
#print(total_percent)

### Readmitted population percent

In [141]:
def population_percent(portion, total):
    """"Divides readmission encounters per category by total encounters per category"""
    result = round((portion/total)*100,1)
    return f'{result}%'

circulatory_reper = population_percent(circulatory_re, circulatory_total)

diabetes_reper = population_percent(diabetes_re, diabetes_total)

respiratory_reper = population_percent(respiratory_re, respiratory_total)

digestive_reper = population_percent(digestive_re, digestive_total)

ip_reper = population_percent(ip_re, ip_total)

muscle_reper = population_percent(muscle_re, muscle_total)

geni_reper = population_percent(geni_re, geni_total)

neoplasm_reper = population_percent(neoplasm_re, neoplasm_total)

other_reper = population_percent(other_re, other_total)

#Create list to add results to final dataframe
readmitted_percent = [circulatory_reper, diabetes_reper, respiratory_reper, digestive_reper, 
                                  ip_reper, muscle_reper, geni_reper, neoplasm_reper, other_reper]
#print(readmitted_percent)

### Put results together to create dataframe

1. Create 2 dictionaries, one for total encounters and another for readmissions
2. Create list of rows with category names
3. Create 2 dataframes with respective results and column names
4. Merge dataframes on columns and indicate "Readmitted" columns
5. Format dataframe (add title, align)

In [142]:
pridiag_total = {"Number of encounters" : diag_total, "% of the population": total_percent}
pridiag_re = {"Number of encounters": diag_re, "% in group" : readmitted_percent}
rows= ["Adisease of the circulatory system (icd9: 390-459)", "Diabetes(icd9:250.xx)", "A disease of the respiratory system (icd9: 460-519, 786)", "Disease of the digestive system (icd9: 520-579,787)", "Injury and poisonin (icd9: 800-999)", "Diseases of the misculoskeletal system and connective tissue (icd9: 710-739)", "Diseases of the genitourinary system (icd9: 580-629, 788)", "Neoplasms (icd9: 140-239)", "Other"] 
total = pd.DataFrame(pridiag_total, columns = ["Number of encounters", "% of the population"], index = rows)
re = pd.DataFrame(pridiag_re,columns = ["Number of encounters", "% in group" ], index = rows)

pridiag_df = pd.concat([total,re], axis=1, keys = ['','Readmitted'])
index = pridiag_df.index
index.name = "Primary diagnosis"
pridiag_df.style.set_properties(**{'text-align': 'center'})

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Readmitted,Readmitted
Unnamed: 0_level_1,Number of encounters,% of the population,Number of encounters,% in group
Primary diagnosis,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Adisease of the circulatory system (icd9: 390-459),21323,30.5%,2062,9.7%
Diabetes(icd9:250.xx),5748,8.2%,524,9.1%
"A disease of the respiratory system (icd9: 460-519, 786)",9457,13.5%,689,7.3%
"Disease of the digestive system (icd9: 520-579,787)",6481,9.3%,520,8.0%
Injury and poisonin (icd9: 800-999),4677,6.7%,506,10.8%
Diseases of the misculoskeletal system and connective tissue (icd9: 710-739),4064,5.8%,341,8.4%
"Diseases of the genitourinary system (icd9: 580-629, 788)",3440,4.9%,309,9.0%
Neoplasms (icd9: 140-239),2515,3.6%,226,9.0%
Other,12268,17.5%,1100,9.0%


Differences in results may have been due to the sorting of the encounters. We were not provided dates for these encounters so it is difficult to know whether the first encounter was selected.