# U.S. Medical Insurance Costs

In [1]:
#reads insurance.csv and collects each column's values (still strings at this point) into its own list
import csv

list_of_ages, list_of_sexes, list_of_bmis = [], [], []
list_of_children, list_of_smokers = [], []
list_of_regions, list_of_charges = [], []

#pairs each csv column name with the list that receives its values
column_lists = (
    ('age', list_of_ages),
    ('sex', list_of_sexes),
    ('bmi', list_of_bmis),
    ('children', list_of_children),
    ('smoker', list_of_smokers),
    ('region', list_of_regions),
    ('charges', list_of_charges),
)

with open('insurance.csv', newline='') as csvfile:
    insurance_csv_dict = csv.DictReader(csvfile)
    for row in insurance_csv_dict:
        for column_name, column_values in column_lists:
            column_values.append(row[column_name])
        

In [2]:
#creates medical IDs for patients (1..num_patients) and converts the numeric columns from strings
num_patients = len(list_of_ages)
medical_ID = list(range(1, num_patients + 1))

#slice assignment converts in place, so the same list objects stay valid;
#int() drops any fractional part of charges and children
list_of_charges[:] = [int(float(charge)) for charge in list_of_charges]
list_of_children[:] = [int(float(child_count)) for child_count in list_of_children]
list_of_bmis[:] = [float(bmi) for bmi in list_of_bmis]
list_of_ages[:] = [int(age) for age in list_of_ages]

In [3]:
#creates dictionary with medical IDs as key and insurance information as nested dictionary values
def med_ins_charge_dict(ID, age, sex, bmi, children, smoker, region, charges):
    """Build a lookup of patient records keyed by medical ID.

    The arguments are parallel sequences: element i of every argument
    describes the same patient.

    Returns a dict mapping each ID to a dict with the keys "Age", "Sex",
    "BMI", "Children", "Smoker", "Region" and "Charges".

    Raises ValueError if the sequences do not all have the same length.
    """
    columns = (ID, age, sex, bmi, children, smoker, region, charges)
    if len({len(column) for column in columns}) > 1:
        raise ValueError("all patient columns must have the same length")

    field_names = ("Age", "Sex", "BMI", "Children", "Smoker", "Region", "Charges")
    charges_and_details = {}
    #reads the arguments themselves rather than the notebook's global lists
    for patient_id, *details in zip(*columns):
        charges_and_details[patient_id] = dict(zip(field_names, details))

    return charges_and_details

#builds the patient lookup from the ID list and column lists prepared in the earlier cells
charges_and_details = med_ins_charge_dict(medical_ID, list_of_ages, list_of_sexes, list_of_bmis, list_of_children, list_of_smokers, list_of_regions, list_of_charges)

In [4]:
#function calculates average insurance charges for each region
def regional_average_cost(charges_and_details):
    """Return the average charge per region, truncated to an int.

    charges_and_details maps patient IDs to records with 'Region' and
    'Charges' entries. Records whose region is not one of the four listed
    below are ignored.

    Returns a tuple ordered (northwest, northeast, southwest, southeast).
    Raises ZeroDivisionError if any of those regions has no records.
    """
    #insertion order here fixes the order of the returned tuple
    costs_by_region = {
        'northwest': [],
        'northeast': [],
        'southwest': [],
        'southeast': [],
    }

    for record in charges_and_details.values():
        region = record['Region']
        if region in costs_by_region:
            costs_by_region[region].append(record['Charges'])

    return tuple(int(sum(costs) / len(costs)) for costs in costs_by_region.values())

#reports the truncated average charge for each region, one line per region
northwest_avg, northeast_avg, southwest_avg, southeast_avg = regional_average_cost(charges_and_details)
print("\n".join([
    "Northwest Average Cost: " + str(northwest_avg) + " dollars",
    "Northeast Average Cost: " + str(northeast_avg) + " dollars",
    "Southwest Average Cost: " + str(southwest_avg) + " dollars",
    "Southeast Average Cost: " + str(southeast_avg) + " dollars",
]))

Northwest Average Cost: 12417 dollars
Northeast Average Cost: 13405 dollars
Southwest Average Cost: 12346 dollars
Southeast Average Cost: 14734 dollars


In [5]:
#creates dictionary with regions as keys and each region's patient records as a list
def regional_dict(charges_and_details):
    """Group patient records by their 'Region' value.

    Returns a dict mapping each region found in charges_and_details to the
    list of records for that region (the same dict objects, not copies),
    in the order the records are iterated.
    """
    patients_by_region = {}
    for record in charges_and_details.values():
        patients_by_region.setdefault(record['Region'], []).append(record)
    return patients_by_region

#groups every patient record by region; later cells read this dictionary
patients_by_region = regional_dict(charges_and_details)

In [6]:
#calculates average patient age for each region from the patients_by_region dictionary
southwest_pts = len(patients_by_region['southwest'])
southeast_pts = len(patients_by_region['southeast'])
northwest_pts = len(patients_by_region['northwest'])
northeast_pts = len(patients_by_region['northeast'])

#total age per region
southwest_cum_age = sum(patient['Age'] for patient in patients_by_region['southwest'])
southeast_cum_age = sum(patient['Age'] for patient in patients_by_region['southeast'])
northwest_cum_age = sum(patient['Age'] for patient in patients_by_region['northwest'])
northeast_cum_age = sum(patient['Age'] for patient in patients_by_region['northeast'])

#int() truncates each average to whole years
southwest_avg_age = int(southwest_cum_age / southwest_pts)
southeast_avg_age = int(southeast_cum_age / southeast_pts)
northwest_avg_age = int(northwest_cum_age / northwest_pts)
northeast_avg_age = int(northeast_cum_age / northeast_pts)

print("Southwest Average Age: " + str(southwest_avg_age),
      "Southeast Average Age: " + str(southeast_avg_age),
      "Northwest Average Age: " + str(northwest_avg_age),
      "Northeast Average Age: " + str(northeast_avg_age),
      sep="\n")

Southwest Average Age: 39
Southeast Average Age: 38
Northwest Average Age: 39
Northeast Average Age: 39


In [7]:
#function that takes a region and a numeric metric (Children/BMI/Age/Charges) and returns that region's average
def regional_analysis(region, metric):
    """Return "<region> average <metric>: <value>" for one region.

    Reads the notebook-level patients_by_region dictionary. The average is
    truncated to an int. Raises KeyError if the region is not a key of
    patients_by_region.
    """
    patients = patients_by_region[region]

    total = 0
    for patient in patients:
        total += patient[metric]

    truncated_average = int(total / len(patients))
    label = "{region} average {metric}: ".format(region = region, metric = metric)
    return label + str(truncated_average)
    
#average age of southwest patients (regional_analysis truncates the mean to a whole number)
print(regional_analysis('southwest', 'Age'))

southwest average Age: 39


In [8]:
#average BMI of northeast patients (regional_analysis truncates the mean to a whole number)
print(regional_analysis('northeast', 'BMI'))

northeast average BMI: 29


In [9]:
#function that allows you to input children/bmi/age/charges and returns average of that metric for all regions
def all_region_avg(metric):
    """Return a four-line report of the average of `metric` per region.

    Reads the notebook-level patients_by_region dictionary. Each region's
    size is taken from its own patient list, so the result cannot drift out
    of sync with counts cached in another cell. Averages are truncated to
    an int.

    Lines are ordered Southwest, Southeast, Northwest, Northeast. Raises
    KeyError if one of those regions is missing from patients_by_region.
    """
    report_lines = []
    for region in ('southwest', 'southeast', 'northwest', 'northeast'):
        patients = patients_by_region[region]

        total = 0
        for patient in patients:
            total += patient[metric]

        label = "{region} average {metric}: ".format(region = region.capitalize(), metric = metric)
        report_lines.append(label + str(int(total / len(patients))))

    return "\n".join(report_lines)

#per-region average number of children; means are truncated to whole numbers, so fractional differences between regions are not visible
print(all_region_avg("Children"))

Southwest average Children: 1
Southeast average Children: 1
Northwest average Children: 1
Northeast average Children: 1


In [10]:
#per-region average BMI, truncated to a whole number
print(all_region_avg("BMI"))

Southwest average BMI: 30
Southeast average BMI: 33
Northwest average BMI: 29
Northeast average BMI: 29


In [11]:
#per-region average charges; same figures as regional_average_cost printed earlier, computed from patients_by_region
print(all_region_avg("Charges"))

Southwest average Charges: 12346
Southeast average Charges: 14734
Northwest average Charges: 12417
Northeast average Charges: 13405


In [12]:
#per-region average age; same figures as the average ages printed earlier
print(all_region_avg("Age"))

Southwest average Age: 39
Southeast average Age: 38
Northwest average Age: 39
Northeast average Age: 39


In [13]:
#function that returns percentage of smokers in each region
def smoker_analysis(dictionary):
    """Return a four-line report of the smoker percentage per region.

    dictionary maps each of 'southwest', 'southeast', 'northwest' and
    'northeast' to a list of patient records; a record counts as a smoker
    when its 'Smoker' value is 'yes'. Percentages are truncated to an int.

    Raises KeyError if a region is missing and ZeroDivisionError if a
    region's list is empty.
    """
    report_lines = []
    for region in ('southwest', 'southeast', 'northwest', 'northeast'):
        #uses the argument, not the notebook-level patients_by_region
        patients = dictionary[region]
        smokers = sum(1 for patient in patients if patient['Smoker'] == 'yes')
        smoker_pct = int((smokers / len(patients)) * 100)
        report_lines.append(region.capitalize() + " smoking percentage: " + str(smoker_pct) + "%")

    return "\n".join(report_lines)

In [14]:
#percentage of smokers per region, truncated to a whole percent
print(smoker_analysis(patients_by_region))

Southwest smoking percentage: 17%
Southeast smoking percentage: 25%
Northwest smoking percentage: 17%
Northeast smoking percentage: 20%


In [15]:
#function that allows you to input metric and categorical value and returns percentage of value in each region
def categorical_percentage(metric, value):
    """Return a four-line report of how often `metric` equals `value` per region.

    Reads the notebook-level patients_by_region dictionary. Each region's
    size is taken from its own patient list, so the result cannot drift out
    of sync with counts cached in another cell. Percentages are truncated
    to an int.

    Lines are ordered Southwest, Southeast, Northwest, Northeast. Raises
    KeyError if one of those regions is missing from patients_by_region.
    """
    report_lines = []
    for region in ('southwest', 'southeast', 'northwest', 'northeast'):
        patients = patients_by_region[region]
        matches = sum(1 for patient in patients if patient[metric] == value)
        match_pct = int((matches / len(patients)) * 100)

        label = "{region} {metric} percentage - {value}: ".format(region = region.capitalize(), metric = metric, value = value)
        report_lines.append(label + str(match_pct) + "%")

    return "\n".join(report_lines)

In [16]:
#percentage of male patients per region, truncated to a whole percent
print(categorical_percentage('Sex', 'male'))

Southwest Sex percentage - male: 50%
Southeast Sex percentage - male: 51%
Northwest Sex percentage - male: 49%
Northeast Sex percentage - male: 50%


In [17]:
#percentage of smokers per region; same figures as the smoker_analysis report printed earlier
print(categorical_percentage('Smoker', 'yes'))

Southwest Smoker percentage - yes: 17%
Southeast Smoker percentage - yes: 25%
Northwest Smoker percentage - yes: 17%
Northeast Smoker percentage - yes: 20%


In [18]:
#creates dictionary with BMI categories as keys and each category's patient records as a list
def bmi_dict(charges_and_details):
    """Group patient records by BMI category.

    Categories use inclusive lower bounds:
        Underweight     BMI < 18.5
        Healthy Weight  18.5 <= BMI < 25.0
        Overweight      25.0 <= BMI < 30.0
        Obese           BMI >= 30.0

    Returns a dict with all four category keys (possibly with empty lists)
    whose values are lists of the original record dicts.
    """
    bmi_scale = {'Underweight': [],
                'Healthy Weight': [],
                'Overweight': [],
                'Obese': []}

    for record in charges_and_details.values():
        bmi = record['BMI']
        #>= so a BMI exactly on a boundary falls in the upper category
        if bmi >= 30.0:
            bmi_scale['Obese'].append(record)
        elif bmi >= 25.0:
            bmi_scale['Overweight'].append(record)
        elif bmi >= 18.5:
            bmi_scale['Healthy Weight'].append(record)
        else:
            bmi_scale['Underweight'].append(record)

    return bmi_scale

In [19]:
#groups every patient record by BMI category; later cells read this dictionary
patients_by_bmi = bmi_dict(charges_and_details)

In [20]:
#number of patients in each BMI category; all_bmi_avg (defined below) takes children/bmi/age/charges and returns the average of that metric per BMI category
underweight_pts, healthyweight_pts, overweight_pts, obese_pts = (
    len(patients_by_bmi[category])
    for category in ('Underweight', 'Healthy Weight', 'Overweight', 'Obese')
)

def all_bmi_avg(metric):
    """Return a four-line report of the average of `metric` per BMI category.

    Reads the notebook-level patients_by_bmi dictionary. Each category's
    size is taken from its own patient list, so the result cannot drift out
    of sync with counts cached at module level. Averages are truncated to
    an int.

    Lines are ordered Underweight, Healthy Weight, Overweight, Obese.
    Raises ZeroDivisionError if a category has no patients.
    """
    report_lines = []
    for category in ('Underweight', 'Healthy Weight', 'Overweight', 'Obese'):
        patients = patients_by_bmi[category]

        total = 0
        for patient in patients:
            total += patient[metric]

        label = "{category} average {metric}: ".format(category = category, metric = metric)
        report_lines.append(label + str(int(total / len(patients))))

    return "\n".join(report_lines)

#average charges per BMI category, truncated to whole numbers
print(all_bmi_avg("Charges"))

Underweight average Charges: 8657
Healthy Weight average Charges: 10434
Overweight average Charges: 10997
Obese average Charges: 15560


In [21]:
#creates dictionary with age groups as keys and each group's patient records as a list
def age_dict(charges_and_details):
    """Group patient records by age bracket.

    A record goes into the first bracket whose threshold its 'Age' exceeds:
    Senior Citizens (> 64), Older Adults (> 44), Middle Aged Adults (> 30),
    Young Adults (> 17); anything else is filed under Children.

    Returns a dict with all five group keys (possibly with empty lists)
    whose values are lists of the original record dicts.
    """
    #checked from oldest to youngest; the first match wins
    brackets = (
        (64, 'Senior Citizens'),
        (44, 'Older Adults'),
        (30, 'Middle Aged Adults'),
        (17, 'Young Adults'),
    )

    age_groups = {
        'Children': [],
        'Young Adults': [],
        'Middle Aged Adults': [],
        'Older Adults': [],
        'Senior Citizens': [],
    }

    for record in charges_and_details.values():
        age = record['Age']
        group = next((name for threshold, name in brackets if age > threshold), 'Children')
        age_groups[group].append(record)

    return age_groups

#groups every patient record by age bracket; later cells read this dictionary
patients_by_age = age_dict(charges_and_details)

In [22]:
#number of patients in three of the age groups; all_age_avg (defined below) takes children/bmi/age/charges and returns the average of that metric for those groups
num_youngadults, num_middleagedadults, num_olderadults = (
    len(patients_by_age[group])
    for group in ('Young Adults', 'Middle Aged Adults', 'Older Adults')
)

def all_age_avg(metric):
    """Return a three-line report of the average of `metric` per age group.

    Reads the notebook-level patients_by_age dictionary and covers only the
    'Young Adults', 'Middle Aged Adults' and 'Older Adults' groups. Each
    group's size is taken from its own patient list, so the result cannot
    drift out of sync with counts cached at module level. Averages are
    truncated to an int.

    Raises ZeroDivisionError if one of the three groups has no patients.
    """
    #(key in patients_by_age, singular label used in the report)
    reported_groups = (
        ('Young Adults', 'Young Adult'),
        ('Middle Aged Adults', 'Middle Aged Adult'),
        ('Older Adults', 'Older Adult'),
    )

    report_lines = []
    for group_key, group_label in reported_groups:
        patients = patients_by_age[group_key]

        total = 0
        for patient in patients:
            total += patient[metric]

        label = "{group} average {metric}: ".format(group = group_label, metric = metric)
        report_lines.append(label + str(int(total / len(patients))))

    return "\n".join(report_lines)

#average charges for the young, middle aged and older adult groups, truncated to whole numbers
print(all_age_avg("Charges"))

Young Adult average Charges: 9397
Middle Aged Adult average Charges: 12473
Older Adult average Charges: 17070


In [23]:
#average BMI for the young, middle aged and older adult groups, truncated to whole numbers
print(all_age_avg("BMI"))

Young Adult average BMI: 29
Middle Aged Adult average BMI: 30
Older Adult average BMI: 31
