# U.S. Medical Insurance Costs

In [2]:
import csv

In [77]:
# Column-wise storage: position i in every list below refers to the same CSV row.
age = []
sex = []
bmi = []
children = []
has_children = []
smoker = []
region = []
charges = []

# newline='' is what the csv module asks for when opening a file, so that
# line breaks embedded in quoted fields are handled by the reader itself.
with open('insurance.csv', newline='') as insurance_csv:
    insurance_dict = csv.DictReader(insurance_csv)
    for row in insurance_dict:
        age.append(int(row["age"]))
        sex.append(row["sex"])
        bmi.append(float(row["bmi"]))

        # Parse the count once and derive the flag from the parsed number, so
        # `children` and `has_children` can never disagree (e.g. on " 0").
        child_count = int(row["children"])
        children.append(child_count)
        has_children.append(child_count != 0)

        # Only the exact string "yes" marks a smoker; anything else is False.
        smoker.append(row["smoker"] == "yes")
        region.append(row["region"])
        charges.append(float(row["charges"]))


In [19]:
def calculate_average(data_list):
    """Return the arithmetic mean of the numbers in ``data_list``.

    Args:
        data_list: Any iterable of numbers (list, tuple, generator, ...).

    Returns:
        The sum of the values divided by how many there are.

    Raises:
        ZeroDivisionError: If ``data_list`` contains no values.
    """
    # Materialise once so one-shot iterables (generators, zip objects) can be
    # both summed and counted.
    values = list(data_list)
    if not values:
        # Same exception type an empty list always produced here, but with a
        # message that points at the real cause.
        raise ZeroDivisionError("cannot compute the average of an empty collection")
    return sum(values) / len(values)

In [55]:
def construct_dict(data_list):
    """Tally how often each distinct item occurs in ``data_list``.

    Returns a dict mapping item -> number of occurrences; keys appear in the
    order the items were first encountered.
    """
    tally = {}
    for entry in data_list:
        # Unseen entries start from 0, so the first sighting stores 1.
        tally[entry] = tally.get(entry, 0) + 1
    return tally

In [71]:
def percentage_dict(count_dict):
    """Express each count as its share of the sum of all counts.

    The result maps every key of ``count_dict`` to ``count / total``; the
    values are plain fractions and are not multiplied by 100.
    """
    grand_total = sum(count_dict.values())
    return {label: count / grand_total for label, count in count_dict.items()}

In [86]:
def two_col_dict(list_one, list_two):
    """Group the values of ``list_two`` by the paired value in ``list_one``.

    The two lists are walked in parallel with ``zip``. The result maps each
    distinct element of ``list_one`` to the list of ``list_two`` elements found
    at the same positions, in their original order.
    """
    grouped = {}
    for group_key, member in zip(list_one, list_two):
        # setdefault creates the bucket the first time a key is seen.
        grouped.setdefault(group_key, []).append(member)
    return grouped
            

In [92]:
def average_charges_breakdown(col_charges_dict):
    """Print one ``<key>: $<average>`` line per entry of ``col_charges_dict``.

    ``col_charges_dict`` maps a group label to a list of numbers; each list is
    averaged with ``calculate_average`` and printed unrounded. Returns None.
    """
    for group in col_charges_dict:
        group_average = calculate_average(col_charges_dict[group])
        print("{}: ${}".format(group, group_average))

In [61]:
# Data overview: the loader appends to `age` once per CSV row, so its length
# is the number of rows read.
total_rows = len(age)
print("Total number of rows is {}".format(total_rows))

Total number of rows is 1338


In [32]:
# Age analysis: mean age over every row.
average_age = calculate_average(age)

print("Average age is {}".format(average_age))

Average age is 39.20702541106129


In [31]:
# BMI analysis: mean BMI over every row.
average_bmi = calculate_average(bmi)

print("Average BMI is {}".format(average_bmi))

Average BMI is 30.663396860986538


In [96]:
# Children analysis

# Mean number of children per row.
average_children = calculate_average(children)
print("Average # of children is {}".format(average_children))

# Row counts, then shares, for has_children == False / True.
children_dict = construct_dict(has_children)
print(children_dict)
print(percentage_dict(children_dict))

# Charges grouped two ways: by exact child count and by the has-children flag.
children_charges_dict = two_col_dict(children, charges)
has_children_charges_dict = two_col_dict(has_children, charges)

print("Average charges according to number of children:")
average_charges_breakdown(children_charges_dict)

print("Average charges, no children vs. having children:")
average_charges_breakdown(has_children_charges_dict)


Average # of children is 1.0949177877429
{False: 574, True: 764}
{False: 0.4289985052316891, True: 0.5710014947683109}
Average charges according to number of children:
0: $12365.975601635882
1: $12731.171831635793
3: $15355.31836681528
2: $15073.563733958328
5: $8786.035247222222
4: $13850.656311199999
Average charges, no children vs. having children:
False: $12365.975601635882
True: $13949.94109348167


In [37]:
# Charges analysis: overall mean charge, rounded to two decimals for display.
average_charges = calculate_average(charges)
rounded_average_charges = round(average_charges, 2)

print("Average charges are ${}".format(rounded_average_charges))

Average charges are $13270.42


In [72]:
# Sex breakdown: row count per value of `sex`, then each value's share of all rows.
sex_dict = construct_dict(sex)
sex_share = percentage_dict(sex_dict)

print("Sex breakdown:")
print(sex_dict)
print(sex_share)


Sex breakdown:
{'female': 662, 'male': 676}
{'female': 0.4947683109118087, 'male': 0.5052316890881914}


In [74]:
# Smoker analysis

# Row counts, then shares, for smoker == True / False.
smoker_dict = construct_dict(smoker)
print("Smoker breakdown:")
print(smoker_dict)
print(percentage_dict(smoker_dict))

# Split the charges into the two groups in a single pass over the paired columns.
smoker_charges = []
nonsmoker_charges = []
for is_smoker, charge in zip(smoker, charges):
    if is_smoker:
        smoker_charges.append(charge)
    else:
        nonsmoker_charges.append(charge)

smoker_average = calculate_average(smoker_charges)
nonsmoker_average = calculate_average(nonsmoker_charges)
print("Average charges for smokers: ${}".format(smoker_average))
print("Average charges for non-smokers: ${}".format(nonsmoker_average))

Smoker breakdown:
{True: 274, False: 1064}
{True: 0.20478325859491778, False: 0.7952167414050823}
Average charges for smokers: $32050.23183153285
Average charges for non-smokers: $8434.268297856199


In [91]:
# Region analysis

# Build both views up front: row count per region and charges grouped by region.
region_dict = construct_dict(region)
region_charges_dict = two_col_dict(region, charges)

print("Region breakdown:")
print(region_dict)
print(percentage_dict(region_dict))

print("Average charges by region:")
average_charges_breakdown(region_charges_dict)


Region breakdown:
{'southwest': 325, 'southeast': 364, 'northwest': 325, 'northeast': 324}
{'southwest': 0.2428998505231689, 'southeast': 0.27204783258594917, 'northwest': 0.2428998505231689, 'northeast': 0.242152466367713}
Average charges by region:
southwest: $12346.93737729231
southeast: $14735.411437609895
northwest: $12417.575373969228
northeast: $13406.3845163858
