# U.S. Medical Insurance Costs

In [63]:
import csv

In [64]:
# Save data from file

# One list per CSV column; format_data appends to whichever list it is given.
ages, sexes, bmis = [], [], []
num_of_children, smokers = [], []
regions, charges = [], []


def format_data(lst, category, filename="insurance.csv"):
    """Append every value of one CSV column to ``lst``.

    Args:
        lst: List that is extended in place. Values are appended exactly as
            ``csv.DictReader`` yields them (strings); no conversion is done.
        category: Header name of the column to copy.
        filename: Path of the CSV file to read. Defaults to
            "insurance.csv", the path that used to be hard-coded here.

    Raises:
        KeyError: If the file has data rows but no ``category`` column.
    """
    # newline="" is what the csv module asks for, so that line breaks inside
    # quoted fields reach the parser untouched.
    with open(filename, newline="") as insurance_file:
        for row in csv.DictReader(insurance_file):
            lst.append(row[category])



In [65]:
# Fill each column list from the CSV header it corresponds to.
for column_values, header in (
    (ages, "age"),
    (sexes, "sex"),
    (bmis, "bmi"),
    (num_of_children, "children"),
    (smokers, "smoker"),
    (regions, "region"),
    (charges, "charges"),
):
    format_data(column_values, header)

In [66]:
#create dictionary of data
# One record dict per person, built by zipping the column lists together.
record_fields = ("Age", "Sex", "BMI", "Number of children", "Smoker", "Region", "Charges")
record_columns = (ages, sexes, bmis, num_of_children, smokers, regions, charges)
data_list = [dict(zip(record_fields, values)) for values in zip(*record_columns)]

# Labels are 1-based: "Person number 1", "Person number 2", ...
person_number = ["Person number " + str(index) for index in range(1, len(ages) + 1)]

# Pair each label with its record, in file order.
insurance_dict = dict(zip(person_number, data_list))



In [67]:
# find population size
# The age column has one entry per CSV row, so its length is the head count.
total_pop = len(ages)
print("There are ", total_pop, " people in the insurance dataset.", sep="")


There are 1338 people in the insurance dataset.


In [68]:
#find out proportion of men and women.

# Tally the sex column; anything other than "male"/"female" is kept in its
# own counter instead of being folded into either group.
men = sexes.count("male")
women = sexes.count("female")
sex_not_listed = len(sexes) - men - women

# Shares are relative to the whole population, as percentages with 2 decimals.
percent_men = round(men / total_pop * 100, 2)
percent_women = round(women / total_pop * 100, 2)

print("There are ", men, " men in this dataset. \nThey form ", percent_men, "% of the total population.", sep="")
print("There are ", women, " women in this dataset. \nThey form ", percent_women, "% of the total population.", sep="")


There are 676 men in this dataset. 
They form 50.52% of the total population.
There are 662 women in this dataset. 
They form 49.48% of the total population.


In [69]:
#average age

# Ages were read from the CSV as strings, hence the int() conversion.
total_age = sum(int(age) for age in ages)

# Rounded to a whole number of years.
average_age = round(total_age / len(ages))

print("The average age of this dataset is ", average_age, " years old.", sep="")
        

The average age of this dataset is 39 years old.


In [70]:
#organise dictionary by sex

def organise_dict_by_sex(insurance_dict):
    """Group person records by the value of their "Sex" entry.

    Args:
        insurance_dict: Mapping of person label -> record dict. Every record
            must contain a "Sex" key.

    Returns:
        A dict mapping each distinct "Sex" value to the list of records that
        carry it, in the order they appear in ``insurance_dict``. The records
        themselves are not copied.
    """
    grouped = {}
    for record in insurance_dict.values():
        # setdefault creates the bucket the first time a value is seen.
        grouped.setdefault(record["Sex"], []).append(record)
    return grouped

In [71]:
# Group every record by its "Sex" value; the per-sex statistics read from this.
insurance_by_sex = organise_dict_by_sex(insurance_dict)

In [72]:
# average cost by sex

def average_cost_by_sex(insurance_by_sex, sex):
    """Build a sentence reporting the average charge for one sex.

    Args:
        insurance_by_sex: Mapping of sex -> list of record dicts, each with a
            "Charges" entry parseable by ``float``.
        sex: Key of the group to summarise.

    Returns:
        The report sentence for "female" or "male"; ``None`` for any other
        key that is present in ``insurance_by_sex``.

    Raises:
        KeyError: If ``sex`` is not a key of ``insurance_by_sex``.
        ZeroDivisionError: If the selected group is empty.
    """
    records = insurance_by_sex[sex]
    # NOTE(review): each charge is truncated to whole dollars *before*
    # summing, so the cents never contribute to the mean. Kept as is so the
    # reported figures do not change.
    whole_dollar_total = sum(int(float(record["Charges"])) for record in records)
    average_by_sex = round(whole_dollar_total / len(records))

    sentence_start = {
        "female": "The average cost for women is ",
        "male": "The average cost for men is ",
    }.get(sex)
    if sentence_start is None:
        return None
    return sentence_start + str(average_by_sex) + " dollars."
        

In [73]:
# Average charge for the "female" group, rounded to whole dollars.
print(average_cost_by_sex(insurance_by_sex, "female"))

The average cost for women is 12569 dollars.


In [74]:
# Average charge for the "male" group, rounded to whole dollars.
print(average_cost_by_sex(insurance_by_sex, "male"))

The average cost for men is 13956 dollars.


In [75]:
# organise dict by smoker


def organise_dict_by_smoker(insurance_dict):
    """Group person records under "Smoker" / "Non-smoker".

    Args:
        insurance_dict: Mapping of person label -> record dict. Every record
            must contain a "Smoker" entry equal to "yes" or "no".

    Returns:
        A dict with the keys "Smoker" and/or "Non-smoker" (only those that
        occur), each mapping to the list of matching records in input order.

    Raises:
        ValueError: If a record's "Smoker" value is neither "yes" nor "no".
            Previously such a record silently inherited the label of the
            record before it (or crashed with UnboundLocalError when it
            came first).
    """
    group_names = {"yes": "Smoker", "no": "Non-smoker"}
    insurance_by_smoker = {}
    for person, record in insurance_dict.items():
        status = record["Smoker"]
        if status not in group_names:
            raise ValueError(
                "Unexpected Smoker value {!r} for {!r}; expected 'yes' or 'no'.".format(status, person)
            )
        insurance_by_smoker.setdefault(group_names[status], []).append(record)
    return insurance_by_smoker

In [83]:
# Split the records into "Smoker" / "Non-smoker" groups for the cost comparison.
insurance_by_smoker = organise_dict_by_smoker(insurance_dict)

In [84]:
# average cost by smoker

def average_cost_by_smoker(insurance_by_smoker, smoker):
    """Build a sentence reporting the average charge for one smoking group.

    Args:
        insurance_by_smoker: Mapping of group name -> list of record dicts,
            each with a "Charges" entry parseable by ``float``.
        smoker: Key of the group to summarise.

    Returns:
        The report sentence for "Smoker" or "Non-smoker"; ``None`` for any
        other key that is present in ``insurance_by_smoker``.

    Raises:
        KeyError: If ``smoker`` is not a key of ``insurance_by_smoker``.
        ZeroDivisionError: If the selected group is empty.
    """
    group = insurance_by_smoker[smoker]

    # NOTE(review): charges are truncated to whole dollars one by one before
    # being added up, so the cents are dropped from the mean. Kept as is so
    # the reported figures do not change.
    truncated_charges = [int(float(entry["Charges"])) for entry in group]
    average_by_smoker = round(sum(truncated_charges) / len(group))

    if smoker == "Smoker":
        opening = "The average cost for smokers is "
    elif smoker == "Non-smoker":
        opening = "The average cost for non-smokers is "
    else:
        return None
    return opening + str(average_by_smoker) + " dollars."

In [85]:
# Same report for both smoking groups, smokers first.
for smoker_group in ("Smoker", "Non-smoker"):
    print(average_cost_by_smoker(insurance_by_smoker, smoker_group))

The average cost for smokers is 32050 dollars.
The average cost for non-smokers is 8434 dollars.


In [86]:
#proportion of smokers per sex

def smokers_by_sex(insurance_by_sex, sex):
    """Build a sentence reporting what share of one sex smokes.

    The share is computed against the size of the group that is passed in,
    not against notebook-level counters, so the function gives the right
    answer for whatever grouping it receives and does not depend on another
    cell having been run first.

    Args:
        insurance_by_sex: Mapping of sex -> list of record dicts, each with a
            "Smoker" entry ("yes" marks a smoker).
        sex: Key of the group to summarise.

    Returns:
        The report sentence for "male" or "female" (percentage rounded to
        two decimals, 0.0 for an empty group); ``None`` for any other key
        that is present in ``insurance_by_sex``.

    Raises:
        KeyError: If ``sex`` is not a key of ``insurance_by_sex``.
    """
    group = insurance_by_sex[sex]
    total_smokers = sum(1 for record in group if record["Smoker"] == "yes")
    # An empty group has no smokers; report 0.0 instead of dividing by zero.
    percent_smokers = round((total_smokers / len(group)) * 100, 2) if group else 0.0

    if sex == "male":
        return str(percent_smokers) + "% of men in this dataset are smokers."
    if sex == "female":
        return str(percent_smokers) + "% of women in this dataset are smokers."
    return None
       

In [87]:
# Smoking share for each sex, women first.
for sex_group in ("female", "male"):
    print(smokers_by_sex(insurance_by_sex, sex_group))

17.37% of women in this dataset are smokers.
23.52% of men in this dataset are smokers.


In [93]:
# organise dict by BMI


def organise_dict_by_bmi(insurance_dict):
    """Group person records into BMI categories.

    Categories and their cut-offs:
        "Underweight"     BMI < 18.5
        "Healthy weight"  18.5 <= BMI < 25.0
        "Overweight"      25.0 <= BMI < 30.0
        "Obese"           BMI >= 30.0

    Args:
        insurance_dict: Mapping of person label -> record dict. Every record
            must contain a "BMI" entry parseable by ``float``.

    Returns:
        A dict mapping each category that occurs to the list of records in
        it, in input order.

    Raises:
        ValueError: If a "BMI" value cannot be parsed, or parses to NaN.
            A NaN used to match no branch, so the record silently inherited
            the previous record's category.
    """
    insurance_by_bmi = {}
    for person, record in insurance_dict.items():
        # Parse once instead of once per comparison.
        bmi_value = float(record["BMI"])
        if bmi_value < 18.5:
            current_bmi = "Underweight"
        elif bmi_value < 25.0:
            current_bmi = "Healthy weight"
        elif bmi_value < 30.0:
            current_bmi = "Overweight"
        elif bmi_value >= 30.0:
            current_bmi = "Obese"
        else:
            # Only NaN fails every comparison above.
            raise ValueError(
                "BMI value {!r} for {!r} is not a number.".format(record["BMI"], person)
            )
        insurance_by_bmi.setdefault(current_bmi, []).append(record)
    return insurance_by_bmi

In [94]:
# Bucket every record into a BMI category for the per-category statistics.
insurance_by_bmi = organise_dict_by_bmi(insurance_dict)

In [97]:
# average cost by bmi

def average_cost_by_bmi(insurance_by_bmi, bmi):
    """Build a sentence reporting the average charge for one BMI category.

    Args:
        insurance_by_bmi: Mapping of category name -> list of record dicts,
            each with a "Charges" entry parseable by ``float``.
        bmi: Key of the category to summarise.

    Returns:
        The report sentence for "Underweight", "Healthy weight",
        "Overweight" or "Obese"; ``None`` for any other key that is present
        in ``insurance_by_bmi``.

    Raises:
        KeyError: If ``bmi`` is not a key of ``insurance_by_bmi``.
        ZeroDivisionError: If the selected category is empty.
    """
    members = insurance_by_bmi[bmi]

    # NOTE(review): every charge is cut down to whole dollars before it is
    # added, so the cents never reach the mean. Kept as is so the reported
    # figures do not change.
    running_total = 0
    for member in members:
        running_total += int(float(member["Charges"]))
    average_by_bmi = round(running_total / len(members))

    sentence_openings = {
        "Underweight": "The average cost for underweight people is ",
        "Healthy weight": "The average cost for people with a healthy weight is ",
        "Overweight": "The average cost for overweight people is ",
        "Obese": "The average cost for obese people is ",
    }
    if bmi in sentence_openings:
        return sentence_openings[bmi] + str(average_by_bmi) + " dollars."
    return None

In [98]:
# Same report for every BMI category, lightest to heaviest.
for bmi_category in ("Underweight", "Healthy weight", "Overweight", "Obese"):
    print(average_cost_by_bmi(insurance_by_bmi, bmi_category))

The average cost for underweight people is 8852 dollars.
The average cost for people with a healthy weight is 10409 dollars.
The average cost for overweight people is 10987 dollars.
The average cost for obese people is 15552 dollars.


In [110]:
#proportion of each bmi per sex

def bmi_by_sex(insurance_by_bmi, bmi):
    """Build a sentence reporting what share of men and of women fall in a BMI category.

    The denominators are the numbers of men and women found across *all*
    categories of ``insurance_by_bmi``, so the function no longer depends on
    notebook-level counters set in another cell. This matches the whole
    dataset as long as ``insurance_by_bmi`` holds every person exactly once.

    Args:
        insurance_by_bmi: Mapping of category name -> list of record dicts,
            each with a "Sex" entry ("male" / "female" are counted).
        bmi: Key of the category to summarise.

    Returns:
        The report sentence, with both percentages rounded to two decimals.
        A sex with no members at all is reported as 0.0.

    Raises:
        KeyError: If ``bmi`` is not a key of ``insurance_by_bmi``.
    """
    def _count(records, sex):
        # Number of records whose "Sex" entry equals ``sex``.
        return sum(1 for record in records if record["Sex"] == sex)

    def _percent(part, whole):
        # Percentage with two decimals; 0.0 when there is nothing to divide by.
        return round((part / whole) * 100, 2) if whole else 0.0

    in_category = insurance_by_bmi[bmi]
    everyone = [record for group in insurance_by_bmi.values() for record in group]

    percent_male = _percent(_count(in_category, "male"), _count(everyone, "male"))
    percent_female = _percent(_count(in_category, "female"), _count(everyone, "female"))
    return (str(percent_male) + "% of men, and " + str(percent_female) + "% of women in this dataset are in the " + bmi + " category.")
    
    

In [111]:
# Share of men and women in every BMI category, lightest to heaviest.
for bmi_category in ("Underweight", "Healthy weight", "Overweight", "Obese"):
    print(bmi_by_sex(insurance_by_bmi, bmi_category))

1.18% of men, and 1.81% of women in this dataset are in the Underweight category.
15.98% of men, and 17.67% of women in this dataset are in the Healthy weight category.
27.66% of men, and 30.06% of women in this dataset are in the Overweight category.
55.18% of men, and 50.45% of women in this dataset are in the Obese category.
