# U.S. Medical Insurance Costs

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
insurance_data = pd.read_csv("insurance.csv")
insurance_data

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.900,0,yes,southwest,16884.92400
1,18,male,33.770,1,no,southeast,1725.55230
2,28,male,33.000,3,no,southeast,4449.46200
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.880,0,no,northwest,3866.85520
...,...,...,...,...,...,...,...
1333,50,male,30.970,3,no,northwest,10600.54830
1334,18,female,31.920,0,no,northeast,2205.98080
1335,18,female,36.850,0,no,southeast,1629.83350
1336,21,female,25.800,0,no,southwest,2007.94500


# values for men and women in dataset

In [3]:
men = len(insurance_data["sex"][insurance_data["sex"] == "male"].values)
women = len(insurance_data["sex"][insurance_data["sex"] == "female"].values)
print("There are {} men and {} women in our database".format(men, women))

There are 676 men and 662 women in our database


# average bmi for men and women

In [5]:
bmi_male = insurance_data["bmi"][insurance_data["sex"] == "male"].values
average_bmi_male = np.mean(bmi_male)
bmi_female = insurance_data["bmi"][insurance_data["sex"] == "female"].values
average_bmi_female = np.mean(bmi_female)
print("The average BMI value for men is {male} and for women it is {female}".format(male=round(average_bmi_male,2),female=round(average_bmi_female,2)))


The average BMI value for men is 30.94 and for women it is 30.38


# # average bmi by age

In [6]:
bmi_age = {"15:20":0,"20:30":0,"30:40":0,"40:50":0,"50:60":0, ">60":0}
for ages in bmi_age:
    if ages != ">60":
        bmi_age[ages] = sum(insurance_data["bmi"][(insurance_data["age"] > int(ages[:2])) & (insurance_data["age"] <= int(ages[3:]))].values)/len(insurance_data["bmi"][(insurance_data["age"] > int(ages[:2])) & (insurance_data["age"] <= int(ages[3:]))].values)
        print("From age {age1} to {age2} people have an average BMI value of {bmi_age}".format(age1=ages[:2], age2=ages[3:], bmi_age=bmi_age[ages]))
    else:
        bmi_age[ages] = sum(insurance_data["bmi"][insurance_data["age"] > 60].values)/len(insurance_data["bmi"][insurance_data["age"] > 60].values)
        print("For people over 60 years old the average BMI value is", bmi_age[ages])

From age 15 to 20 people have an average BMI value of 30.087018072289162
From age 20 to 30 people have an average BMI value of 29.773579136690657
From age 30 to 40 people have an average BMI value of 30.39986381322956
From age 40 to 50 people have an average BMI value of 30.808078291814972
From age 50 to 60 people have an average BMI value of 31.449377358490583
For people over 60 years old the average BMI value is 32.44181318681318


In [7]:
bmi_ages_categories = {"15:20":0,"20:30":0,"30:40":0,"40:50":0,"50:60":0, ">60":0}
for ages in bmi_ages_categories:
    if ages != ">60":
        categories = {"underweight":0,"healthy weight":0,"overweight":0,"obese":0}
        categories["underweight"] = len(insurance_data["bmi"][(insurance_data["age"] > int(ages[:2])) & (insurance_data["age"] <= int(ages[3:])) & (insurance_data["bmi"] < 18.5)].values)
        categories["healthy weight"] = len(insurance_data["bmi"][(insurance_data["age"] > int(ages[:2])) & (insurance_data["age"] <= int(ages[3:])) & (insurance_data["bmi"] >= 18.5) & (insurance_data["bmi"] < 25)].values)
        categories["overweight"] = len(insurance_data["bmi"][(insurance_data["age"] > int(ages[:2])) & (insurance_data["age"] <= int(ages[3:])) & (insurance_data["bmi"] >= 25) & (insurance_data["bmi"] < 30)].values)
        categories["obese"] = len(insurance_data["bmi"][(insurance_data["age"] > int(ages[:2])) & (insurance_data["age"] <= int(ages[3:])) & (insurance_data["bmi"] >= 30)].values)
    else:
        categories = {"underweight":0,"healthy weight":0,"overweight":0,"obese":0}
        categories["underweight"] = len(insurance_data["bmi"][(insurance_data["age"] > 60) & (insurance_data["bmi"] < 18.5)].values)
        categories["healthy weight"] = len(insurance_data["bmi"][(insurance_data["age"] > 60) & (insurance_data["bmi"] >= 18.5) & (insurance_data["bmi"] < 25)].values)
        categories["overweight"] = len(insurance_data["bmi"][(insurance_data["age"] > 60) & (insurance_data["bmi"] >= 25) & (insurance_data["bmi"] < 30)].values)
        categories["obese"] = len(insurance_data["bmi"][(insurance_data["age"] > 60) & (insurance_data["bmi"] >= 30)].values)
    bmi_ages_categories[ages] = categories

In [13]:
# set width of bar
barWidth = 0.20
fig = plt.subplots(figsize =(12, 8))

underweight = [bmi_ages_categories[ages]["underweight"] for ages in bmi_ages_categories]
healthy_weight = [bmi_ages_categories[ages]["healthy weight"] for ages in bmi_ages_categories]
overweight = [bmi_ages_categories[ages]["overweight"] for ages in bmi_ages_categories]
obese = [bmi_ages_categories[ages]["obese"] for ages in bmi_ages_categories]
# Set position of bar on X axis
br1 = np.arange(6)
br2 = [x + barWidth for x in br1]
br3 = [x + barWidth for x in br2]
br4 = [x + barWidth for x in br3]
 
# Make the plot
plt.bar(br1, underweight, color="palegreen" ,width = barWidth,
        edgecolor ='grey', label ='underweight')
plt.bar(br2, healthy_weight,color="limegreen", width = barWidth,
        edgecolor ='grey', label ="healthy weight")
plt.bar(br3, overweight,color="green", width = barWidth,
        edgecolor ='grey', label ='overweight')
plt.bar(br4, obese, color="darkgreen",width = barWidth,
        edgecolor ='grey', label ='obese')

 
# Adding Xticks
plt.xlabel('Ages', fontweight ='bold', fontsize = 15)
plt.ylabel('Number of people', fontweight ='bold', fontsize = 15)
plt.xticks([r + barWidth for r in range(6)],
        ["["+ages+"]" for ages in bmi_ages_categories])
 
plt.legend()
plt.show()

NameError: name 'underweight' is not defined