# U.S. Medical Insurance Costs

In [1]:
import csv

In [2]:
# One independent, initially empty list per column of the dataset.
age, sex, bmi = [], [], []
children, smoker = [], []
region, charges = [], []

In [3]:
# load_data function
def load_data(lst, csv_file, column_name):
    """Append one column of a CSV file to ``lst`` and return ``lst``.

    The file is read with ``csv.DictReader``, so its first row supplies the
    column names and every appended value is the raw string from the file.

    Args:
        lst: List that receives the values; it is modified in place.
        csv_file: Path of the CSV file to open.
        column_name: Header of the column to extract.

    Returns:
        The same ``lst`` object, after the values have been appended.

    Raises:
        KeyError: If ``column_name`` is not one of the file's headers.
    """
    with open(csv_file, newline='') as handle:
        reader = csv.DictReader(handle)
        lst.extend(record[column_name] for record in reader)
    return lst


In [4]:
# Fill `age` with the 'age' column of insurance.csv.  The trailing ';' is
# presumably there to keep the notebook from echoing the returned list.
load_data(age, 'insurance.csv', 'age');

In [5]:
# Fill `sex` with the 'sex' column (load_data re-opens insurance.csv on each call).
load_data(sex, 'insurance.csv', 'sex');

In [6]:
# Fill `bmi` with the 'bmi' column (load_data re-opens insurance.csv on each call).
load_data(bmi, 'insurance.csv', 'bmi');

In [7]:
# Fill `children` with the 'children' column (load_data re-opens insurance.csv on each call).
load_data(children, 'insurance.csv', 'children');

In [8]:
# Fill `smoker` with the 'smoker' column (load_data re-opens insurance.csv on each call).
load_data(smoker, 'insurance.csv', 'smoker');

In [9]:
# Fill `region` with the 'region' column (load_data re-opens insurance.csv on each call).
load_data(region, 'insurance.csv', 'region');

In [10]:
# Fill `charges` with the 'charges' column (load_data re-opens insurance.csv on each call).
load_data(charges, 'insurance.csv', 'charges');

In [11]:
# Class
class InsurerData:
    """Summary statistics over the columns of an insurance dataset.

    Each constructor argument is one column, given as a sequence with one
    entry per record.  The columns are expected to be parallel (entry ``n``
    of every column describes the same record); methods that combine two
    columns pair them positionally and stop at the shorter one.  Numeric
    columns may hold numbers or numeric strings: they are converted with
    ``int``/``float`` each time a method is called.

    Every averaging method returns ``None`` for a group that contains no
    records instead of raising ``ZeroDivisionError``.
    """

    def __init__(self, insurer_age, insurer_sex, insurer_bmi, insurer_children,
                 insurer_smoker, insurer_region, insurer_charges):
        """Store the seven columns as given; nothing is copied or converted."""
        self.insurer_age = insurer_age
        self.insurer_sex = insurer_sex
        self.insurer_bmi = insurer_bmi
        self.insurer_children = insurer_children
        self.insurer_smoker = insurer_smoker
        self.insurer_region = insurer_region
        self.insurer_charges = insurer_charges

    @staticmethod
    def _mean(values, ndigits=None):
        """Return the mean of ``values`` rounded to ``ndigits``, or ``None`` if empty.

        With ``ndigits=None`` the result is rounded to the nearest integer.
        """
        values = list(values)
        if not values:
            return None
        return round(sum(values) / len(values), ndigits)

    def avg_insurance_cost(self):
        """Return the mean charge rounded to 2 decimals (``None`` if no records)."""
        return self._mean((float(cost) for cost in self.insurer_charges), 2)

    def avg_age(self):
        """Return the mean age rounded to a whole number (``None`` if no records)."""
        return self._mean(float(age) for age in self.insurer_age)

    def largest_region(self):
        """Return ``(region, count)`` for the most frequent region.

        On a tie the region that appears first in the data wins.  Returns
        ``(None, 0)`` when there are no records.
        """
        region_counts = {}
        for region in self.insurer_region:
            region_counts[region] = region_counts.get(region, 0) + 1
        if not region_counts:
            return None, 0
        max_region = max(region_counts, key=region_counts.get)
        return max_region, region_counts[max_region]

    def num_smokers(self):
        """Return how many records have the smoker field equal to ``'yes'``."""
        return sum(1 for smoker in self.insurer_smoker if smoker == 'yes')

    def smoker_cost(self):
        """Return ``(smoker_mean, non_smoker_mean)`` charges, each rounded to 2 decimals.

        A record counts as a smoker only when its smoker field is exactly
        ``'yes'``; every other value is treated as a non-smoker.  A group
        with no records yields ``None``.
        """
        smoker_charges, other_charges = [], []
        for smoker, charge in zip(self.insurer_smoker, self.insurer_charges):
            group = smoker_charges if smoker == 'yes' else other_charges
            group.append(float(charge))
        return self._mean(smoker_charges, 2), self._mean(other_charges, 2)

    def have_kids_age(self):
        """Return the mean age, rounded to a whole number, of records with >= 1 child.

        Returns ``None`` when no record has a child.
        """
        parent_ages = [
            int(age)
            for kids, age in zip(self.insurer_children, self.insurer_age)
            if int(kids) >= 1
        ]
        return self._mean(parent_ages)

    def sex_cost(self):
        """Return ``(female_mean, male_mean)`` charges, each rounded to 2 decimals.

        Records whose sex field is neither ``'female'`` nor ``'male'`` are
        ignored.  A group with no records yields ``None``.
        """
        charges_by_sex = {'female': [], 'male': []}
        for sex, charge in zip(self.insurer_sex, self.insurer_charges):
            if sex in charges_by_sex:
                charges_by_sex[sex].append(float(charge))
        return (self._mean(charges_by_sex['female'], 2),
                self._mean(charges_by_sex['male'], 2))

    def group_by_age_cost(self, young_limit=21, elder_limit=60):
        """Return mean charges for the young, adult and elder age groups.

        Args:
            young_limit: Ages strictly below this value are "young".
            elder_limit: Ages at or above this value are "elder"; ages in
                ``[young_limit, elder_limit)`` are "adult".

        Returns:
            ``(young_mean, adult_mean, elder_mean)``, each rounded to
            2 decimals, with ``None`` for a group that has no records.
        """
        young, adult, elder = [], [], []
        for age, charge in zip(self.insurer_age, self.insurer_charges):
            years = int(age)
            if years < young_limit:
                group = young
            elif years < elder_limit:
                group = adult
            else:
                group = elder
            group.append(float(charge))
        return self._mean(young, 2), self._mean(adult, 2), self._mean(elder, 2)
    

In [12]:
# Bundle the seven column lists filled by load_data into a single InsurerData
# object; the report cells below call its methods.
insurer_data = InsurerData(age, sex, bmi, children, smoker, region, charges)

In [13]:
import numpy as np
# Cross-check: mean charge computed with NumPy from the charges parsed to float.
charges_f = [float(charge) for charge in charges]
round(np.mean(charges_f), 2)

13270.42

In [14]:
# Mean of the charges column, rounded to 2 decimals by avg_insurance_cost().
avg_cost = insurer_data.avg_insurance_cost()
print(f'The average insurance cost is {avg_cost} for this record.')

The average insurance cost is 13270.42 for this record.


In [15]:
# Mean age over all records, rounded to a whole number by avg_age().
avg_age = insurer_data.avg_age()
print(f'The average age in the recorded insurance data is {avg_age} y.o.')

The average age in the recorded insurance data is 39 y.o.


In [16]:
# Most frequent region in the dataset and the number of records it has,
# as returned by largest_region().
larg_region, larg_count = insurer_data.largest_region()
print(f'The largest group of patients according the region is \'{larg_region}\' with {larg_count} individuals registered.')

The largest group of patients according the region is 'southeast' with 364 individuals registered.


In [17]:
# Number of records whose smoker field equals 'yes'.
total_smokers = insurer_data.num_smokers()
print(f'There are {total_smokers} smokers in the record.')

There are 274 smokers in the record.


In [18]:
# Average charges of smokers versus non-smokers.
smoker_cost, non_smoker_cost = insurer_data.smoker_cost()
print(f'The average insurance cost for smokers is $ {smoker_cost} dollars.')
print(f'The average insurance cost for non-smokers is $ {non_smoker_cost} dollars.')
# Are smokers charged more than non-smokers?  The gap is rounded before
# printing because subtracting two floats can leave binary noise in the
# result (e.g. 0.3 - 0.1 == 0.19999999999999998).
cost_gap = round(smoker_cost - non_smoker_cost, 2)
print(f'The insurance cost for smokers is about {cost_gap} dollars higher than non-smokers cost in average.')

The average insurance cost for smokers is $ 32050.23 dollars.
The average insurance cost for non-smokers is $ 8434.27 dollars.
The insurance cost for smokers is about 23615.96 dollars higher than non-smokers cost in average.


In [19]:
# Average age, rounded to a whole number, of the records that have at least
# one child.
avg_have_kids_age = insurer_data.have_kids_age()
print(f'The average age for insurers to have at least one child is {avg_have_kids_age} y.o.')

The average age for insurers to have at least one child is 40 y.o.


In [20]:
# Average charges of female versus male records, to see whether they differ.
female_cost, male_cost = insurer_data.sex_cost()
print(f'The average insurance cost for female is $ {female_cost} dollars.')
print(f'The average insurance cost for male is $ {male_cost} dollars.')
# The gap is rounded before printing because subtracting two floats can
# leave binary noise in the result (e.g. 0.3 - 0.1 == 0.19999999999999998).
cost_gap = round(male_cost - female_cost, 2)
print(f'The average female insurance cost {cost_gap} dollars lower than male insurance.')

The average insurance cost for female is $ 12569.58 dollars.
The average insurance cost for male is $ 13956.75 dollars.
The average female insurance cost 1387.17 dollars lower than male insurance.


In [21]:
# Compare average charges across age groups (under 21, 21 to 59, 60 and over)
# to see whether older groups are charged more than younger ones.
young_cost, adult_cost, elder_cost = insurer_data.group_by_age_cost()
print(f'The average insurance cost for teenagers is $ {young_cost} dollars.')

# Relative increase from one age group to the next, as a whole percentage.
adult_over_young = round((adult_cost / young_cost - 1) * 100)
print(f'The average insurance cost for adults is $ {adult_cost} dollars. {adult_over_young}% higher than teenagers cost.')
elder_over_adult = round((elder_cost / adult_cost - 1) * 100)
print(f'The average insurance cost for elders is $ {elder_cost} dollars. {elder_over_adult}% higher than adults cost.')

# Each younger group's average expressed as a share of the elders' average.
young_share = round(young_cost / elder_cost * 100)
adult_share = round(adult_cost / elder_cost * 100)
print(f"Teenagers insurance cost is about {young_share}% of elder's cost. "
      f"And Adults insurance cost is about {adult_share}% of elder's cost.")

The average insurance cost for teenagers is $ 8713.48 dollars.
The average insurance cost for adults is $ 13125.82 dollars. 51% higher than teenagers cost.
The average insurance cost for elders is $ 21248.02 dollars. 62% higher than adults cost.
Teenagers insurance cost is about 41% of elder's cost. And Adults insurance cost is about 62% of elder's cost.
