# U.S. Medical Insurance Costs
In this project, python fundamentals will be used to analyze medical insurance cost data from a CSV file. The goal of the project is to gain practice using basic python functions to analize realworld data. 

In [76]:
#import csv library
import csv

In [78]:
#list variables to hold csv file data for further manipulation
age = []
sex = []
bmi = []
children = []
smoker = []
region = []
charges = []

In [106]:
#define class for patient info taking the list variables populated with data from the insurance.csv file
class Patient_Info:
    
    #method to change a list of strings into a list of integers
    def make_integer (self, lst):
        for x in range(len(lst)):
            lst[x] = int(lst[x])
        return lst
    
    #method to change a list of strings into a list of floating point values
    def make_float (self, lst):
        for x in range(len(lst)):
            lst[x] = float(lst[x])
        return lst
    
    def __init__ (self, age, sex, bmi, children, smoker, region, charges):
        self.age = self.make_integer(age)
        self.sex = sex
        self.bmi = self.make_float(bmi)
        self.children = self.make_integer(children)
        self.smoker = smoker
        self.region = region
        self.charges = self.make_float(charges)
        
    #method to find the average of a list
    def find_average (self, lst):
        sum = 0
        for x in lst:
            sum += x
        average = sum/len(lst)
        return average
    
    #method to update the dictionary of age groups used in the analyze by age method
    def update_age_dict(self,age_dict, age_group, x):
        age_dict[age_group]['Total'] +=1
        age_dict[age_group]['Avg BMI'] += self.bmi[x]
        age_dict[age_group]['Avg Children'] += self.children[x]
        age_dict[age_group]['Avg charges'] += self.charges[x]
        if self.sex[x] == 'male':
            age_dict[age_group]['Total male'] +=1
        else:
            age_dict[age_group]['Total female'] +=1
        if self.smoker[x] == 'yes':
            age_dict[age_group]['Pct Smoker'] += 1
        return age_dict
    
    #method to analyze insurance costs by age
    def analyze_age (self):
        average_age = self.find_average(self.age)
        
        #a dictionary of the patients grouped into common age groups
        patients_by_age = {
            "Under 18": {'Total':0, 'Avg BMI':0, 'Total male':0, 'Total female':0, 'Pct Smoker':0, 'Avg Children':0, 'Avg charges':0},
            "18 to 24": {'Total':0, 'Avg BMI':0, 'Total male':0, 'Total female':0, 'Pct Smoker':0, 'Avg Children':0, 'Avg charges':0},
            "25 to 34": {'Total':0, 'Avg BMI':0, 'Total male':0, 'Total female':0, 'Pct Smoker':0, 'Avg Children':0, 'Avg charges':0},
            "35 to 44": {'Total':0, 'Avg BMI':0, 'Total male':0, 'Total female':0, 'Pct Smoker':0, 'Avg Children':0, 'Avg charges':0},
            "45 to 54": {'Total':0, 'Avg BMI':0, 'Total male':0, 'Total female':0, 'Pct Smoker':0, 'Avg Children':0, 'Avg charges':0},
            "55 to 64": {'Total':0, 'Avg BMI':0, 'Total male':0, 'Total female':0, 'Pct Smoker':0, 'Avg Children':0, 'Avg charges':0},
            "65 to 74": {'Total':0, 'Avg BMI':0, 'Total male':0, 'Total female':0, 'Pct Smoker':0, 'Avg Children':0, 'Avg charges':0},
            "75 and older": {'Total':0, 'Avg BMI':0, 'Total male':0, 'Total female':0, 'Pct Smoker':0, 'Avg Children':0, 'Avg charges':0}
        }
        
        #This goes through the complete set of insurance data and divides into the standard age groups and updates
        for x in range(len(self.age)):
            if self.age[x] < 18:
                self.update_age_dict(patients_by_age, "Under 18", x)
            elif self.age[x] >= 18 and self.age[x] < 25:
                self.update_age_dict(patients_by_age, "18 to 24", x)
            elif self.age[x] >= 25 and self.age[x] < 35:
                self.update_age_dict(patients_by_age, "25 to 34", x)
            elif self.age[x] >= 35 and self.age[x] < 45:
                self.update_age_dict(patients_by_age, "35 to 44", x)
            elif self.age[x] >= 45 and self.age[x] < 55:
                self.update_age_dict(patients_by_age, "45 to 54", x)
            elif self.age[x] >= 55 and self.age[x] < 65:
                self.update_age_dict(patients_by_age, "55 to 64", x)
            elif self.age[x] >= 65 and self.age[x] < 75:
                self.update_age_dict(patients_by_age, "65 to 74", x)
            else:
                self.update_age_dict(patients_by_age, "75 and older", x)
        
        #Convert totals in patients_by_age dictionary values to averages and percentages where applicable
        for age_group in patients_by_age.values():
            try:
                age_group['Avg BMI'] = round(age_group['Avg BMI'] / age_group['Total'],1)
                age_group['Avg Children'] = round(age_group['Avg Children'] / age_group['Total'],1)
                age_group['Avg charges'] = round(age_group['Avg charges'] / age_group['Total'],2)
                age_group['Pct Smoker'] = 100 * round(age_group['Pct Smoker'] / age_group['Total'],2)
            except ZeroDivisionError:
                pass
        
        return patients_by_age, average_age

    #method to update the dictionary of sexes used in the analyze by sex method
    def update_sex_dict(self, sex_dict, sex, x):
        sex_dict[sex]['Total'] += 1
        sex_dict[sex]['Avg Age'] += self.age[x]
        sex_dict[sex]['Avg BMI'] += self.bmi[x]
        if self.smoker[x] == 'yes':
            sex_dict[sex]['Pct Smoker'] +=1
        sex_dict[sex]['Avg Children'] += self.children[x]
        sex_dict[sex]['Avg charges'] += self.charges[x]
        return sex_dict
    
    #method to analyze insurance cost by sex
    def analyze_sexes (self):
        
        #dictionary to group information by sex
        patients_by_sex = {
            'Male': {'Total': 0, 'Avg Age': 0, 'Avg BMI': 0, 'Pct Smoker': 0, 'Avg Children': 0, 'Avg charges':0},
            'Female': {'Total': 0, 'Avg Age': 0, 'Avg BMI': 0, 'Pct Smoker': 0, 'Avg Children': 0, 'Avg charges':0}
        }
        
        #this goes through the insurance information and populates the sex dictionary by sex
        for x in range(len(self.sex)):
            if self.sex[x] == 'male':
                self.update_sex_dict(patients_by_sex, 'Male', x)
            else:
                self.update_sex_dict(patients_by_sex, 'Female', x)   
        
        #converts total amounts in dictionary to average values where applicable
        for sex in patients_by_sex.values():
            sex['Avg Age'] = round(sex['Avg Age'] / sex['Total'],1)
            sex['Avg BMI'] = round(sex['Avg BMI'] / sex['Total'],1)
            sex['Pct Smoker']= 100 * round(sex['Pct Smoker'] / sex['Total'],2)
            sex['Avg Children'] = round(sex['Avg Children'] / sex['Total'],1)
            sex['Avg charges'] = round(sex['Avg charges'] / sex['Total'],2)
        
        return patients_by_sex
    
    #method to update the dictionary of regions used in the analyze by region method
    def update_reg_dict(self, reg_dict, reg, x):
        reg_dict[reg]['Total'] += 1
        reg_dict[reg]['Avg age'] += self.age[x]
        reg_dict[reg]['Avg BMI'] += self.bmi[x]
        if self.smoker[x] == 'yes':
            reg_dict[reg]['Pct Smoker'] +=1
        reg_dict[reg]['Avg Children'] += self.children[x]
        reg_dict[reg]['Avg charges'] += self.charges[x]
        if self.sex[x] == 'male':
            reg_dict[reg]['Total male'] += 1
        else:
            reg_dict[reg]['Total female'] += 1
        
        return reg_dict
    
    #method to analyze insurance cost by region
    def analyze_regions (self):
        
        #dictionary to group information by region
        distinct_regions = {
            'Southwest' : {'Total':0, 'Avg age':0, 'Total male':0, 'Total female':0, 'Avg BMI':0, 'Pct Smoker':0, 'Avg Children':0, 'Avg charges':0},
            'Southeast' : {'Total':0, 'Avg age':0, 'Total male':0, 'Total female':0, 'Avg BMI':0, 'Pct Smoker':0, 'Avg Children':0, 'Avg charges':0},
            'Northwest' : {'Total':0, 'Avg age':0, 'Total male':0, 'Total female':0, 'Avg BMI':0, 'Pct Smoker':0, 'Avg Children':0, 'Avg charges':0},
            'Northeast' : {'Total':0, 'Avg age':0, 'Total male':0, 'Total female':0, 'Avg BMI':0, 'Pct Smoker':0, 'Avg Children':0, 'Avg charges':0}
        }
        
        #this goes through the insurance information and groups by region
        for x in range(len(self.region)):
            if self.region[x] == 'southwest':
                self.update_reg_dict(distinct_regions, 'Southwest', x)
            elif self.region[x] == 'southeast':
                self.update_reg_dict(distinct_regions, 'Southeast', x)
            elif self.region[x] == 'northwest':
                self.update_reg_dict(distinct_regions, 'Northwest', x)
            else:
                self.update_reg_dict(distinct_regions, 'Northeast', x)
        
        #converts total amounts in dictionary to average values where applicable
        for region in distinct_regions.values():
            try:
                region['Avg age'] = round(region['Avg age'] / region['Total'],1)
                region['Avg BMI'] = round(region['Avg BMI'] / region['Total'],1)
                region['Pct Smoker']= 100 * round(region['Pct Smoker'] / region['Total'],2)
                region['Avg Children'] = round(region['Avg Children'] / region['Total'],1)
                region['Avg charges'] = round(region['Avg charges'] / region['Total'],2)
            except ZeroDivisionError:
                pass        
        
        return distinct_regions
    
    def create_patients_dict(self):
        patient_profile = []
        for x in range(len(self.age)):
            patient_profile.append({
                'age': int(self.age[x]),
                'sex': self.sex[x],
                'bmi': float(self.bmi[x]),
                'children': int(self.children[x]),
                'smoker': self.smoker[x],
                'region': self.region[x],
                'charges': float(self.charges[x])
            })
        return patient_profile

In [107]:
#read insurance csv file and copy data into predefined list variables
with open('insurance.csv', newline='') as insurance_csv:
    reader = csv.DictReader(insurance_csv)
    for row in reader:
        age.append(row['age'])
        sex.append(row['sex'])
        bmi.append(row['bmi'])
        children.append(row['children'])
        smoker.append(row['smoker'])
        region.append(row['region'])
        charges.append(row['charges'])

Now that the data has been copied from the csv file, let's look ast some simple analysis of the data.

In [108]:
#Instantiates the Patient_Info class
patients = Patient_Info(age, sex, bmi, children, smoker, region, charges)

In [109]:
#Prints the results of the analysis by age group
ages = patients.analyze_age()
for k,v in ages[0].items():
    print('{age} years old: {quantity}'.format(age = k, quantity = v))
print('\nAverage age of patients = {}'.format(round(ages[1])))

Under 18 years old: {'Total': 0, 'Avg BMI': 0, 'Total male': 0, 'Total female': 0, 'Pct Smoker': 0, 'Avg Children': 0, 'Avg charges': 0}
18 to 24 years old: {'Total': 1390, 'Avg BMI': 30.0, 'Total male': 720, 'Total female': 670, 'Pct Smoker': 22.0, 'Avg Children': 0.6, 'Avg charges': 9011.34}
25 to 34 years old: {'Total': 1355, 'Avg BMI': 30.1, 'Total male': 695, 'Total female': 660, 'Pct Smoker': 21.0, 'Avg Children': 1.3, 'Avg charges': 10352.39}
35 to 44 years old: {'Total': 1300, 'Avg BMI': 30.4, 'Total male': 655, 'Total female': 645, 'Pct Smoker': 23.0, 'Avg Children': 1.5, 'Avg charges': 13134.17}
45 to 54 years old: {'Total': 1435, 'Avg BMI': 31.1, 'Total male': 715, 'Total female': 720, 'Pct Smoker': 19.0, 'Avg Children': 1.4, 'Avg charges': 15853.93}
55 to 64 years old: {'Total': 1210, 'Avg BMI': 31.8, 'Total male': 595, 'Total female': 615, 'Pct Smoker': 17.0, 'Avg Children': 0.7, 'Avg charges': 18513.28}
65 to 74 years old: {'Total': 0, 'Avg BMI': 0, 'Total male': 0, 'Tota

In [110]:
#Prints the results of the analysis by sex
sexes = patients.analyze_sexes()
for k, v in sexes.items():
    print(k,v,"\n")

Male {'Total': 3380, 'Avg Age': 38.9, 'Avg BMI': 30.9, 'Pct Smoker': 24.0, 'Avg Children': 1.1, 'Avg charges': 13956.75} 

Female {'Total': 3310, 'Avg Age': 39.5, 'Avg BMI': 30.4, 'Pct Smoker': 17.0, 'Avg Children': 1.1, 'Avg charges': 12569.58} 



In [111]:
#Prints the results of the analysis by region
regions = patients.analyze_regions()
for k, v in regions.items():
    print(k,v,"\n")

Southwest {'Total': 1625, 'Avg age': 39.5, 'Total male': 815, 'Total female': 810, 'Avg BMI': 30.6, 'Pct Smoker': 18.0, 'Avg Children': 1.1, 'Avg charges': 12346.94} 

Southeast {'Total': 1820, 'Avg age': 38.9, 'Total male': 945, 'Total female': 875, 'Avg BMI': 33.4, 'Pct Smoker': 25.0, 'Avg Children': 1.0, 'Avg charges': 14735.41} 

Northwest {'Total': 1625, 'Avg age': 39.2, 'Total male': 805, 'Total female': 820, 'Avg BMI': 29.2, 'Pct Smoker': 18.0, 'Avg Children': 1.1, 'Avg charges': 12417.58} 

Northeast {'Total': 1620, 'Avg age': 39.3, 'Total male': 815, 'Total female': 805, 'Avg BMI': 29.2, 'Pct Smoker': 21.0, 'Avg Children': 1.0, 'Avg charges': 13406.38} 



In [None]:
#Prints the results of the anaysis by BMI

In [112]:
#Creates the dictionary containing all of the patient information
patient_dict = patients.create_patients_dict()

6690
