# U.S. Medical Insurance Costs
In this project, Python fundamentals will be used to analyze medical insurance cost data from a CSV file. The goal of the project is to gain practice using basic Python functions to analyze real-world data.

In [31]:
#import csv library
import csv

In [32]:
#one empty list per csv column; the generator yields a fresh, independent list for each name
age, sex, bmi, children, smoker, region, charges = ([] for _ in range(7))

In [33]:
#define class for patient info taking the list variables populated with data from the insurance.csv file
class Patient_Info:
    """Column-wise store of insurance records with simple summary analyses.

    Every constructor argument is one column of the data set: a sequence with
    one entry per patient, where index ``i`` of each column describes the same
    patient. ``age`` and ``children`` are converted with ``int`` and ``bmi``
    and ``charges`` with ``float``; the converted values are stored in new
    lists, so the sequences passed in are left untouched.
    """

    #exclusive upper age bound for each age group label, in ascending order
    _AGE_GROUP_BOUNDS = (
        (18, "Under 18"),
        (25, "18 to 24"),
        (35, "25 to 34"),
        (45, "35 to 44"),
        (55, "45 to 54"),
        (65, "55 to 64"),
        (75, "65 to 74"),
    )
    #label used for every age at or above the last bound
    _OLDEST_AGE_GROUP = "75 and older"
    #statistics tracked for each age group, in the order they are reported
    _AGE_GROUP_FIELDS = (
        'Total', 'Avg BMI', 'Total male', 'Total female',
        'Pct Smoker', 'Avg Children', 'Avg charges',
    )

    #method to change a list of strings into a list of integers
    def make_integer (self, lst):
        """Return a new list holding ``int(value)`` for every value in ``lst``.

        ``lst`` itself is not modified. Raises ``ValueError`` (from ``int``)
        when a value cannot be converted.
        """
        return [int(value) for value in lst]

    #method to change a list of strings into a list of floating point values
    def make_float (self, lst):
        """Return a new list holding ``float(value)`` for every value in ``lst``.

        ``lst`` itself is not modified. Raises ``ValueError`` (from ``float``)
        when a value cannot be converted.
        """
        return [float(value) for value in lst]

    def __init__ (self, age, sex, bmi, children, smoker, region, charges):
        """Store the seven data columns, converting the numeric ones."""
        self.age = self.make_integer(age)
        self.sex = sex
        self.bmi = self.make_float(bmi)
        self.children = self.make_integer(children)
        self.smoker = smoker
        self.region = region
        self.charges = self.make_float(charges)

    #method to find the average of a list
    def find_average (self, lst):
        """Return the arithmetic mean of the numbers in ``lst``.

        Raises ``ZeroDivisionError`` when ``lst`` is empty.
        """
        return sum(lst) / len(lst)

    #helper to map a single age onto its age group label
    def _age_group_label(self, patient_age):
        """Return the label of the age group that ``patient_age`` falls into."""
        for upper_bound, label in self._AGE_GROUP_BOUNDS:
            if patient_age < upper_bound:
                return label
        return self._OLDEST_AGE_GROUP

    #method to update the dictionary of age groups used in the analyze by age method
    def update_age_dict(self,age_dict, age_group, x):
        """Add patient ``x`` to the running totals of ``age_dict[age_group]``.

        While patients are being accumulated the 'Avg ...' and 'Pct Smoker'
        entries hold plain sums and counts; ``analyze_age`` turns them into
        averages and a percentage afterwards. Returns ``age_dict``.
        """
        group = age_dict[age_group]
        group['Total'] += 1
        group['Avg BMI'] += self.bmi[x]
        group['Avg Children'] += self.children[x]
        group['Avg charges'] += self.charges[x]
        #any value other than 'male' is counted as female
        if self.sex[x] == 'male':
            group['Total male'] += 1
        else:
            group['Total female'] += 1
        if self.smoker[x] == 'yes':
            group['Pct Smoker'] += 1
        return age_dict

    #method to analyze insurance costs by age
    def analyze_age (self):
        """Summarise the patients by age group.

        Returns a tuple ``(patients_by_age, average_age)``. ``patients_by_age``
        maps each age group label to a dictionary with the patient count, the
        male and female counts, the average BMI and number of children (one
        decimal place), the average charges (two decimal places) and the
        share of smokers as a whole-number percentage. Groups without any
        patient keep all-zero entries. ``average_age`` is the unrounded mean.
        """
        average_age = self.find_average(self.age)

        #a dictionary of the patients grouped into common age groups
        labels = [label for _, label in self._AGE_GROUP_BOUNDS]
        labels.append(self._OLDEST_AGE_GROUP)
        patients_by_age = {
            label: dict.fromkeys(self._AGE_GROUP_FIELDS, 0) for label in labels
        }

        #assign every patient to an age group and add them to its running totals
        for index, patient_age in enumerate(self.age):
            self.update_age_dict(patients_by_age, self._age_group_label(patient_age), index)

        #Convert totals in patients_by_age dictionary values to averages and percentages where applicable
        for stats in patients_by_age.values():
            total = stats['Total']
            if total == 0:
                #nothing to average for an empty group; its zero entries stay as they are
                continue
            stats['Avg BMI'] = round(stats['Avg BMI'] / total, 1)
            stats['Avg Children'] = round(stats['Avg Children'] / total, 1)
            stats['Avg charges'] = round(stats['Avg charges'] / total, 2)
            #scale to a percentage before rounding so the result is an exact whole number;
            #multiplying an already rounded fraction by 100 can give 14.000000000000002
            stats['Pct Smoker'] = float(round(100 * stats['Pct Smoker'] / total))

        return patients_by_age, average_age

    def analyze_sexes (self):
        """Return ``(num_male, num_female)``.

        Entries equal to "male" are counted as male; every other entry is
        counted as female.
        """
        num_male = self.sex.count("male")
        return num_male, len(self.sex) - num_male

    def analyze_regions (self):
        """Return a dictionary mapping each distinct region to its patient count.

        Keys appear in the order the regions are first encountered.
        """
        distinct_regions = {}
        for region in self.region:
            distinct_regions[region] = distinct_regions.get(region, 0) + 1
        return distinct_regions

    def create_patients_dict(self):
        """Return a list with one dictionary per patient.

        Each dictionary has the keys 'age', 'sex', 'bmi', 'children',
        'smoker', 'region' and 'charges'. The numeric values are taken as
        stored, since they were already converted in ``__init__``.
        """
        return [
            {
                'age': self.age[x],
                'sex': self.sex[x],
                'bmi': self.bmi[x],
                'children': self.children[x],
                'smoker': self.smoker[x],
                'region': self.region[x],
                'charges': self.charges[x],
            }
            for x in range(len(self.age))
        ]

In [34]:
#read insurance csv file and copy data into predefined list variables
#the lists are emptied first so that re-running this cell reloads the data
#instead of appending a second copy of every row
for column in (age, sex, bmi, children, smoker, region, charges):
    column.clear()

with open('insurance.csv', newline='') as insurance_csv:
    reader = csv.DictReader(insurance_csv)
    for row in reader:
        age.append(row['age'])
        sex.append(row['sex'])
        bmi.append(row['bmi'])
        children.append(row['children'])
        smoker.append(row['smoker'])
        region.append(row['region'])
        charges.append(row['charges'])

Now that the data has been copied from the CSV file, let's look at some simple analysis of the data.

In [35]:
#Builds the Patient_Info object from the seven column lists
patients = Patient_Info(
    age=age,
    sex=sex,
    bmi=bmi,
    children=children,
    smoker=smoker,
    region=region,
    charges=charges,
)

In [36]:
#Shows how many patients live in each region
print('{}'.format(patients.analyze_regions()))

{'southwest': 325, 'southeast': 364, 'northwest': 325, 'northeast': 324}


In [37]:
#Builds one dictionary per patient and reports how many records were created
patient_dict = patients.create_patients_dict()
print('{}'.format(len(patient_dict)))

1338


In [38]:
#Shows the number of male and female patients
sexes = patients.analyze_sexes()
print("Number of males: {}\nNumber of females: {}".format(*sexes))

Number of males: 676
Number of females: 662


In [40]:
#Shows the statistics of every age group, followed by the overall average age
ages = patients.analyze_age()
for k, v in ages[0].items():
    line = '{age} years old: {quantity}'.format(quantity=v, age=k)
    print(line)
print('\nAverage age of patients = {}'.format(round(ages[1])))

Under 18 years old: {'Total': 0, 'Avg BMI': 0, 'Total male': 0, 'Total female': 0, 'Pct Smoker': 0, 'Avg Children': 0, 'Avg charges': 0}
18 to 24 years old: {'Total': 278, 'Avg BMI': 30.0, 'Total male': 144, 'Total female': 134, 'Pct Smoker': 22.0, 'Avg Children': 0.6, 'Avg charges': 9011.34}
25 to 34 years old: {'Total': 271, 'Avg BMI': 30.1, 'Total male': 139, 'Total female': 132, 'Pct Smoker': 21.0, 'Avg Children': 1.3, 'Avg charges': 10352.39}
35 to 44 years old: {'Total': 260, 'Avg BMI': 30.4, 'Total male': 131, 'Total female': 129, 'Pct Smoker': 23.0, 'Avg Children': 1.5, 'Avg charges': 13134.17}
45 to 54 years old: {'Total': 287, 'Avg BMI': 31.1, 'Total male': 143, 'Total female': 144, 'Pct Smoker': 19.0, 'Avg Children': 1.4, 'Avg charges': 15853.93}
55 to 64 years old: {'Total': 242, 'Avg BMI': 31.8, 'Total male': 119, 'Total female': 123, 'Pct Smoker': 17.0, 'Avg Children': 0.7, 'Avg charges': 18513.28}
65 to 74 years old: {'Total': 0, 'Avg BMI': 0, 'Total male': 0, 'Total fem