# U.S. Medical Insurance Costs
In this project, python fundamentals will be used to analyze medical insurance cost data from a CSV file. The goal of the project is to gain practice using basic python functions to analize realworld data. 

In [7]:
#import csv library
import csv

In [8]:
#list variables to hold csv file data for further manipulation
age = []
sex = []
bmi = []
children = []
smoker = []
region = []
charges = []

In [138]:
#define class for patient info taking the list variables populated with data from the insurance.csv file
class Patient_Info:
    
    #method to change a list of strings into a list of integers
    def make_integer (self, lst):
        for x in range(len(lst)):
            lst[x] = int(lst[x])
        return lst
    
    #method to change a list of strings into a list of floating point values
    def make_float (self, lst):
        for x in range(len(lst)):
            lst[x] = float(lst[x])
        return lst
    
    def __init__ (self, age, sex, bmi, children, smoker, region, charges):
        self.age = self.make_integer(age)
        self.sex = sex
        self.bmi = self.make_float(bmi)
        self.children = self.make_integer(children)
        self.smoker = smoker
        self.region = region
        self.charges = self.make_float(charges)
        
    #method to find the average of a list
    def find_average (self, lst):
        sum = 0
        for x in lst:
            sum += x
        average = sum/len(lst)
        return average
    
    def analyze_age (self):
        average_age = self.find_average(self.age)
        
        #a dictionary of the patients grouped into common age groups
        patients_by_age = {
            "Under 18": {'Total':0, 'BMI':0, 'Number of Children':0, 'Average charges':0},
            "18 to 24": {'Total':0, 'BMI':0, 'Number of Children':0, 'Average charges':0},
            "25 to 34": {'Total':0, 'BMI':0, 'Number of Children':0, 'Average charges':0},
            "35 to 44": {'Total':0, 'BMI':0, 'Number of Children':0, 'Average charges':0},
            "45 to 54": {'Total':0, 'BMI':0, 'Number of Children':0, 'Average charges':0},
            "55 to 64": {'Total':0, 'BMI':0, 'Number of Children':0, 'Average charges':0},
            "65 to 74": {'Total':0, 'BMI':0, 'Number of Children':0, 'Average charges':0},
            "75 and older": {'Total':0, 'BMI':0, 'Number of Children':0, 'Average charges':0}
        }
        
        for x in range(len(self.age)):
            if self.age[x] < 18:
                patients_by_age["Under 18"]['Total'] += 1
                patients_by_age["Under 18"]['BMI'] += self.bmi[x]
                patients_by_age["Under 18"]['Number of Children'] += self.children[x]
                patients_by_age["Under 18"]['Average charges'] += self.charges[x]
            elif self.age[x] >= 18 and self.age[x] < 25:
                patients_by_age["18 to 24"]['Total'] += 1
                patients_by_age["18 to 24"]['BMI'] += self.bmi[x]
                patients_by_age["18 to 24"]['Number of Children'] += self.children[x]
                patients_by_age["18 to 24"]['Average charges'] += self.charges[x]
            elif self.age[x] >= 25 and self.age[x] < 35:
                patients_by_age["25 to 34"]['Total'] += 1
                patients_by_age["25 to 34"]['BMI'] += self.bmi[x]
                patients_by_age["25 to 34"]['Number of Children'] += self.children[x]
                patients_by_age["25 to 34"]['Average charges'] += self.charges[x]
            elif self.age[x] >= 35 and self.age[x] < 45:
                patients_by_age["35 to 44"]['Total'] += 1
                patients_by_age["35 to 44"]['BMI'] += self.bmi[x]
                patients_by_age["35 to 44"]['Number of Children'] += self.children[x]
                patients_by_age["35 to 44"]['Average charges'] += self.charges[x]
            elif self.age[x] >= 45 and self.age[x] < 55:
                patients_by_age["45 to 54"]['Total'] += 1
                patients_by_age["45 to 54"]['BMI'] += self.bmi[x]
                patients_by_age["45 to 54"]['Number of Children'] += self.children[x]
                patients_by_age["45 to 54"]['Average charges'] += self.charges[x]
            elif self.age[x] >= 55 and self.age[x] < 65:
                patients_by_age["55 to 64"]['Total'] += 1
                patients_by_age["55 to 64"]['BMI'] += self.bmi[x]
                patients_by_age["55 to 64"]['Number of Children'] += self.children[x]
                patients_by_age["55 to 64"]['Average charges'] += self.charges[x]
            elif self.age[x] >= 65 and self.age[x] < 75:
                patients_by_age["65 to 74"]['Total'] += 1
                patients_by_age["65 to 74"]['BMI'] += self.bmi[x]
                patients_by_age["65 to 74"]['Number of Children'] += self.children[x]
                patients_by_age["65 to 74"]['Average charges'] += self.charges[x]
            else:
                patients_by_age["75 and older"]['Total'] += 1
                patients_by_age["75 and older"]['BMI'] += self.bmi[x]
                patients_by_age["75 and older"]['Number of Children'] += self.children[x]
                patients_by_age["75 and older"]['Average charges'] += self.charges[x]
        
        for age_group in patients_by_age.values():
            try:
                age_group['BMI'] = round(age_group['BMI'] / age_group['Total'],1)
                age_group['Number of Children'] = round(age_group['Number of Children'] / age_group['Total'],1)
                age_group['Average charges'] = round(age_group['Average charges'] / age_group['Total'],2)
            except ZeroDivisionError:
                pass
        
        return patients_by_age, average_age
        
    def analyze_sexes (self):
        num_male = 0
        num_female = 0
        for patient in self.sex:
            if patient == "male":
                num_male += 1
            else:
                num_female += 1
        return num_male, num_female
    
    def analyze_regions (self):
        distinct_regions = {}
        for region in self.region:
            if region not in distinct_regions.keys():
                distinct_regions[region] = 1
            else:
                distinct_regions[region] += 1
        return distinct_regions
    
    def create_patients_dict(self):
        patient_profile = []
        for x in range(len(self.age)):
            patient_profile.append({
                'age': int(self.age[x]),
                'sex': self.sex[x],
                'bmi': float(self.bmi[x]),
                'children': int(self.children[x]),
                'smoker': self.smoker[x],
                'region': self.region[x],
                'charges': float(self.charges[x])
            })
        return patient_profile

In [139]:
#read insurance csv file and copy data into predefined list variables
with open('insurance.csv', newline='') as insurance_csv:
    reader = csv.DictReader(insurance_csv)
    for row in reader:
        age.append(row['age'])
        sex.append(row['sex'])
        bmi.append(row['bmi'])
        children.append(row['children'])
        smoker.append(row['smoker'])
        region.append(row['region'])
        charges.append(row['charges'])

Now that the data has been copied from the csv file, let's look ast some simple analysis of the data.

In [140]:
patients = Patient_Info(age, sex, bmi, children, smoker, region, charges)

In [141]:
print(patients.analyze_regions())

{'southwest': 6500, 'southeast': 7280, 'northwest': 6500, 'northeast': 6480}


In [142]:
patient_dict = patients.create_patients_dict()
print(len(patient_dict))

26760


In [143]:
sexes = patients.analyze_sexes()
print("Number of males: {}\nNumber of females: {}".format(sexes[0],sexes[1]))

Number of males: 13520
Number of females: 13240


In [144]:
ages = patients.analyze_age()
for k,v in ages[0].items():
    print('{age} years old: {quantity}'.format(age = k, quantity = v))
print('Average age of patients = {}'.format(round(ages[1])))

Under 18 years old: {'Total': 0, 'BMI': 0, 'Number of Children': 0, 'Average charges': 0}
18 to 24 years old: {'Total': 5560, 'BMI': 30.0, 'Number of Children': 0.6, 'Average charges': 9011.34}
25 to 34 years old: {'Total': 5420, 'BMI': 30.1, 'Number of Children': 1.3, 'Average charges': 10352.39}
35 to 44 years old: {'Total': 5200, 'BMI': 30.4, 'Number of Children': 1.5, 'Average charges': 13134.17}
45 to 54 years old: {'Total': 5740, 'BMI': 31.1, 'Number of Children': 1.4, 'Average charges': 15853.93}
55 to 64 years old: {'Total': 4840, 'BMI': 31.8, 'Number of Children': 0.7, 'Average charges': 18513.28}
65 to 74 years old: {'Total': 0, 'BMI': 0, 'Number of Children': 0, 'Average charges': 0}
75 and older years old: {'Total': 0, 'BMI': 0, 'Number of Children': 0, 'Average charges': 0}
Average age of patients = 39
