# U.S. Medical Insurance Costs
In this project, python fundamentals will be used to analyze medical insurance cost data from a CSV file. The goal of the project is to gain practice using basic python functions to analize realworld data. 

In [7]:
#import csv library
import csv

In [8]:
#list variables to hold csv file data for further manipulation
age = []
sex = []
bmi = []
children = []
smoker = []
region = []
charges = []

In [22]:
#define class for patient info taking the list variables populated with data from the insurance.csv file
class Patient_Info:
    
    #method to change a list of strings into a list of integers
    def make_integer (self, lst):
        for x in range(len(lst)):
            lst[x] = int(lst[x])
        return lst
    
    #method to change a list of strings into a list of floating point values
    def make_float (self, lst):
        for x in range(len(lst)):
            lst[x] = float(lst[x])
        return lst
    
    def __init__ (self, age, sex, bmi, children, smoker, region, charges):
        self.age = self.make_integer(age)
        self.sex = sex
        self.bmi = self.make_float(bmi)
        self.children = children
        self.smoker = smoker
        self.region = region
        self.charges = self.make_float(charges)
        
    #method to find the average of a list
    def find_average (self, lst):
        sum = 0
        for x in lst:
            sum += x
        average = sum/len(lst)
        return average
    
    def analyze_age (self):
        average_age = self.find_average(self.age)
        
        #a dictionary of the patients grouped into common age groups
        patients_by_age = {
            "Under 18": 0,
            "18 to 24": 0,
            "25 to 34": 0,
            "35 to 44": 0,
            "45 to 54": 0,
            "55 to 64": 0,
            "65 to 74": 0,
            "75 and older": 0
        }
        
        for patient in self.age:
            if patient < 18:
                patients_by_age["Under 18"] += 1
            elif patient >= 18 and patient < 25:
                patients_by_age["18 to 24"] += 1
            elif patient >= 25 and patient < 35:
                patients_by_age["25 to 34"] += 1
            elif patient >= 35 and patient < 45:
                patients_by_age["35 to 44"] += 1
            elif patient >= 45 and patient < 55:
                patients_by_age["45 to 54"] += 1
            elif patient >= 55 and patient < 65:
                patients_by_age["55 to 64"] += 1
            elif patient >= 65 and patient < 75:
                patients_by_age["64 to 74"] +=1
            else:
                patients_by_age["75 and older"] +=1
        
        return patients_by_age, average_age
        
    def analyze_sexes (self):
        num_male = 0
        num_female = 0
        for patient in self.sex:
            if patient == "male":
                num_male += 1
            else:
                num_female += 1
        return num_male, num_female
    
    def analyze_regions (self):
        distinct_regions = {}
        for region in self.region:
            if region not in distinct_regions.keys():
                distinct_regions[region] = 1
            else:
                distinct_regions[region] += 1
        return distinct_regions
    
    def create_patients_dict(self):
        patient_profile = []
        for x in range(len(self.age)):
            patient_profile.append({
                'age': int(self.age[x]),
                'sex': self.sex[x],
                'bmi': float(self.bmi[x]),
                'children': int(self.children[x]),
                'smoker': self.smoker[x],
                'region': self.region[x],
                'charges': float(self.charges[x])
            })
        return patient_profile

In [23]:
#read insurance csv file and copy data into predefined list variables
with open('insurance.csv', newline='') as insurance_csv:
    reader = csv.DictReader(insurance_csv)
    for row in reader:
        age.append(row['age'])
        sex.append(row['sex'])
        bmi.append(row['bmi'])
        children.append(row['children'])
        smoker.append(row['smoker'])
        region.append(row['region'])
        charges.append(row['charges'])

Now that the data has been copied from the csv file, let's look ast some simple analysis of the data.

In [24]:
patients = Patient_Info(age, sex, bmi, children, smoker, region, charges)

In [25]:
print(patients.analyze_regions())

{'southwest': 1300, 'southeast': 1456, 'northwest': 1300, 'northeast': 1296}


In [26]:
patient_dict = patients.create_patients_dict()
print(len(patient_dict))

5352


In [27]:
sexes = patients.analyze_sexes()
print("Number of males: {}\nNumber of females: {}".format(sexes[0],sexes[1]))

Number of males: 2704
Number of females: 2648


In [28]:
ages = patients.analyze_age()
for k,v in ages[0].items():
    print('{age} years old: {quantity}'.format(age = k, quantity = v))
print('Average age of patients = {}'.format(round(ages[1])))

Under 18 years old: 0
18 to 24 years old: 1112
25 to 34 years old: 1084
35 to 44 years old: 1040
45 to 54 years old: 1148
55 to 64 years old: 968
65 to 74 years old: 0
75 and older years old: 0
Average age of patients = 39
