# U.S. Medical Insurance Costs - Class Approach



Aims of the project
1. Find out the average age of the patients in the dataset.
2. Determine which region most of the individuals are from.
3. Compare the insurance costs of smokers and non-smokers.
4. Find the average age of patients in this dataset who have at least one child.

In [34]:
import csv

class Insurance:
    """Load a medical-insurance CSV file and compute summary statistics.

    Every CSV row becomes one patient record: a dict keyed by the file's
    header names.  The analysis methods read the "age", "region", "smoker",
    "charges" and "children" columns.  Call openFile() before using them.
    """

    def __init__(self, filename):
        """Remember *filename*; nothing is read until openFile() is called."""
        self.filename = filename
        # Patient number (counting from 1) -> row dict.
        self.data = {}
        # Iterable of the row dicts; openFile() points it at self.data.values().
        self.values = {}

    def __repr__(self):
        return """Dataset from file '{}'.""".format(self.filename)

    def openFile(self):
        """Read the CSV file into self.data, replacing any earlier load.

        Raises:
            OSError: if the file cannot be opened.
        """
        # newline="" is the mode the csv module asks for, so that line breaks
        # inside quoted fields are handled by the reader itself.
        with open(self.filename, newline="") as dataset:
            insurance_lines = csv.DictReader(dataset)
            # Build a fresh dict: re-reading a file that has become shorter
            # must not leave rows from the previous load behind.
            self.data = dict(enumerate(insurance_lines, start=1))
        self.values = self.data.values()

    @staticmethod
    def _mean(total, count, what):
        """Return total / count, raising a descriptive error when count is 0."""
        if count == 0:
            # Same exception type an empty group has always produced, but with
            # a message that names what is missing.
            raise ZeroDivisionError(
                "cannot compute an average: no {} in the dataset".format(what)
            )
        return total / count

    def average_age(self):
        """Return the mean patient age, rounded to one decimal place.

        Raises:
            ZeroDivisionError: if no patient records are loaded.
        """
        ages = [int(patient["age"]) for patient in self.values]
        return round(self._mean(sum(ages), len(ages), "patients"), 1)

    def locations(self):
        """Return a dict mapping each region to its number of patients.

        Regions appear in the order they are first seen in the data.
        """
        from collections import Counter

        # One pass over the records instead of a list.count() per region.
        return dict(Counter(patient["region"] for patient in self.values))

    def largest_location(self):
        """Return a sentence naming the region with the most patients.

        On a tie the region seen first wins.  With no records loaded the
        region name is empty and the count is 0.
        """
        locations = self.locations()
        largest = max(locations, key=locations.get, default="")
        count = locations.get(largest, 0)
        return "The largest location in this dataset is " + largest + " with " + str(count) + " patients."

    def cost_of_smoking(self):
        """Print both group averages and return smokers' extra average cost.

        Rows whose "smoker" value is neither 'yes' nor 'no' are ignored.

        Returns:
            Average smoker charge minus average non-smoker charge, rounded
            to two decimal places.

        Raises:
            ZeroDivisionError: if the data has no smokers or no non-smokers.
        """
        smokers_total = 0
        number_of_smokers = 0
        non_smokers_total = 0
        number_of_non_smokers = 0
        for patient in self.values:
            current_cost = float(patient["charges"])
            if patient["smoker"] == 'yes':
                smokers_total += current_cost
                number_of_smokers += 1
            elif patient["smoker"] == 'no':
                non_smokers_total += current_cost
                number_of_non_smokers += 1
        # Compute both averages before printing so that a missing group never
        # leaves a half-finished report on the screen.
        average_cost_smokers = self._mean(
            smokers_total, number_of_smokers, "smokers"
        )
        average_cost_non_smokers = self._mean(
            non_smokers_total, number_of_non_smokers, "non-smokers"
        )
        print("The average insurance cost of smokers is $" + str(round(average_cost_smokers,2)))
        print("The average insurance cost of non-smokers is $" + str(round(average_cost_non_smokers,2)))
        return round(average_cost_smokers - average_cost_non_smokers, 2)

    def parental_age(self):
        """Return the mean age of patients with at least one child (1 decimal).

        Raises:
            ZeroDivisionError: if no patient has children.
        """
        total_age = 0
        number_of_parents = 0
        for patient in self.values:
            current_age = int(patient["age"])
            if int(patient["children"]) > 0:
                total_age += current_age
                number_of_parents += 1
        return round(self._mean(total_age, number_of_parents, "parents"), 1)

    
# Load the dataset, then answer each of the project's questions in turn.
new_dataset = Insurance('insurance.csv')
new_dataset.openFile()

# Summary line, overall average age, and the per-region breakdown.
for report_line in (
    new_dataset,
    new_dataset.average_age(),
    new_dataset.locations(),
    new_dataset.largest_location(),
):
    print(report_line)

# cost_of_smoking() prints the two group averages itself and returns the gap.
difference = new_dataset.cost_of_smoking()
print("The cost of smoking is on average $", difference, " extra in insurance cost.", sep="")

average_parental_age = new_dataset.parental_age()
print("The average age of parents in the dataset is ", average_parental_age, sep="")

Dataset from file 'insurance.csv'.
39.2
{'southwest': 325, 'southeast': 364, 'northwest': 325, 'northeast': 324}
The largest location in this dataset is southeast with 364 patients.
The average insurance cost of smokers is $32050.23
The average insurance cost of non-smokers is $8434.27
The cost of smoking is on average $23615.96 extra in insurance cost.
The average age of parents in the dataset is 39.8
