# U.S. Medical Insurance Costs

In [54]:
#############################
#### Descriptive Analysis ###
#############################

import csv

# Find out the average age of the patients
def find_average_age_of_patients(list):
    """Print the mean age across all patient rows.

    Args:
        list: Iterable of row mappings, each with an 'age' entry that
            int() can parse (for example the rows of a csv.DictReader).
            The parameter name shadows the builtin; it is kept so that
            existing callers are unaffected.

    Returns:
        None. The result is reported through print().

    Raises:
        KeyError: If a row has no 'age' entry.
        ValueError: If an 'age' value cannot be parsed by int().
    """
    total = 0
    age_sum = 0  # not named `sum`, so the builtin stays usable
    for row in list:
        age_sum += int(row['age'])
        total += 1

    if total == 0:
        # An empty input used to crash with ZeroDivisionError.
        print('No patients found, so no average age can be calculated.')
        return None

    # round(..., 0) keeps the value a float, so it prints as e.g. "39.0".
    average = round(age_sum / total, 0)
    print(f'The average age of all ({total}) patients is {average}')
    return None
    

# Stream the CSV rows into the average-age report.
# newline='' lets the csv module do its own newline handling, as its
# documentation requires for files handed to a reader.
with open('insurance.csv', newline='') as insurance_file:
    insurance_list = csv.DictReader(insurance_file)
    find_average_age_of_patients(insurance_list)
    

The average age of all (1338) patients is 39.0


In [55]:
#############################
#### Descriptive Analysis ###
#############################

# Figure out what the average age is for someone who has at least one child
def find_average_age_with_at_least_one_child(list):
    """Print the mean age of patients who have one or more children.

    Args:
        list: Iterable of row mappings, each with 'children' and 'age'
            entries that int() can parse. The parameter name shadows the
            builtin; it is kept so that existing callers are unaffected.

    Returns:
        None. The result is reported through print().

    Raises:
        KeyError: If a row lacks a 'children' entry, or a matching row
            lacks an 'age' entry.
        ValueError: If either value cannot be parsed by int().
    """
    total = 0
    age_sum = 0  # not named `sum`, so the builtin stays usable
    for row in list:
        if int(row['children']) > 0:
            age_sum += int(row['age'])
            total += 1

    if total == 0:
        # No matching rows used to crash with ZeroDivisionError.
        print('No patients with at least one child were found.')
        return None

    # round(..., 0) keeps the value a float, so it prints as e.g. "40.0".
    average = round(age_sum / total, 0)
    print(f'The average age of ({total}) patients with at least one child is {average}')
    return None

# Stream the CSV rows into the "at least one child" average-age report.
# newline='' lets the csv module do its own newline handling, as its
# documentation requires for files handed to a reader.
with open('insurance.csv', newline='') as insurance_file:
    insurance_list = csv.DictReader(insurance_file)
    find_average_age_with_at_least_one_child(insurance_list)
    

The average age of (764) patients with at least one child is 40.0


In [56]:
#############################
#### Descriptive Analysis ###
#############################

# Figure out what the average age is for someone who has no child
def find_average_age_with_no_child(list):
    """Print the mean age of patients who have no children.

    Args:
        list: Iterable of row mappings, each with 'children' and 'age'
            entries that int() can parse. The parameter name shadows the
            builtin; it is kept so that existing callers are unaffected.

    Returns:
        None. The result is reported through print().

    Raises:
        KeyError: If a row lacks a 'children' entry, or a matching row
            lacks an 'age' entry.
        ValueError: If either value cannot be parsed by int().
    """
    total = 0
    age_sum = 0  # not named `sum`, so the builtin stays usable
    for row in list:
        if int(row['children']) == 0:
            age_sum += int(row['age'])
            total += 1

    if total == 0:
        # No matching rows used to crash with ZeroDivisionError.
        print('No patients with no child were found.')
        return None

    # round(..., 0) keeps the value a float, so it prints as e.g. "38.0".
    average = round(age_sum / total, 0)
    print(f'The average age of ({total}) patients with no child is {average}')
    return None

# Stream the CSV rows into the "no child" average-age report.
# newline='' lets the csv module do its own newline handling, as its
# documentation requires for files handed to a reader.
with open('insurance.csv', newline='') as insurance_file:
    insurance_list = csv.DictReader(insurance_file)
    find_average_age_with_no_child(insurance_list)
   

The average age of (574) patients with no child is 38.0


In [46]:
#############################
#### Descriptive Analysis ###
#############################

import csv

# Analyze where patients are from:
def find_patients_locations(data=None):
    """Count how many rows fall in each region.

    Args:
        data: Either an iterable of row mappings with a 'region' entry
            (for example a csv.DictReader), or an open CSV text file,
            which is wrapped in csv.DictReader here. When omitted, the
            module-level ``insurance_file`` handle is used, matching the
            name the original zero-argument version read from.

    Returns:
        dict mapping each region value to the number of rows carrying it,
        in order of first appearance.

    Raises:
        KeyError: If a row has no 'region' entry.
    """
    if data is None:
        # Backward compatibility with the original no-argument signature.
        data = insurance_file
    if hasattr(data, 'read'):
        # A raw file yields text lines, not row dicts; parse it first so
        # row['region'] is a column lookup rather than a string index.
        data = csv.DictReader(data)

    locations = {}
    for row in data:
        region = row['region']
        locations[region] = locations.get(region, 0) + 1
    return locations
    
# Open the CSV, collect the mapping returned by find_patients_locations and
# print one "<count> patients are from <key>." line per entry.
with open('insurance.csv') as insurance_file:
    insurance_data = csv.DictReader(insurance_file)
    # NOTE(review): the raw file handle is passed here rather than the
    # insurance_data reader built on the previous line — confirm this is intended.
    locations_of_patients = find_patients_locations(insurance_file)
    for key, value in locations_of_patients.items():
        print(str(value) + ' patients are from ' + key + '.')

325 patients are from southwest.
364 patients are from southeast.
325 patients are from northwest.
324 patients are from northeast.


In [6]:
#############################
#### Exploratory Analysis ###
#############################

import csv

# Analyze whether a higher BMI is associated with higher charges
# Since we don't have the patients' heights or weights, use the BMI ranges below for the analysis
# < 16 = severe thinness
# 16-17 = moderate thinness
# 17-18.5 = mild thinness
# 18.5-25 = normal
# 25-30 = overweight
# 30-35 = obese class 1
# 35-40 = obese class 2
# >= 40 = obese class 3
# note: remember to multiply everything by 100
import csv

def analyze_bmi_insurance_correlation(data):
    """Print the average insurance charge for each BMI band.

    Args:
        data: Iterable of row mappings, each with 'bmi' and 'charges'
            entries that float() can parse.

    Returns:
        None. One line per band is printed, in ascending band order;
        bands without any patient get a "No patients" line instead.
    """
    # (exclusive upper bound, band label), ascending. A BMI that is not
    # below any bound lands in the open-ended top band.
    bounded_bands = (
        (16, 'severe thinness'),
        (17, 'moderate thinness'),
        (18.5, 'mild thinness'),
        (25, 'normal'),
        (30, 'overweight'),
        (35, 'obese class 1'),
        (40, 'obese class 2'),
    )
    top_band = 'obese class 3'
    band_labels = [label for _, label in bounded_bands] + [top_band]

    charge_totals = dict.fromkeys(band_labels, 0)
    patient_counts = dict.fromkeys(band_labels, 0)

    for row in data:
        bmi = float(row['bmi'])
        charges = float(row['charges'])

        # First band whose upper bound exceeds the BMI, else the top band.
        band = next(
            (label for upper, label in bounded_bands if bmi < upper),
            top_band,
        )

        charge_totals[band] += charges
        patient_counts[band] += 1

    for label in band_labels:
        patients = patient_counts[label]
        title = label.capitalize()
        if patients > 0:
            mean_charge = charge_totals[label] / patients
            print(f"{title} BMI's average insurance cost is ${mean_charge:.2f}, with {patients} patients in this range.")
        else:
            print(f"No patients in {title} BMI range.")

# Stream the CSV rows into the per-BMI-band charge report.
# newline='' lets the csv module do its own newline handling, as its
# documentation requires for files handed to a reader.
with open('insurance.csv', newline='') as insurance_file:
    insurance_data = csv.DictReader(insurance_file)
    analyze_bmi_insurance_correlation(insurance_data)
    # NOTE(review): the recorded run shows the severe-thinness band with the
    # lowest average charge, so re-check this summary against the printed figures.
    print('From these conclusions, we can see that the outliers like severe thinness and obese bmis tend to have higher insurance costs.')


Severe thinness BMI's average insurance cost is $1694.80, with 1 patients in this range.
Moderate thinness BMI's average insurance cost is $4904.00, with 2 patients in this range.
Mild thinness BMI's average insurance cost is $9737.72, with 17 patients in this range.
Normal BMI's average insurance cost is $10409.34, with 225 patients in this range.
Overweight BMI's average insurance cost is $10987.51, with 386 patients in this range.
Obese class 1 BMI's average insurance cost is $14419.67, with 391 patients in this range.
Obese class 2 BMI's average insurance cost is $17022.26, with 225 patients in this range.
Obese class 3 BMI's average insurance cost is $16784.62, with 91 patients in this range.
From these conclusions, we can see that the outliers like severe thinness and obese bmis tend to have higher insurance costs.


In [5]:
#############################
#### Exploratory Analysis ###
#############################

import csv

# Analyze whether having children will cause higher charges
def analyze_children_insurance_correlation(data):
    """Print the average insurance charge for patients with and without children.

    Args:
        data: Iterable of row mappings, each with a 'children' entry that
            int() can parse and a 'charges' entry that float() can parse.

    Returns:
        None. One line is printed per category ('children', then
        'no children'). A category with no patients gets a
        "No patients with ..." line instead of an average.
    """
    charge_totals = {'children': 0, 'no children': 0}
    patient_counts = {'children': 0, 'no children': 0}

    for row in data:
        children = int(row['children'])
        charges = float(row['charges'])

        category = 'children' if children > 0 else 'no children'
        charge_totals[category] += charges
        patient_counts[category] += 1

    for category, patients in patient_counts.items():
        if patients == 0:
            # An empty category used to crash with ZeroDivisionError.
            print(f"No patients with {category}.")
            continue
        average = charge_totals[category] / patients
        print(f"{patients} of patients with {category} has an average insurance costs of ${average:.2f}")

# Stream the CSV rows into the children / no-children charge report.
# newline='' lets the csv module do its own newline handling, as its
# documentation requires for files handed to a reader.
with open('insurance.csv', newline='') as insurance_file:
    insurance_data = csv.DictReader(insurance_file)
    analyze_children_insurance_correlation(insurance_data)
    print('Patients with children have a slightly higher cost than patients without one.')
        


764 of patients with children has an average insurance costs of $13949.94
574 of patients with no children has an average insurance costs of $12365.98


In [None]:
#############################
#### Exploratory Analysis ###
#############################

# Analyze whether gender affects insurance charges
# Calculate the average insurance for each gender