In [None]:
# pylint: disable= pointless-string-statement

# U.S. Medical Insurance Costs

## Project Overview
For this project, you will be investigating a medical insurance costs dataset in a .csv file,  
using the Python skills that you have developed.  
This dataset and its parameters will seem familiar if you have  
done any of the previous Python projects in the data science path.

## Project Goals:
Work locally on your own computer  
Import a dataset into your program  
Analyze a dataset by building out functions or class methods  
Use libraries to assist in your analysis  
Optional: Document and organize your findings  
Optional: Make predictions about a dataset’s features based on your findings  

## Step 1. Look at the data in *insurance.csv*

In [None]:
# Import csv library
import csv

# Read the insurance.csv file
# with open("insurance.csv", newline="") as insurance_obj:
#     insurance_reader = csv.DictReader(insurance_obj)
#     for row in insurance_reader:
#         print(row)

Data includes the age, sex, bmi, number of children, smoker status, region, and charges of patients.
The data is stored in a .csv file that uses a comma (",") as the delimiter.

## Step 2. Define the scope of the analysis  
Potential Questions:
* What is the average age of the patients?
* What is the average cost (charge) of the patients?
* What is the ratio between the sexes of the patients?
* How do the different variables affect the charge within the dataset?
    * Is there a difference in average charge when grouped by:
        * Sexes?
        * Smoker status?
        * Number of children?
        * NB. These variables are likely confounded!
    * Is there a difference in average BMI when grouped by:
        * Sexes?
        * Smoker status?
        * Number of children?
        * NB. These variables are likely confounded!

## Step 3. Import the *insurance.csv* dataset

In [None]:
# Import libraries
import csv

# Read the insurance.csv file and store every patient as
# {row number: details} in insurance_dict.
# The encoding is passed explicitly so the file is decoded the same way on
# every platform instead of relying on the locale default.
# NOTE(review): assumes insurance.csv is UTF-8 (or plain ASCII) - confirm.
with open("insurance.csv", newline="", encoding="utf-8") as insurance_csv:
    insurance_reader = csv.DictReader(insurance_csv)
    # Each record is keyed by its 0-based row position. The values are kept
    # exactly as the strings read from the file.
    insurance_dict = {}
    for key, row in enumerate(insurance_reader):
        insurance_dict[key] = {
            "Age": row["age"],
            "Sex": row["sex"],
            "BMI": row["bmi"],
            "Children": row["children"],
            "Smoker": row["smoker"],
            "Region": row["region"],
            "Charges": row["charges"],
        }

## Step 4. Data Analysis

### Step 4.0 Defining Functions

In [None]:
# Create function to generate dictionary with key and details
def create_dictionary(data, key):
    """
    Regroup the records of a dictionary by the value stored under one field
    ---
    data = dict whose values are record dicts
    key = name of the field whose value becomes the new dictionary key

    Records that do not contain ``key`` are left out of the result.
    Prints the keys of the new dictionary and returns it as
    {field value: [record, record, ...]}.
    """
    grouped = {}
    for entry in data.values():
        # Skip records that lack the grouping field altogether
        if key not in entry:
            continue
        group_name = entry[key]
        if group_name in grouped:
            grouped[group_name].append(entry)
        else:
            grouped[group_name] = [entry]
    print(f"Created dictionary with {list(grouped)} as the keys.\n")
    return grouped

In [None]:
# Create function to calculate average charge per group
def calculate_average_charge(data):
    """
    Function for Calculating Average Charge per Group
    ---

    data = Dictionary mapping a group key to a list of patient records.
           Every record must hold a "Charges" value that float() accepts
           (a number or a numeric string).

    Returns a dictionary {group key: average charge as float}, with the
    groups in the same order as in ``data``.

    Raises ValueError if a group contains no records, because an average
    is undefined for an empty group.
    """
    charge_by_group = {}
    # Iterate keys and records together instead of indexing a parallel key list
    for group_key, records in data.items():
        total_patients = len(records)
        if total_patients == 0:
            raise ValueError(
                f"Cannot calculate the average charge of group {group_key!r}: "
                "it contains no patients."
            )
        total_charge = 0.0
        for details in records:
            total_charge += float(details.get("Charges"))
        charge_by_group[group_key] = total_charge / total_patients
    return charge_by_group

In [None]:
# Create function to calculate average BMI per group
def calculate_average_bmi(data):
    """
    Function for Calculating Average BMI per Group
    ---

    data = Dictionary mapping a group key to a list of patient records.
           Every record must hold a "BMI" value that float() accepts
           (a number or a numeric string).

    Returns a dictionary {group key: average BMI as float}, with the
    groups in the same order as in ``data``.

    Raises ValueError if a group contains no records, because an average
    is undefined for an empty group.
    """
    bmi_by_group = {}
    # Iterate keys and records together instead of indexing a parallel key list
    for group_key, records in data.items():
        total_patients = len(records)
        if total_patients == 0:
            raise ValueError(
                f"Cannot calculate the average BMI of group {group_key!r}: "
                "it contains no patients."
            )
        total_bmi = 0.0
        for details in records:
            total_bmi += float(details.get("BMI"))
        bmi_by_group[group_key] = total_bmi / total_patients
    return bmi_by_group

### Step 4.1 Sexes
Does average charge differ between 'male' and 'female' patients?

In [1]:
# Group the patient records by the value of their 'Sex' field
insurance_sex_dict = create_dictionary(insurance_dict, "Sex")

# Number of decimals used whenever a result is printed
PRECISION = 2

# Average charge per sex, and the absolute gap between the two groups
average_charges_sexes = calculate_average_charge(insurance_sex_dict)
female_average_charge = average_charges_sexes["female"]
male_average_charge = average_charges_sexes["male"]
charge_sexes_dif = abs(female_average_charge - male_average_charge)

print(f"Female patients average charge: {round(female_average_charge, PRECISION)}")
print(f"Male patients average charge: {round(male_average_charge, PRECISION)}")
print(f"Difference between sexes: {round(charge_sexes_dif, PRECISION)}")

Created dictionary with ['female', 'male'] as the keys.

Female patients average charge: 12569.58
Male patients average charge: 13956.75
Difference between sexes: 1387.17


Does average BMI differ between 'male' and 'female' patients?

In [2]:
# Average BMI per sex, and the absolute gap between the two groups
average_bmi_sexes = calculate_average_bmi(insurance_sex_dict)
female_average_bmi = average_bmi_sexes["female"]
male_average_bmi = average_bmi_sexes["male"]
bmi_sexes_dif = abs(female_average_bmi - male_average_bmi)

print(f"Female patients average BMI: {round(female_average_bmi, PRECISION)}")
print(f"Male patients average BMI: {round(male_average_bmi, PRECISION)}")
print(f"Difference between sexes: {round(bmi_sexes_dif, PRECISION)}")

Female patients average BMI: 30.38
Male patients average BMI: 30.94
Difference between sexes: 0.57


### Step 4.2 Smoker Status

In [3]:
# Group the patient records by smoker status ('yes', 'no')
insurance_smoker_dict = create_dictionary(insurance_dict, "Smoker")

# Average charge per smoker status, and the absolute gap between the groups
average_charges_smokerstatus = calculate_average_charge(insurance_smoker_dict)
smoker_average_charge = average_charges_smokerstatus["yes"]
nonsmoker_average_charge = average_charges_smokerstatus["no"]
charge_smokerstatus_dif = abs(smoker_average_charge - nonsmoker_average_charge)

print(f"Smokers average insurance charge: {round(smoker_average_charge, PRECISION)}")
print(
    f"Non-smokers average insurance charge: {round(nonsmoker_average_charge, PRECISION)}"
)
print(
    f"Difference between Smokers and Non-smokers: {round(charge_smokerstatus_dif, PRECISION)}"
)

Created dictionary with ['yes', 'no'] as the keys.

Smokers average insurance charge: 32050.23
Non-smokers average insurance charge: 8434.27
Difference between Smokers and Non-smokers: 23615.96


Does average BMI differ between Smokers and Non-smokers?

In [4]:
# Average BMI per smoker status, and the absolute gap between the groups
average_bmi_smokerstatus = calculate_average_bmi(insurance_smoker_dict)
smoker_average_bmi = average_bmi_smokerstatus["yes"]
nonsmoker_average_bmi = average_bmi_smokerstatus["no"]
bmi_smokerstatus_dif = abs(smoker_average_bmi - nonsmoker_average_bmi)

print(f"Smoking patients average BMI: {round(smoker_average_bmi, PRECISION)}")
print(f"Non-smoking patients average BMI: {round(nonsmoker_average_bmi, PRECISION)}")
print(
    f"Difference between smokers and non-smokers: {round(bmi_smokerstatus_dif, PRECISION)}"
)

Smoking patients average BMI: 30.71
Non-smoking patients average BMI: 30.65
Difference between smokers and non-smokers: 0.06


### Step 4.3 Number of Children
Is there a difference in average charge when grouped by number of children?

In [5]:
# Group the patient records by their number of children
insurance_children_dict = create_dictionary(insurance_dict, "Children")

# Report the average charge of every group, in the order the groups were created
average_charges_children = calculate_average_charge(insurance_children_dict)
for children_count, mean_charge in average_charges_children.items():
    rounded_charge = round(mean_charge, PRECISION)
    print(
        f"Average charge for patients with {children_count} children: {rounded_charge} "
    )

Created dictionary with ['0', '1', '3', '2', '5', '4'] as the keys.

Average charge for patients with 0 children: 12365.98 
Average charge for patients with 1 children: 12731.17 
Average charge for patients with 3 children: 15355.32 
Average charge for patients with 2 children: 15073.56 
Average charge for patients with 5 children: 8786.04 
Average charge for patients with 4 children: 13850.66 


Is there a difference in average BMI when grouped by number of children?

In [6]:
# Report the average BMI of every group, keyed by number of children
average_bmi_children = calculate_average_bmi(insurance_children_dict)
for children_count, mean_bmi in average_bmi_children.items():
    rounded_bmi = round(mean_bmi, PRECISION)
    print(f"Average BMI for patients with {children_count} children: {rounded_bmi}")

Average BMI for patients with 0 children: 30.55
Average BMI for patients with 1 children: 30.62
Average BMI for patients with 3 children: 30.68
Average BMI for patients with 2 children: 30.98
Average BMI for patients with 5 children: 29.61
Average BMI for patients with 4 children: 31.39
