# U.S. Medical Insurance Costs

#### This project uses ***Python*** to open and analyze a csv file containing the medical records for over a thousand patients. 

In [62]:
import csv

The very first step is to import any libraries necessary for this project. In this case, the csv library is all we need.

In [63]:
# Preview every record. csv.DictReader takes its keys from the header row,
# so each row is printed as a dict of column name -> string value.
# newline='' leaves line-ending handling to the csv module, which is how the
# csv documentation says files given to a reader should be opened.
with open('insurance.csv', newline='') as insurance_csv:
    medical_records = csv.DictReader(insurance_csv)
    for row in medical_records:
        print(row)

{'age': '19', 'sex': 'female', 'bmi': '27.9', 'children': '0', 'smoker': 'yes', 'region': 'southwest', 'charges': '16884.924'}
{'age': '18', 'sex': 'male', 'bmi': '33.77', 'children': '1', 'smoker': 'no', 'region': 'southeast', 'charges': '1725.5523'}
{'age': '28', 'sex': 'male', 'bmi': '33', 'children': '3', 'smoker': 'no', 'region': 'southeast', 'charges': '4449.462'}
{'age': '33', 'sex': 'male', 'bmi': '22.705', 'children': '0', 'smoker': 'no', 'region': 'northwest', 'charges': '21984.47061'}
{'age': '32', 'sex': 'male', 'bmi': '28.88', 'children': '0', 'smoker': 'no', 'region': 'northwest', 'charges': '3866.8552'}
{'age': '31', 'sex': 'female', 'bmi': '25.74', 'children': '0', 'smoker': 'no', 'region': 'southeast', 'charges': '3756.6216'}
{'age': '46', 'sex': 'female', 'bmi': '33.44', 'children': '1', 'smoker': 'no', 'region': 'southeast', 'charges': '8240.5896'}
{'age': '37', 'sex': 'female', 'bmi': '27.74', 'children': '3', 'smoker': 'no', 'region': 'northwest', 'charges': '7281.

The next step is to look at the data in a readable format. To do this, the CSV file containing the medical records is opened (read mode is the default), each row is read as a dictionary, and every row is printed.

In [64]:
# Create a fresh, independent empty list for each of the seven CSV columns.
# The generator yields a new list object on every step, so no two names
# share the same list.
(
    ages,
    sexes,
    bmis,
    num_children,
    smoker_statuses,
    regions,
    insurance_costs,
) = ([] for _ in range(7))

For each column in our csv file, an empty list is created that we will populate later.

In [65]:
def populate_lists(list_name, column_name, file_path='insurance.csv'):
    """Append every value of one CSV column to ``list_name``.

    Args:
        list_name: List to extend in place; items already in it are kept.
        column_name: Header name of the column to read.
        file_path: CSV file to read. Defaults to ``'insurance.csv'``.

    Returns:
        The same ``list_name`` object, with the column's values appended as
        strings in file order.

    Raises:
        KeyError: If ``column_name`` is not one of the file's columns.
    """
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation asks for files passed to a reader.
    with open(file_path, newline='') as insurance_csv:
        medical_records = csv.DictReader(insurance_csv)
        list_name.extend(row[column_name] for row in medical_records)
    return list_name

To populate each list with the values from its CSV column, we define a function that opens the file again and appends every value in the given column to the given list.

In [66]:
# Fill each column list from its matching CSV column. populate_lists appends
# to the list it is given, so running this cell again without first
# re-creating the lists adds every value a second time.
populate_lists(ages, 'age')
populate_lists(sexes, 'sex')
populate_lists(bmis, 'bmi')
populate_lists(num_children, 'children')
populate_lists(smoker_statuses, 'smoker')
populate_lists(regions, 'region')
# Last expression in the cell: its return value (insurance_costs) is what
# the notebook displays as the cell output.
populate_lists(insurance_costs, 'charges')

['16884.924',
 '1725.5523',
 '4449.462',
 '21984.47061',
 '3866.8552',
 '3756.6216',
 '8240.5896',
 '7281.5056',
 '6406.4107',
 '28923.13692',
 '2721.3208',
 '27808.7251',
 '1826.843',
 '11090.7178',
 '39611.7577',
 '1837.237',
 '10797.3362',
 '2395.17155',
 '10602.385',
 '36837.467',
 '13228.84695',
 '4149.736',
 '1137.011',
 '37701.8768',
 '6203.90175',
 '14001.1338',
 '14451.83515',
 '12268.63225',
 '2775.19215',
 '38711',
 '35585.576',
 '2198.18985',
 '4687.797',
 '13770.0979',
 '51194.55914',
 '1625.43375',
 '15612.19335',
 '2302.3',
 '39774.2763',
 '48173.361',
 '3046.062',
 '4949.7587',
 '6272.4772',
 '6313.759',
 '6079.6715',
 '20630.28351',
 '3393.35635',
 '3556.9223',
 '12629.8967',
 '38709.176',
 '2211.13075',
 '3579.8287',
 '23568.272',
 '37742.5757',
 '8059.6791',
 '47496.49445',
 '13607.36875',
 '34303.1672',
 '23244.7902',
 '5989.52365',
 '8606.2174',
 '4504.6624',
 '30166.61817',
 '4133.64165',
 '14711.7438',
 '1743.214',
 '14235.072',
 '6389.37785',
 '5920.1041',
 '176

The function is called 7 times, once for each list. (A Jupyter notebook only displays the value of the last expression in a cell, which is why we see the 'insurance_costs' list in the output.)

In [67]:
# The values read from the CSV are strings; build numeric copies so they can
# be used in arithmetic. Whole-number columns become ints, decimal columns
# become floats. The original string lists are left untouched.
ages_int = list(map(int, ages))
num_children_int = list(map(int, num_children))
bmis_float = list(map(float, bmis))
insurance_costs_float = list(map(float, insurance_costs))

Currently, the elements in each list are strings. However, it will be easier to perform calculations on the lists that contain numbers by creating new lists that have been converted to integers and floats respectively. 

In [68]:
# Collect every column list under a single dictionary, one key per column.
# Numeric columns use the converted int/float lists; the rest stay as strings.
medical_records_dict = {
    "age": ages_int,
    "sex": sexes,
    "bmi": bmis_float,
    "children": num_children_int,
    "smoker": smoker_statuses,
    "regions": regions,
    "cost": insurance_costs_float,
}

We can also create a dictionary that houses all of the lists in one place.

In [69]:
# Pair each cost with the smoker status at the same position. Costs are the
# dictionary keys, so rows that share an identical cost end up as a single
# entry (holding the status of the last such row).
smoker_cost_dict = {
    cost: status
    for cost, status in zip(insurance_costs_float, smoker_statuses)
}
print(smoker_cost_dict)

{16884.924: 'yes', 1725.5523: 'no', 4449.462: 'no', 21984.47061: 'no', 3866.8552: 'no', 3756.6216: 'no', 8240.5896: 'no', 7281.5056: 'no', 6406.4107: 'no', 28923.13692: 'no', 2721.3208: 'no', 27808.7251: 'yes', 1826.843: 'no', 11090.7178: 'no', 39611.7577: 'yes', 1837.237: 'no', 10797.3362: 'no', 2395.17155: 'no', 10602.385: 'no', 36837.467: 'yes', 13228.84695: 'no', 4149.736: 'no', 1137.011: 'no', 37701.8768: 'yes', 6203.90175: 'no', 14001.1338: 'no', 14451.83515: 'no', 12268.63225: 'no', 2775.19215: 'no', 38711.0: 'yes', 35585.576: 'yes', 2198.18985: 'no', 4687.797: 'no', 13770.0979: 'no', 51194.55914: 'yes', 1625.43375: 'no', 15612.19335: 'no', 2302.3: 'no', 39774.2763: 'yes', 48173.361: 'yes', 3046.062: 'no', 4949.7587: 'no', 6272.4772: 'no', 6313.759: 'no', 6079.6715: 'no', 20630.28351: 'no', 3393.35635: 'no', 3556.9223: 'no', 12629.8967: 'no', 38709.176: 'yes', 2211.13075: 'no', 3579.8287: 'no', 23568.272: 'yes', 37742.5757: 'yes', 8059.6791: 'no', 47496.49445: 'yes', 13607.36875

For this project, we will find the average cost of insurance for the smokers and non-smokers in our file. The first step is to combine the insurance_costs_float list with the smoker_statuses list into a dictionary. Using the built-in zip function, the dictionary will have the insurance costs as keys and the smoker statuses as values. (Because dictionary keys must be unique, any patients who share exactly the same cost are stored as a single entry.)

In [70]:
def smoker_costs(dictionary):
    """Print the average insurance cost across smokers.

    Args:
        dictionary: Mapping of insurance cost (a number) to smoker status.
            Entries whose status equals ``"yes"`` are counted as smokers.

    Returns:
        None. The result is reported with ``print``.
    """
    smoker_cost_list = [
        cost for cost, status in dictionary.items() if status == "yes"
    ]

    # With no matching entries there is nothing to average; report that
    # instead of failing on an undefined average.
    if not smoker_cost_list:
        print("No smokers found; the average cost cannot be calculated.")
        return None

    # Compute the average once, after every matching cost has been collected.
    avg_smoker_cost = sum(smoker_cost_list) / len(smoker_cost_list)
    print(
        "The average cost of insurance for smokers is "
        f"{round(avg_smoker_cost, 2)} dollars."
    )
    return None

In [71]:
def non_smoker_costs(dictionary):
    """Print the average insurance cost across non-smokers.

    Args:
        dictionary: Mapping of insurance cost (a number) to smoker status.
            Entries whose status equals ``"no"`` are counted as non-smokers.

    Returns:
        None. The result is reported with ``print``.
    """
    non_smoker_cost_list = [
        cost for cost, status in dictionary.items() if status == "no"
    ]

    # With no matching entries there is nothing to average; report that
    # instead of failing on an undefined average.
    if not non_smoker_cost_list:
        print("No non-smokers found; the average cost cannot be calculated.")
        return None

    # Compute the average once, after every matching cost has been collected.
    avg_non_smoker_cost = sum(non_smoker_cost_list) / len(non_smoker_cost_list)
    print(
        "The average cost of insurance for non-smokers is "
        f"{round(avg_non_smoker_cost, 2)} dollars."
    )
    return None

Then, we define a function which calculates the average cost of insurance for smokers and returns that number with a printed message for readability. A second function is created to do the same thing for non-smokers.

In [72]:
# Report the average cost for smokers; the function prints its own message.
smoker_costs(smoker_cost_dict)

The average cost of insurance for smokers is 32050.23 dollars.


In [73]:
# Report the average cost for non-smokers; the function prints its own message.
non_smoker_costs(smoker_cost_dict)

The average cost of insurance for non-smokers is 8440.66 dollars.


Each function is called in turn, and the printed message tells us the average cost for each type of patient: smokers and non-smokers. It turns out that the average cost for smokers is nearly 4x as much as for non-smokers!