# U.S. Medical Insurance Costs

In [None]:
import csv

In [None]:
# Preview the raw data: print every row of insurance.csv as a dictionary
# (column title -> value) and report how many data rows the file contains.
# The file is opened with newline="" as the csv module documentation requires,
# so that newlines embedded in quoted fields are parsed correctly.
with open("insurance.csv", newline="") as database:
    reader = csv.DictReader(database)
    count = 0
    for row in reader:
        print(row)
        count += 1
    print(f"There are {count} entries in database")

<h3>The above cell reads the "insurance.csv" file with csv.DictReader, which yields every row as a dictionary whose keys are the column titles and whose values are that row's data. The same can be done with csv.reader, but it returns each row as a plain list of values, so it's hard to see which value belongs to which column.</h3>

In [None]:
def extract_values(data_file, data_dict):
    """Load the insurance CSV column by column into ``data_dict``.

    Args:
        data_file: Path of a CSV file whose header contains the columns
            "age", "sex", "bmi", "children", "smoker", "region" and "charges".
        data_dict: Dictionary (or dict subclass) that receives one list per
            column. Existing entries under those seven keys are overwritten.

    Returns:
        ``data_dict`` itself, now holding a list of the raw string values for
        each column, or the message "Please pass a dictionary as second
        variable" when ``data_dict`` is not a dictionary.

    Raises:
        OSError: If ``data_file`` cannot be opened.
        KeyError: If a row lacks one of the expected columns.
    """
    # Validate first so that a wrong argument does not cost a file read.
    if not isinstance(data_dict, dict):
        return "Please pass a dictionary as second variable"

    columns = ("age", "sex", "bmi", "children", "smoker", "region", "charges")
    extracted = {column: [] for column in columns}

    # newline="" lets the csv module handle line endings itself.
    with open(data_file, newline="") as database:
        for row in csv.DictReader(database):
            for column in columns:
                extracted[column].append(row[column])

    # Only touch the caller's dictionary once the whole file parsed cleanly.
    data_dict.update(extracted)
    return data_dict

In [None]:
# Shared column store used by the cells below: filled in place by
# extract_values, whose return value (the same dictionary) is printed.
data_dictionary = dict()
print(extract_values(data_file="insurance.csv", data_dict=data_dictionary))


<h3>The above cells define and call a function that extracts the values into a dictionary in which every key holds all the values of its column in a list. This makes it easy to work with the values of the same column together.</h3>

In [None]:
def arithmetic_mean(data, round_digits=3):
    """Return the arithmetic mean of ``data`` rounded to ``round_digits``.

    Args:
        data: Iterable of numbers or numeric strings (each item is converted
            with ``float``).
        round_digits: Number of decimal places kept in the result.

    Returns:
        The rounded mean as a float. Following this function's convention of
        reporting problems as a message instead of raising, it returns
        "Data is not a NUMBER" when an item cannot be converted to float and
        "Data is EMPTY" when there is nothing to average.
    """
    try:
        # TypeError covers items such as None, which float() rejects with a
        # different exception than it uses for non-numeric strings.
        values = [float(val) for val in data]
    except (TypeError, ValueError):
        return "Data is not a NUMBER"

    # Guard the division: an empty input used to raise ZeroDivisionError.
    if not values:
        return "Data is EMPTY"

    return round(sum(values) / len(values), round_digits)

In [None]:
# Mean of the "charges" column, rounded to five decimal places.
print(arithmetic_mean(data=data_dictionary["charges"], round_digits=5))

<h3>The above cells compute the arithmetic mean of a key inside the main data dictionary, provided that key's values are numbers (float or integer). If the key passed to the function contains values that are not numbers, the code lets you know.</h3>

In [None]:
user_list = []

def make_list(data_dict=data_dictionary, list_to_return=user_list):
    """Convert the column-oriented ``data_dict`` into one record per client.

    Each record is a list ordered as
    ``[age, sex, bmi, children, smoker, region, charges]``.

    Args:
        data_dict: Dictionary holding one equally long list per column.
        list_to_return: List the records are appended to in place. Callers
            rely on this mutation; with the default, the module-level
            ``user_list`` is extended, so pass a fresh list to avoid
            accumulating records across calls.

    Returns:
        A shallow copy of ``list_to_return`` after the records were appended.
    """
    # An empty dictionary has no rows to convert.
    if not data_dict:
        return list(list_to_return)

    columns = ("age", "sex", "bmi", "children", "smoker", "region", "charges")
    # Walk the columns in lockstep exactly once. Looping over the dictionary's
    # keys as well would append every row once per column.
    rows = zip(*(data_dict[column] for column in columns))
    list_to_return.extend(list(row) for row in rows)

    return list(list_to_return)

In [None]:
# Called with its defaults, make_list appends the client records to the
# module-level user_list and returns a copy of that list, which is printed.
print(make_list())

<h3>The above cells turn the whole data set, which was separated by columns, into nested lists in which every inner list contains the data of an individual user.</h3>

In [None]:
# Split the client records in user_list by smoking status (index 4 of each
# record); records whose status is neither "yes" nor "no" are left out.
smoker_list, non_smoker_list = [], []
print(len(user_list))
for data in user_list:
    if data[4] == "yes":
        smoker_list.append(data)
        continue
    if data[4] == "no":
        non_smoker_list.append(data)
print(len(smoker_list), len(non_smoker_list))
print(smoker_list)

In [None]:
# smoker_charge = []
# non_smoker_charge = []

# for sublist in smoker_list:
#     smoker_charge.append(sublist[-1])
# for sublist in non_smoker_list:
#     non_smoker_charge.append(sublist[-1])

# #print(smoker_charge)
# #print(non_smoker_charge)

# print(arithmetic_mean(smoker_charge) - arithmetic_mean(non_smoker_charge))

def smoking_effect(list1=smoker_list, list2=non_smoker_list):
    """Return the mean charge of ``list1`` minus the mean charge of ``list2``.

    Both arguments are lists of client records whose last element is the
    charge; by default they are the smoker and non-smoker record lists.
    """
    # Collect both charge columns before averaging either of them.
    smoker_charge = [record[-1] for record in list1]
    non_smoker_charge = [record[-1] for record in list2]

    difference = arithmetic_mean(smoker_charge) - arithmetic_mean(non_smoker_charge)
    return difference

<h3>The above code defines a function that returns the difference between the average charges of smokers and non-smokers; the cell below prints it.</h3>

In [None]:
# No arguments: smoking_effect falls back to its defaults, i.e. the
# smoker_list and non_smoker_list objects bound when it was defined.
print(smoking_effect())

In [None]:
def avg_parent_age(data=data_dictionary):
    """Return the mean age of the clients who have at least one child.

    Args:
        data: Column-oriented data dictionary (one list per column) that is
            converted into per-client records with ``make_list``.

    Returns:
        The result of ``arithmetic_mean`` over the ages of all clients whose
        "children" value is not zero.
    """
    # Forward ``data`` explicitly: without it make_list silently falls back to
    # its own default dictionary and a caller-supplied dataset is ignored.
    # A fresh list keeps the module-level user_list untouched.
    clients = make_list(data_dict=data, list_to_return=[])

    # Record layout: index 0 is the age, index 3 the number of children.
    parent_ages = [int(client[0]) for client in clients if int(client[3]) != 0]

    return arithmetic_mean(parent_ages)

<h3>The above cell defines a function to get the average age of every client with at least one child.</h3>

In [None]:
def region_analysis(data=data_dictionary):
    """Summarise which region has the most and which the fewest clients.

    Counts how often each distinct value occurs in ``data["region"]`` and
    returns a sentence naming the region with the highest count and the one
    with the lowest count. Equal counts are ordered by region name.
    """
    regions = data["region"]

    # One tally entry per distinct region name.
    tally = {name: regions.count(name) for name in set(regions)}

    # [count, name] pairs in descending order: first is major, last is minor.
    ranked = sorted(([total, name] for name, total in tally.items()), reverse=True)
    (top_total, top_name), (bottom_total, bottom_name) = ranked[0], ranked[-1]

    return (f"Major client region is {top_name} with {top_total} applicants. Minor client region is {bottom_name} with {bottom_total} applicants.")

# Bare call with the default data; the returned summary string is not printed
# here (as the last expression of a notebook cell it would presumably be shown
# as the cell's output).
region_analysis()

<h3>The above cell reports the regions with the most (major) and the fewest (minor) clients.</h3>

In [None]:
def avg_age(data=data_dictionary):
    """Return the arithmetic mean of the "age" column of ``data``."""
    ages = data["age"]
    return arithmetic_mean(ages)

# Bare call with the default data; the returned mean is not printed here
# (as the last expression of a notebook cell it would presumably be shown as
# the cell's output).
avg_age()

In [None]:
def _majority_verdict(over_count, under_count, over_message, under_message):
    """Return the message of the larger count, or "" when they are equal."""
    if over_count > under_count:
        return over_message
    if over_count < under_count:
        return under_message
    return ""


def over_average_analysis(data=data_dictionary, data_l=None):
    """Print how clients with above-average charges compare to the averages.

    For every client whose charge exceeds the mean charge, the function counts
    whether the client's age, BMI and number of children are above or below
    the respective mean and whether the client smokes. It then prints one line
    per category stating which side is in the majority (an empty line when
    the counts are equal).

    Args:
        data: Column-oriented data dictionary used to compute the means.
        data_l: Optional list of client records ordered as
            ``[age, sex, bmi, children, smoker, region, charges]``. When
            omitted, the records are built from ``data`` with ``make_list``.

    Returns:
        None. The findings are printed.
    """
    # The previous default referenced an undefined global, which made the
    # definition itself fail; build the records from ``data`` on demand.
    if data_l is None:
        data_l = make_list(data_dict=data, list_to_return=[])

    over_avg_check_dict = {"Children": 0, "Bmi": 0, "Age": 0, "Smoker": 0}
    under_avg_check_dict = {"Children": 0, "Bmi": 0, "Age": 0, "Smoker": 0}

    mean_age = arithmetic_mean(data["age"])
    mean_bmi = arithmetic_mean(data["bmi"])
    mean_children = arithmetic_mean(data["children"])
    mean_charges = arithmetic_mean(data["charges"])

    for client in data_l:
        client_age = int(client[0])
        client_bmi = float(client[2])
        client_children = int(client[3])
        client_smoke = client[4]
        client_charge = float(client[-1])

        # Only clients charged more than the average are analysed.
        if client_charge <= mean_charges:
            continue

        # Values exactly equal to the mean count for neither side.
        comparisons = (
            ("Age", client_age, mean_age),
            ("Bmi", client_bmi, mean_bmi),
            ("Children", client_children, mean_children),
        )
        for category, value, mean in comparisons:
            if value > mean:
                over_avg_check_dict[category] += 1
            elif value < mean:
                under_avg_check_dict[category] += 1

        if client_smoke == "yes":
            over_avg_check_dict["Smoker"] += 1
        elif client_smoke == "no":
            under_avg_check_dict["Smoker"] += 1

    # The verdicts are derived once, after all clients were counted, so they
    # are also defined when there are no clients at all.
    children_analysis_o = _majority_verdict(
        over_avg_check_dict["Children"],
        under_avg_check_dict["Children"],
        "People with over average charges have MORE CHILDREN than average.",
        "People with over average charges have LESS CHILDREN than average.",
    )
    bmi_analysis_o = _majority_verdict(
        over_avg_check_dict["Bmi"],
        under_avg_check_dict["Bmi"],
        "People with over average charges have MORE BMI than average.",
        "People with over average charges have LESS BMI than average.",
    )
    age_analysis_o = _majority_verdict(
        over_avg_check_dict["Age"],
        under_avg_check_dict["Age"],
        "People with over average charges are OLDER than average.",
        "People with over average charges are YOUNGER than average.",
    )
    smoker_analysis_o = _majority_verdict(
        over_avg_check_dict["Smoker"],
        under_avg_check_dict["Smoker"],
        "People with over average charges are SMOKERS.",
        "People with over average charges have NOT SMOKERS.",
    )

    print("\n".join([children_analysis_o, bmi_analysis_o, age_analysis_o, smoker_analysis_o]))

# Run the analysis with its default arguments; the function prints its
# findings itself.
over_average_analysis()

<h3>The above cell compares the "children", "bmi", "age" and "smoker" data of clients with above-average charges against the overall averages.</h3>

<p>This was a portfolio project as part of Codecademy's "Data Analyst" career path. Made by Onur Kaşıkçı. Apart from basic coding challenges on Codewars and HackerRank, this was my second coding attempt, so for any suggestions and improvements hit me up at onurka.46@gmail.com or on Discord --> onurkasikci.</p>