# U.S. Medical Insurance Costs

**Project Aims**

This project will aim to analyze the U.S. Medical Insurance Costs dataset.

First, I will use python classes and methods to sort and organise the data. I will use containers to store the data and methods to allow analsing various aspects of the data and perform various functions. I will also create methods to analyse the data and group the data.

Secondly, I will use libraries to then query tables and visualise the data in the dataset and create function which will perform analysis on the data.

Lastly, I will create a dashboard to visualise the data.

**1) Imports**

In [2]:
import csv

**2) Creating a Class for handling the data**

In [63]:

class Data:
    def __init__(self, csv_file, name):
        self.name = name
        self.csv_file = csv_file
        self.data = {}

        ##Automatically loading the data into self.data without having to call the method
        self.readcsv()
    
    ## Method to read and display the data in the csv class object
    def readcsv(self):
        with open(self.csv_file, "r") as file:
            reader = csv.DictReader(file)
            person_number = 1
            for row in reader:
                age = row["age"]
                sex = row["sex"]
                bmi = row["bmi"]
                children = row["children"]
                smoker = row["smoker"]
                region = row["region"]
                charges = row["charges"] 
                key = f"Person {person_number}"
                self.data[key] = {"Age" : age, "Sex" : sex, "BMI" : bmi, "Children" : children, "Smoker" : smoker, "Region" : region, "Charges" : charges}
                
                person_number += 1
                
    ## Grouping Methods

    ## By sex
    def groupbygender(self):
        new_dict = {
            "Male" : [],
            "Female" : []
        }
        for key, value in self.data.items():
            if value["Sex"] == "male":
                new_dict["Male"].append(value)
            elif value["Sex"] == "female":
                new_dict["Female"].append(value)
        return new_dict


    ## By smoker
    def groupbysmoker(self):
        new_dict = {
            "Smoker" : [],
            "Non-Smoker" : []
        }
        for key, value in self.data.items():
            if value["Smoker"] == "yes":
                new_dict["Smoker"].append(value)
            elif value["Smoker"] == "no":
                new_dict["Non-Smoker"].append(value)
        return new_dict
    
    ## By region
    def groupbyregion(self):
        new_dict = {}
        for key, value in self.data.items():
            region = value.get("Region", "unknown")
            new_region = region.capitalize()
            if new_region not in new_dict:
                new_dict[new_region] = [value]
            else:
                new_dict[new_region].append(value)
        return new_dict
    
    ## By age
    def groupbyage(self):
        new_dict = {
            "0-12" : [],
            "13-18" : [],
            "19-30" : [],
            "31-55" : [],
            "56-75" : [],
            "75+" : []
        }
        for key, value in self.data.items():
            age = int(value.get("Age", "unknown"))
            if age > 0 and age <= 12:
                new_dict["0-12"].append(value)
            elif age > 12 and age <=18:
                new_dict["13-18"].append(value)
            elif age > 18 and age <=30:
                new_dict["19-30"].append(value)
            elif age > 30 and age <= 55:
                new_dict["31-55"].append(value)
            elif age > 55 and age <= 75:
                new_dict["56-75"].append(value)
            elif age > 75:
                new_dict["75+"].append(value)
            else:
                print("Age Error")
        return new_dict


    ## Analysis Methods which take a data dictionary (Grouped, Sorted or Base) and performs analysis on the data within.

    ## Data Info Method

    def datainfo(self):
        person_count = len(self.data.keys()) + 1
        gender = self.groupbygender()
        male_count = len(gender["Male"])
        female_count = len(gender["Female"])
        region = self.groupbyregion()
        region_dict = {}
        ages = [int(value["Age"]) for value in self.data.values()]
        mean_age = sum(ages) / len(ages)
        region_dict = {k: len(v) for k, v in region.items()}

        return f"""
Total Persons: {person_count}
Male: {male_count}
Female: {female_count} 
Regions : {region_dict}
Mean Age: {round(mean_age)}
        """
    ##Total Methods

    ## Total cost
    def totalcost(self):
        total = 0
        for key, value in self.data.items():
            cost = round(float(value["Charges"]))
            total += cost
        return total
    
    ## Return formatted total
    def returntotalcost(self):
        total = self.totalcost()
        return f"Total Cost: ${total}"
    
    ## Cost by group
    def chargesbygroup(self, method = None):
        method_group = None
        if method.lower() == "smoker":
            method_group = self.groupbysmoker()
        elif method.lower() == "region":
            method_group = self.groupbyregion()
        elif method.lower() == "sex":
            method_group = self.groupbygender()
        else:
            print("Grouping not supported")
        if method_group is not None:
            total_list = []
            for key, value in method_group.items():
                group_total = sum(round(float(item["Charges"])) for item in value)
                total_list.append(group_total)
            return total_list
    
    ## Printing formatted costs by group
    def printcostsbygroup(self, group):
        totals = self.chargesbygroup(group)
        group_data = None
        if group.lower() == "smoker":
            group_data = self.groupbysmoker()
        elif group.lower() == "region":
            group_data = self.groupbyregion()
        elif group.lower() == "sex":
            group_data = self.groupbygender()
        if group_data is not None:
            result = []
            for key, total in zip(group_data.keys(), totals):
                result.append(f"{key}: ${total}")
            return "\n".join(result)
        
    ## Average Methods

    ## Average Total   
    def averagetotal(self):
        total = self.totalcost()
        num_people = total/len(self.data)
        if num_people > 0:
            average = total/num_people
            return f"Average Cost: ${average}"
        else:
            return 0
        
    ## Average by Group
    def averagegroup(self, group = None):
        totals = self.chargesbygroup(group)
        length_list = []
        group_data = None
        if group.lower() == "smoker":
            group_data = self.groupbysmoker()
        elif group.lower() == "region":
            group_data = self.groupbyregion()
        elif group.lower() == "sex":
            group_data = self.groupbygender()
        else:
            print("Grouping not supported")
        if group_data is not None:
            totals = self.chargesbygroup(group)
            length_list = [len(value) for value in group_data.values()]
            average = [round(totals[i])/length_list[i] for i in range(len(totals))]
            result = {key : round(value) for key, value in zip(group_data.keys(), average)}
        return result
    
    def printavggroup(self, group):
        average = self.averagegroup(group)
        group_data = None
        if group.lower() == "smoker":
            group_data = self.groupbysmoker()
        elif group.lower() == "region":
            group_data = self.groupbyregion()
        elif group.lower() == "sex":
            group_data = self.groupbygender()
        if group_data is not None:
            avg_list = []
            for key, value in zip(group_data.keys(), average.values()):
                avg_list.append(f"{key}: ${value}")
            return "\n".join(avg_list)

                
        
#-------------------------------------------------------------------------------------------------
            
file_name = "insurance.csv"        
medical_data = Data(file_name, "Medical Data")

print("Medical Data Info")
info = medical_data.datainfo()
print(info)
print(" ")
total_charges = medical_data.returntotalcost()
print(total_charges)
total_group = medical_data.printcostsbygroup("region")
print(" ")
print("Totals by Region:")
print(total_group)
print(" ")
print("By Gender:")
total_gender = medical_data.printcostsbygroup("sex")
print(total_gender)
print(" ")
average_cost = medical_data.averagetotal()
print(average_cost)
print(" ")
print("Averages by group:")
avg_group_print = medical_data.printavggroup("smoker")
print(avg_group_print)
                


Medical Data Info

Total Persons: 1339
Male: 676
Female: 662 
Regions : {'Southwest': 325, 'Southeast': 364, 'Northwest': 325, 'Northeast': 324}
Mean Age: 39
        
 
Total Cost: $17755815
 
Totals by Region:
Southwest: $4012752
Southeast: $5363691
Northwest: $4035713
Northeast: $4343659
 
By Gender:
Male: $9434750
Female: $8321065
 
Average Cost: $1338.0
 
Averages by group:
Smoker: $32050
Non-Smoker: $8434
