# U.S. Medical Insurance Costs

Key questions for the project:
- 

### Importing Modules
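For this project, I am importing the `csv` module and the comparison functions (`lt`, `le`, `eq`, `ne`, `gt`, `ge`) from the `operator` module. The `csv` module is used to read the data from the insurance.csv file. The comparison functions are passed to the search method so that numeric fields (age, BMI, children, charges) can be filtered with comparisons other than plain equality, for example "age greater than 40".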

In [1]:
import csv
from operator import lt, le, eq, ne, gt, ge

### Creating a class with several methods including loading and analyzing the data
I can now create a class with methods that retrieve the data from a CSV file, convert each column to the appropriate type, build a dictionary of patient records from the converted data, and perform various searches and calculations on those records.

In [5]:
class Insurance_Data:
    """Load an insurance CSV file and answer simple queries about its records.

    Typical use: construct with the path to the CSV file, call
    ``create_dictionary()`` once to load the file, then query the records
    with ``search``, ``count_frequency`` and ``percent_of_total``.

    Attributes:
        csv_file: Path of the CSV file to read.
        age_data, sex_data, bmi_data, children_data, smoker_data,
        region_data, charges_data: One list per column, filled by
            ``get_csv_data``.
        zipped_data: List of per-row tuples, set by ``get_csv_data``.
        data: Mapping of ``'patient N'`` (1-based) to a record dictionary
            keyed by the names in ``_FIELDS``.
    """

    # Keys of each patient record, in the column order the CSV is read in.
    _FIELDS = ('age', 'sex', 'bmi', 'children', 'smoker', 'region', 'charges')

    def __init__(self, csv_file):
        """Remember the CSV path and start with empty columns and records."""
        self.csv_file = csv_file
        self.age_data = []
        self.sex_data = []
        self.bmi_data = []
        self.children_data = []
        self.smoker_data = []
        self.region_data = []
        self.charges_data = []
        self.data = {}

    @property
    def record_count(self):
        """Number of patient records currently held in ``self.data``."""
        return len(self.data)

    def get_csv_data(self):
        """Read the CSV file into the per-column lists and ``zipped_data``.

        The columns are taken positionally from each row, so the file must
        have exactly seven columns in the order age, sex, bmi, children,
        smoker, region, charges. Age and children are converted to ``int``,
        bmi and charges to ``float``; the remaining columns stay strings.
        """
        columns = (self.age_data, self.sex_data, self.bmi_data,
                   self.children_data, self.smoker_data, self.region_data,
                   self.charges_data)
        # Start from empty columns so that reading the file a second time
        # replaces the previous contents instead of appending duplicates.
        for column in columns:
            column.clear()

        # newline='' lets the csv module do its own newline handling.
        with open(self.csv_file, newline='') as csv_data:
            for line in csv.DictReader(csv_data):
                (age_value, sex_value, bmi_value, children_value,
                 smoker_value, region_value, charges_value) = line.values()
                self.age_data.append(int(age_value))
                self.sex_data.append(sex_value)
                self.bmi_data.append(float(bmi_value))
                self.children_data.append(int(children_value))
                self.smoker_data.append(smoker_value)
                self.region_data.append(region_value)
                self.charges_data.append(float(charges_value))

        # Materialised as a list so it can be iterated more than once
        # (a bare zip object is exhausted after its first pass).
        self.zipped_data = list(zip(*columns))

    def create_dictionary(self):
        """Load the CSV file and rebuild ``self.data`` from its rows.

        Records are keyed ``'patient 1'``, ``'patient 2'``, ... in file
        order. Any records from a previous call are discarded.
        """
        self.get_csv_data()
        self.data = {
            'patient ' + str(number): dict(zip(self._FIELDS, row))
            for number, row in enumerate(self.zipped_data, start=1)
        }

    def search(self, charges_op=eq, bmi_op=eq, children_op=eq, age_op=eq, **search_criteria):
        """Return the ids of all patients matching every search criterion.

        Args:
            charges_op, bmi_op, children_op, age_op: Two-argument predicates
                called as ``op(record_value, criterion_value)`` for the
                corresponding numeric field. Each defaults to equality.
            **search_criteria: ``field=value`` pairs. A numeric value
                (``int`` or ``float``, but not ``bool``) is compared with
                the operator for its field; a numeric value for any other
                field never matches. Any other value is compared to the
                record's field with ``==``.

        Returns:
            List of patient ids (keys of ``self.data``) in insertion order.

        Raises:
            KeyError: If a non-numeric criterion names a field the records
                do not have.
        """
        numeric_ops = {
            'charges': charges_op,
            'bmi': bmi_op,
            'children': children_op,
            'age': age_op,
        }

        def matches(record):
            for key, value in search_criteria.items():
                is_number = (isinstance(value, (int, float))
                             and not isinstance(value, bool))
                if is_number:
                    compare = numeric_ops.get(key)
                    if compare is None or not compare(record[key], value):
                        return False
                elif not record[key] == value:
                    return False
            return True

        return [patient for patient, record in self.data.items()
                if matches(record)]

    def get_patient_data(self, patient_name):
        """Return the record for ``patient_name``, or ``None`` if unknown."""
        return self.data.get(patient_name)

    def count_frequency(self, **search_criteria):
        """Return how many patients match the criteria.

        The keyword arguments are forwarded unchanged to ``search``, so the
        ``*_op`` operator arguments may be supplied as well.
        """
        return len(self.search(**search_criteria))

    def percent_of_total(self, **search_criteria):
        """Return the share of matching patients as a string such as ``'75.0%'``.

        The keyword arguments are forwarded to ``search`` through
        ``count_frequency``. With no records loaded the result is ``'0.0%'``
        rather than a division-by-zero error.
        """
        total = self.record_count
        if total == 0:
            percent = 0.0
        else:
            percent = self.count_frequency(**search_criteria) / total * 100
        return f'{percent}%'

# Build the data object for insurance.csv (path is relative to the working
# directory) and load the file into my_data.data.
my_data = Insurance_Data('insurance.csv')
my_data.create_dictionary()

In [4]:
# Example query: ids of all female non-smokers in the northeast region.
# String criteria are matched by exact equality against each record.
test = my_data.search(sex='female', smoker='no', region='northeast')
print(test)

['patient 17', 'patient 21', 'patient 27', 'patient 32', 'patient 41', 'patient 47', 'patient 51', 'patient 57', 'patient 79', 'patient 82', 'patient 103', 'patient 126', 'patient 131', 'patient 132', 'patient 135', 'patient 153', 'patient 155', 'patient 180', 'patient 200', 'patient 206', 'patient 222', 'patient 229', 'patient 242', 'patient 256', 'patient 268', 'patient 276', 'patient 284', 'patient 287', 'patient 322', 'patient 334', 'patient 335', 'patient 343', 'patient 361', 'patient 366', 'patient 367', 'patient 372', 'patient 373', 'patient 395', 'patient 406', 'patient 427', 'patient 428', 'patient 429', 'patient 463', 'patient 469', 'patient 472', 'patient 474', 'patient 486', 'patient 493', 'patient 509', 'patient 521', 'patient 523', 'patient 532', 'patient 549', 'patient 555', 'patient 562', 'patient 574', 'patient 575', 'patient 580', 'patient 589', 'patient 596', 'patient 598', 'patient 607', 'patient 613', 'patient 614', 'patient 617', 'patient 638', 'patient 648', 'pat