# U.S. Medical Insurance Costs

### Project goals
* Find out the average age of patients
* Investigate the geographical belongings of patients
* Find out the percentage of smokers
* Categorise patients by BMI

In [39]:
# For a given value of individuals calculates the percentage with respect to total individuals and complemetary
# percentage with n decimal
def percent(value, total, n):
    basic = value/total*100
    compl = (total - value)/total*100
    return format(basic,'.'+str(n)+'f'), format(compl, '.'+str(n)+'f')

# Average value with n decimals
def average(list, n):
    total = 0
    for element in list:
        total += float(element)
    return format(total/len(list),'.'+str(n)+'f')

def min_max(list):
    sorted_list = sorted(list)
    minimum = sorted_list[0]
    maximum = sorted_list[-1]
    return minimum, maximum

## Looking over the dataset

In [2]:
import csv

with open('insurance.csv') as insurance_csv:
    insurance = csv.DictReader(insurance_csv)
    print(insurance_csv.readline())
    print(insurance_csv.readline())

age,sex,bmi,children,smoker,region,charges

19,female,27.9,0,yes,southwest,16884.924



## Importing the data

In [54]:
class Insurance:
    def __init__(self):
        self.age = []
        self.sex = []
        self.bmi = []
        self.children = []
        self.smoker = []
        self.region = []
        self.charges = []
        with open('insurance.csv') as insurance_csv:
            insurance_dict = csv.DictReader(insurance_csv)
            for row in insurance_dict:
                self.age.append(row['age']) 
                self.sex.append(row['sex'])
                self.bmi.append(row['bmi'])
                self.children.append(row['children'])
                self.smoker.append(row['smoker'])
                self.region.append(row['region'])
                self.charges.append(row['charges'])
                
    def total(self):
        return len(self.age)
                
    def ages(self):
        total = 0
        for element in self.age:
            total += int(element)
        average = total/self.total()
        sorted_age = sorted(self.age)
        print('The average age of the patients is '+format(average,'.1f')+' years.')
        print('The youngest patient is {} years old, the oldest - {}.'.format(sorted_age[0], sorted_age[-1]))
        
    def sex_data(self):
        man = self.sex.count('male')
        male, female = percent(man, self.total(), 1)
        print('There are {} patients recorded.'.format(self.total()))
        print('{}% of them are men, {}% are women.'.format(male,female))
        
    def smokers(self):
        all_smokers = self.smoker.count('yes')
        percent_smokers, non_smokers = percent(all_smokers, self.total(), 1)
        print('There are {} smokers or {}% of all the patients.'.format(all_smokers,percent_smokers))
        smoker_sex = list(zip(self.sex,self.smoker))
        smokers_man = 0
        for patient in smoker_sex:
            if patient[0] == 'male' and patient[1] == 'yes':
                smokers_man += 1
        smoker_male, smoker_female = percent(smokers_man,all_smokers,1)
        print('{}% of all smokers are men, {}% are women.'.format(smoker_male,smoker_female))
        
    def smokers_cost(self):
        smoker_list = []
        non_smoker_list = []
        smoker_cost = list(zip(self.smoker,self.charges))
        for entry in smoker_cost:
            if entry[0] == 'yes':
                smoker_list.append(entry[1])
            else:
                non_smoker_list.append(entry[1])
        av_sm = average(smoker_list, 2)
        av_non = average(non_smoker_list, 2)
        up = format(float(av_sm)/float(av_non)*100,'.1f')
        print('The average price of insurance for non-smoker is $'+av_non+', for smoker - $'+av_sm+' ('+up+'% more).')
        
    def regions(self):
        region_list = []
        for area in self.region:
            if not area in region_list:
                region_list.append(area)
        region_dict = {}
        for area in region_list:
            region_dict[area] = format(self.region.count(area)/self.total()*100,'.1f')
            print('{}% of patients are from {}.'.format(region_dict[area], area))
            
    def bmi_categories(self):
        # <18.5 underweight
        # 18.5 - 25 normal weight
        # 25 - 30 overweight
        # >30 obese
        bmi_scale = {'underweight': 0,
                    'normal weight': 18.5,
                    'overweight': 25,
                    'obese': 30}
        bmi_patients = {}
        for weight in bmi_scale.keys():
            bmi_patients[weight] = 0
        for num in self.bmi:
            if float(num) < bmi_scale['underweight']:
                bmi_patients['underweight'] += 1
            elif float(num) >= bmi_scale['underweight'] and float(num) < bmi_scale['normal weight']:
                bmi_patients['normal weight'] += 1
            elif float(num) > bmi_scale['obese']:
                bmi_patients['obese'] += 1
            else:
                bmi_patients['overweight'] += 1
        for key, value in bmi_patients.items():
            if value != 0:
                print('{}% of patients are {}.'.format(format(value/self.total()*100,'.1f'),key))
            else:
                print('None of the patients are '+key)
        
    def average_bmi(self):
        print('The average BMI of the patients is {}.'.format(average(self.bmi, 1)))
        
    def children_analysis(self):
        sorted_children = sorted(self.children)
        max_children = sorted_children[-1]
        print('All the patients have no more than {} children'.format(max_children))
        num_children = {}
        for i in range(int(max_children) + 1):
            num_children[i] = []
            children_age = list(zip(self.children,self.age))
            for age in children_age:
                if int(age[0]) == i:
                    num_children[i].append(age[1])
        for key, value in num_children.items():
            min_age, max_age = min_max(value)
            average_age = average(value, 1)
            perc_age, exc = percent(len(value), self.total(), 1)
            if key == 0:
                print('There are {} patients with no children (or {}% of all patients).'.format(len(value),perc_age))
            elif key == 1:
                print('There are {} patients with {} child (or {}% of all patients).'.format(len(value),key,perc_age))
            else:
                print('There are {} patients with {} children (or {}% of all patients).'.format(len(value),key,perc_age))
            print('Their average age is {} years; maximum - {}, minimum - {}.'.format(average_age,max_age,min_age))
    
    def print_record(self,i):
        print(self.age[i],self.sex[i],self.bmi[i],self.children[i],self.smoker[i],self.region[i],self.charges[i])
    
    def five_kids(self):
        for i in range(self.total()):
            if self.children[i] == '5':
                self.print_record(i)

insurances = Insurance()

## Gender

In [4]:
insurances.sex_data()

There are 1338 patients recorded.
50.5% of them are men, 49.5% are women.


## Age

In [5]:
insurances.ages()

The average age of the patients is 39.2 years.
The youngest patient is 18 years old, the oldest - 64.


## Smoking vs insurance cost

In [6]:
insurances.smokers()
insurances.smokers_cost()

There are 274 smokers or 20.5% of all the patients.
58.0% of all smokers are men, 42.0% are women.
The average price of insurance for non-smoker is $8434.27, for smoker - $32050.23 (380.0% more).


## Geography

In [7]:
insurances.regions()

24.3% of patients are from southwest.
27.2% of patients are from southeast.
24.3% of patients are from northwest.
24.2% of patients are from northeast.


## Weight

In [8]:
insurances.average_bmi()
insurances.bmi_categories()

The average BMI of the patients is 30.7.
None of the patients are underweight
1.5% of patients are normal weight.
45.8% of patients are overweight.
52.7% of patients are obese.


## Children

In [51]:
insurances.children_analysis()

All the patients have no more than 5 children
There are 574 patinents with no children (or 42.9% of all patients).
Their average age is 38.4 years; maximum - 64, minimum - 18.
There are 324 patinents with 1 child (or 24.2% of all patients).
Their average age is 39.5 years; maximum - 64, minimum - 18.
There are 240 patinents with 2 children (or 17.9% of all patients).
Their average age is 39.4 years; maximum - 64, minimum - 18.
There are 157 patinents with 3 children (or 11.7% of all patients).
Their average age is 41.6 years; maximum - 64, minimum - 18.
There are 25 patinents with 4 children (or 1.9% of all patients).
Their average age is 39.0 years; maximum - 61, minimum - 18.
There are 18 patinents with 5 children (or 1.3% of all patients).
Their average age is 35.6 years; maximum - 52, minimum - 19.
