# U.S. Medical Insurance Costs

In [141]:
import pandas as pd

# Load the insurance records from insurance.csv into the dataframe `df`,
# which the cells below read as a global.
df = pd.read_csv('insurance.csv')
# Last expression of the cell: displays the first five rows as a preview.
df.head()




Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [142]:
def averages(category):
    '''
    Purpose: Reports the average of a numerical column of the dataframe df
    Parameters: category (str) **category must have numerical data**
    Return value: str (a sentence with the average shown to two decimals, or
            an error message when category is not a numerical column of df)
    '''
    try:
        column = df[category]

        # Reject text columns explicitly instead of relying on arithmetic to fail.
        if not pd.api.types.is_numeric_dtype(column):
            raise TypeError(f"column {category!r} is not numerical")

        # Series.mean() skips missing values by default.
        average = column.mean()

    except (KeyError, TypeError):
        # KeyError: df has no such column; TypeError: the column is not numerical.
        return "category must be either 'age', 'bmi', 'children', or 'charges'"

    if category == 'children':
        return f"Subjects in this file have an average of {average:.2f} children."

    if category == 'charges':
        return f"The average charges for a subject in this file are ${average:.2f}."

    return f"The average {category} of a subject in this file is {average:.2f}."
    
        
    
def greater_than_average(category):
    '''
    Purpose: returns a filtered dataframe with subjects whose values in specified
            category are greater than the average value in that category
    Parameters: category (str) **category must have numerical data**
    Return value: dataframe (or an error message str when category is not a
            numerical column of df)
    '''
    try:
        column = df[category]

        # Reject text columns explicitly instead of relying on mean() or the
        # comparison to fail.
        if not pd.api.types.is_numeric_dtype(column):
            raise TypeError(f"column {category!r} is not numerical")

        # Boolean mask: True for rows strictly above the column mean.
        return df[column > column.mean()]

    except (KeyError, TypeError):
        # KeyError: df has no such column; TypeError: the column is not numerical.
        return "category must be either 'age', 'bmi', 'children', or 'charges'"
    
    
def unique_regions():
    '''
    Purpose: tallies how many subjects in df belong to each region
    Return Value: dict (region name -> number of subjects, in order of first
            appearance in the file)
    '''
    tally = {}

    for region in df['region']:
        # A region not seen yet starts from 0.
        tally[region] = tally.get(region, 0) + 1

    return tally
  



In [149]:
# boolean masks over df: smokers, male subjects, female subjects
filt_smoker = df['smoker'].eq('yes')
filt_male = df['sex'].eq('male')
filt_female = df['sex'].eq('female')

# rows where the subject is both male and a smoker
male_smokers = df.loc[filt_male & filt_smoker]

# rows where the subject is both female and a smoker
female_smokers = df.loc[filt_female & filt_smoker]

# show both filtered dataframes
print(male_smokers)
print(female_smokers)

      age   sex     bmi  children smoker     region      charges
14     27  male  42.130         0    yes  southeast  39611.75770
19     30  male  35.300         0    yes  southwest  36837.46700
29     31  male  36.300         2    yes  southwest  38711.00000
30     22  male  35.600         0    yes  southwest  35585.57600
34     28  male  36.400         1    yes  southwest  51194.55914
...   ...   ...     ...       ...    ...        ...          ...
1301   62  male  30.875         3    yes  northwest  46718.16325
1303   43  male  27.800         0    yes  southwest  37829.72420
1304   42  male  24.605         2    yes  northeast  21259.37795
1307   32  male  28.120         4    yes  northwest  21472.47880
1321   62  male  26.695         0    yes  northeast  28101.33305

[159 rows x 7 columns]
      age     sex     bmi  children smoker     region      charges
0      19  female  27.900         0    yes  southwest  16884.92400
11     62  female  26.290         0    yes  southeast  27808.7

In [152]:
# dataframe filters (teenagers, has children)
filt_teenagers = df['age'] < 20
filt_has_children = df['children'] > 0

# create dataframe with only teenage parents
teen_parents = df[filt_teenagers & filt_has_children]

# index for dataframe with teen parents
teen_parents_index = teen_parents.index


# print dataframe and indexes
print(teen_parents)
print(teen_parents_index)  


      age     sex     bmi  children smoker     region      charges
1      18    male  33.770         1     no  southeast   1725.55230
15     19    male  24.600         1     no  southwest   1837.23700
32     19  female  28.600         5     no  southwest   4687.79700
46     18  female  38.665         2     no  northeast   3393.35635
57     18    male  31.680         2    yes  southeast  34303.16720
106    19  female  28.400         1     no  southwest   2331.51900
149    19    male  28.400         1     no  southwest   1842.51900
168    19  female  31.825         1     no  northwest   2719.27975
248    19    male  20.900         1     no  southwest   1832.09400
250    18    male  17.290         2    yes  northeast  12829.45510
270    18    male  29.370         1     no  southeast   1719.43630
276    19    male  20.615         2     no  northwest   2803.69785
369    18    male  30.400         3     no  northeast   3481.86800
469    18  female  24.090         1     no  southeast   2201.0

In [153]:
if __name__ == '__main__':
    # Exercise each helper once, including the invalid-category paths.

    # 'sex' holds text, so greater_than_average returns its error message
    print(greater_than_average('sex'))
    print(greater_than_average('bmi').head())

    # 'region' holds text, so averages returns its error message; the
    # remaining categories are numerical
    for _category in ('region', 'age', 'bmi', 'children', 'charges'):
        print(averages(_category))

    print(unique_regions())


    
     

category must be either 'age', 'bmi', 'children', or 'charges'
    age     sex    bmi  children smoker     region     charges
1    18    male  33.77         1     no  southeast   1725.5523
2    28    male  33.00         3     no  southeast   4449.4620
6    46  female  33.44         1     no  southeast   8240.5896
12   23    male  34.40         0     no  southwest   1826.8430
13   56  female  39.82         0     no  southeast  11090.7178
category must be either 'age', 'bmi', 'children', or 'charges'
The average age of a subject in this file is 39.21.
The average bmi of a subject in this file is 30.66.
Subjects in this file have an average of 1.09 children.
The average charges for a subject in this file are $13270.42.
{'southwest': 325, 'southeast': 364, 'northwest': 325, 'northeast': 324}
