In [25]:
# Load the raw CSV text as a list of lines. The context manager makes sure
# the file handle is closed as soon as the contents have been read.
with open('US_births_1994-2003_CDC_NCHS.csv', 'r') as csv_handle:
    data = csv_handle.read().split('\n')
# Preview the header line plus the first nine data lines.
print(data[:10])

['year,month,date_of_month,day_of_week,births', '1994,1,1,6,8096', '1994,1,2,7,7772', '1994,1,3,1,10142', '1994,1,4,2,11248', '1994,1,5,3,11053', '1994,1,6,4,11406', '1994,1,7,5,11251', '1994,1,8,6,8653', '1994,1,9,7,7910']


In [26]:
def read_csv(csv_file):
    """Read a CSV file of integers, dropping its header row.

    Args:
        csv_file: string name (path) of the CSV file to read.

    Returns:
        A list of lists, one inner list per data row, with every
        comma-separated field converted to int. For the births dataset each
        row is [year, month, date, day, births].

    Raises:
        ValueError: if a field of a non-blank row cannot be parsed as an int.
    """
    # The context manager closes the file even if reading fails.
    with open(csv_file, 'r') as handle:
        rows = handle.read().split('\n')[1:]  # [1:] drops the header row
    # Blank lines (typically the empty string left by a trailing newline)
    # carry no data, so skip them instead of failing on int('').
    return [
        [int(field) for field in row.split(',')]
        for row in rows
        if row.strip()
    ]
# Parse the whole dataset into integer rows; cdc_list is the input that the
# aggregation calls below operate on.
cdc_list = read_csv('US_births_1994-2003_CDC_NCHS.csv')
# Preview the first ten parsed rows.
print(cdc_list[:10])

[[1994, 1, 1, 6, 8096], [1994, 1, 2, 7, 7772], [1994, 1, 3, 1, 10142], [1994, 1, 4, 2, 11248], [1994, 1, 5, 3, 11053], [1994, 1, 6, 4, 11406], [1994, 1, 7, 5, 11251], [1994, 1, 8, 6, 8653], [1994, 1, 9, 7, 7910], [1994, 1, 10, 1, 10498]]


In [27]:
def month_births(input_lst):
    """Total the births recorded for each month.

    Args:
        input_lst: list of integer rows shaped [year, month, date, day, births].

    Returns:
        Dictionary mapping each month found in the rows to the sum of its
        births, with keys in order of first appearance.
    """
    totals = {}
    for record in input_lst:
        month_key, count = record[1], record[4]
        # A month seen for the first time starts from zero.
        totals[month_key] = totals.get(month_key, 0) + count
    return totals
# Births summed per month across every row of the dataset.
cdc_month_births = month_births(cdc_list)
print(cdc_month_births)

{1: 3232517, 2: 3018140, 3: 3322069, 4: 3185314, 5: 3350907, 6: 3296530, 7: 3498783, 8: 3525858, 9: 3439698, 10: 3378814, 11: 3171647, 12: 3301860}


In [28]:
def dow_births(input_lst):
    """Total the births recorded for each day of the week.

    Args:
        input_lst: list of integer rows shaped [year, month, date, day, births].

    Returns:
        Dictionary mapping each day-of-week value found in the rows to the
        sum of its births, with keys in order of first appearance.
    """
    totals = {}
    for record in input_lst:
        weekday = record[3]
        count = record[4]
        try:
            totals[weekday] += count
        except KeyError:
            # First row seen for this weekday: start its running total.
            totals[weekday] = count
    return totals
# Births summed per day-of-week value across every row of the dataset.
cdc_day_births = dow_births(cdc_list)
print(cdc_day_births)

{1: 5789166, 2: 6446196, 3: 6322855, 4: 6288429, 5: 6233657, 6: 4562111, 7: 4079723}


In [29]:
def calc_counts(data, column):
    """Total the births grouped by the value found in one column.

    Args:
        data: list of integer rows shaped [year, month, date, day, births].
        column: index of the column whose values become the dictionary keys.

    Returns:
        Dictionary mapping each distinct value of the chosen column to the
        sum of births over the rows holding that value, with keys in order
        of first appearance.
    """
    totals = {}
    # Pair each row's grouping value with its birth count (always column 4).
    pairs = ((record[column], record[4]) for record in data)
    for group_key, count in pairs:
        totals[group_key] = totals.get(group_key, 0) + count
    return totals
# Column indices follow the row layout [year, month, date, day, births]:
# 0 = year, 1 = month, 2 = date of month, 3 = day of week.
cdc_year_births = calc_counts(cdc_list,0)
# Rebinds cdc_month_births, this time computed through the generic helper.
cdc_month_births = calc_counts(cdc_list,1)
cdc_dom_births = calc_counts(cdc_list,2)
cdc_dow_births = calc_counts(cdc_list,3)
print(cdc_year_births)

{2000: 4058814, 2001: 4025933, 2002: 4021726, 2003: 4089950, 1994: 3952767, 1995: 3899589, 1996: 3891494, 1997: 3880894, 1998: 3941553, 1999: 3959417}


In [30]:
def get_extrema(data_dict):
    """Find the keys holding the smallest and largest values.

    Args:
        data_dict: dictionary mapping a key to its number of births.

    Returns:
        Tuple (min_key, max_key). When several keys share the extreme value,
        the one that appears first in the dictionary is returned.

    Raises:
        ValueError: if data_dict is empty.
    """
    # Iterating the dict yields its keys; ranking them by their stored value
    # selects the first key carrying the extreme value.
    lowest = min(data_dict, key=data_dict.get)
    highest = max(data_dict, key=data_dict.get)
    return lowest, highest
# Keys with the fewest and the most births for each grouping computed above.
min_year,max_year = get_extrema(cdc_year_births)
min_month,max_month = get_extrema(cdc_month_births)
min_dom,max_dom = get_extrema(cdc_dom_births)
min_dow,max_dow = get_extrema(cdc_dow_births)
print(min_month,max_month)

2 8


In [31]:
def compare_years(data, column, sub_column):
    """Total the births per year for rows matching one value of a column.

    Args:
        data: list of integer rows shaped [year, month, date, day, births].
        column: index of the column to filter on.
        sub_column: value the chosen column must equal for a row to count.

    Returns:
        Dictionary mapping each year to the sum of births over that year's
        matching rows. Years with no matching row are absent.
    """
    yearly_totals = {}
    for record in data:
        yr, selector, count = record[0], record[column], record[4]
        if selector == sub_column:
            yearly_totals[yr] = yearly_totals.get(yr, 0) + count
    return yearly_totals
# Yearly totals for rows whose day-of-week column (index 3) equals 6.
cdc_saturday_births_per_year = compare_years(cdc_list,3,6)
# Yearly totals for rows whose month column (index 1) equals 4.
cdc_april_births_per_year = compare_years(cdc_list,1,4)
print(cdc_saturday_births_per_year)

{2000: 469794, 2001: 453928, 2002: 445770, 2003: 447445, 1994: 474732, 1995: 459580, 1996: 456261, 1997: 450840, 1998: 453776, 1999: 449985}


# Conclusions

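Total births are lowest in February and highest in August. February is also the shortest month, so part of the dip reflects fewer days rather than a lower daily birth rate.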

Birth numbers are highest on weekdays (days 1-5) and lowest over the weekend (days 6 and 7),
likely because C-sections are scheduled during weekdays.