# Introduction to the Dataset

The following CSV is pulled from the FiveThirtyEight <a href="https://raw.githubusercontent.com/fivethirtyeight/data/master/births/US_births_1994-2003_CDC_NCHS.csv" target="_blank">dataset</a> of US births between 1994 and 2003. Each row records the number of births on one particular day. The file has the following columns:

<ul>
<li>Year</li>
<li>Month</li>
<li>Date of Month</li>
<li>Day of Week</li>
<li>Births</li>
</ul>

## Read in the data

In [27]:
# Read the whole file as text; the context manager closes the handle as soon
# as the read finishes instead of leaving it open for the rest of the session.
with open("US_births_1994-2003_CDC_NCHS.csv") as csv_file:
    data = csv_file.read()
# One string per line of the file; index 0 is the header row.
split_rows = data.split("\n")

## Display the header and first ten rows

In [28]:
# The first line of the file holds the comma-separated column names.
header = split_rows[0]
header

'year,month,date_of_month,day_of_week,births'

In [29]:
# Skip the header (index 0) and show the next ten raw rows.
split_rows[1:11]

['1994,1,1,6,8096',
 '1994,1,2,7,7772',
 '1994,1,3,1,10142',
 '1994,1,4,2,11248',
 '1994,1,5,3,11053',
 '1994,1,6,4,11406',
 '1994,1,7,5,11251',
 '1994,1,8,6,8653',
 '1994,1,9,7,7910',
 '1994,1,10,1,10498']

# Converting Data into a List of Lists

In [30]:
def read_csv(file_name):
    """Read a CSV file of integers into a list of lists.

    The first line is treated as a header and discarded. Every remaining
    non-blank line is split on commas and each field is converted to ``int``.

    Args:
        file_name: Path of the CSV file to read.

    Returns:
        A list with one entry per data row; each entry is a list of ints.

    Raises:
        ValueError: If any field of a data row is not a valid integer.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(file_name, "r") as csv_file:
        data = csv_file.read()

    # Drop only the header. Blank lines (such as the empty string produced by
    # a trailing newline) are skipped below, so the last data row is kept
    # whether or not the file ends with a newline.
    data_rows = data.split("\n")[1:]

    final_list = []
    for row in data_rows:
        if not row.strip():
            continue
        final_list.append([int(field) for field in row.split(",")])
    return final_list

In [31]:
# Parse the dataset into rows of integers, then preview the first ten rows.
cdc_list = read_csv("US_births_1994-2003_CDC_NCHS.csv")
cdc_list[0:10]

[[1994, 1, 1, 6, 8096],
 [1994, 1, 2, 7, 7772],
 [1994, 1, 3, 1, 10142],
 [1994, 1, 4, 2, 11248],
 [1994, 1, 5, 3, 11053],
 [1994, 1, 6, 4, 11406],
 [1994, 1, 7, 5, 11251],
 [1994, 1, 8, 6, 8653],
 [1994, 1, 9, 7, 7910],
 [1994, 1, 10, 1, 10498]]

# Calculating Number of Births Each Month

In [32]:
def month_births(births_lists):
    """Sum the births recorded for each month.

    Each row of ``births_lists`` is indexed positionally: index 1 is the
    month and index 4 is the number of births.

    Returns a dict mapping each month to its total births.
    """
    totals = {}
    for row in births_lists:
        month_key = row[1]
        count = row[4]
        try:
            totals[month_key] += count
        except KeyError:
            # First time this month is seen: start its running total.
            totals[month_key] = count
    return totals

In [33]:
# Total births per month, summed over every row of the dataset.
cdc_month_births = month_births(cdc_list)
cdc_month_births

{1: 3232517,
 2: 3018140,
 3: 3322069,
 4: 3185314,
 5: 3350907,
 6: 3296530,
 7: 3498783,
 8: 3525858,
 9: 3439698,
 10: 3378814,
 11: 3171647,
 12: 3289486}

# Calculating Number of Births Each Day of Week

In [34]:
def dow_births(births_lists):
    """Sum the births recorded for each day of the week.

    Each row of ``births_lists`` is indexed positionally: index 3 is the
    day-of-week code and index 4 is the number of births.

    Returns a dict mapping each day-of-week code to its total births.
    """
    totals = {}
    for record in births_lists:
        weekday = record[3]
        births = record[4]
        if weekday not in totals:
            # First occurrence of this weekday seeds the running total.
            totals[weekday] = births
            continue
        totals[weekday] += births
    return totals

In [35]:
# Total births per day-of-week code, summed over every row of the dataset.
cdc_day_births = dow_births(cdc_list)
cdc_day_births

{1: 5789166,
 2: 6446196,
 3: 6310481,
 4: 6288429,
 5: 6233657,
 6: 4562111,
 7: 4079723}

# Creating a More General Function

In [36]:
def calc_counts(data, column, value_column=4):
    """Total a numeric column, grouped by the values of another column.

    Args:
        data: Iterable of rows, each an indexable sequence of numbers.
        column: Index of the column whose values become the dict keys.
        value_column: Index of the column to sum for each key. Defaults to 4,
            the births column, so existing two-argument calls are unchanged.

    Returns:
        A dict mapping each distinct value found in ``column`` to the sum of
        ``value_column`` over the rows that carry that value.
    """
    totals = {}
    for row in data:
        key = row[column]
        totals[key] = totals.get(key, 0) + row[value_column]
    return totals

In [37]:
# Aggregate births by each date column; the second argument is the column's
# position in the header (year, month, date_of_month, day_of_week).
cdc_year_births = calc_counts(cdc_list, 0)
cdc_month_births = calc_counts(cdc_list, 1)
cdc_dom_births = calc_counts(cdc_list, 2)
cdc_dow_births = calc_counts(cdc_list, 3)

In [38]:
# Total births per year.
cdc_year_births

{1994: 3952767,
 1995: 3899589,
 1996: 3891494,
 1997: 3880894,
 1998: 3941553,
 1999: 3959417,
 2000: 4058814,
 2001: 4025933,
 2002: 4021726,
 2003: 4077576}

In [39]:
# Total births per month.
cdc_month_births

{1: 3232517,
 2: 3018140,
 3: 3322069,
 4: 3185314,
 5: 3350907,
 6: 3296530,
 7: 3498783,
 8: 3525858,
 9: 3439698,
 10: 3378814,
 11: 3171647,
 12: 3289486}

In [40]:
# Total births per day of the month.
cdc_dom_births

{1: 1276557,
 2: 1288739,
 3: 1304499,
 4: 1288154,
 5: 1299953,
 6: 1304474,
 7: 1310459,
 8: 1312297,
 9: 1303292,
 10: 1320764,
 11: 1314361,
 12: 1318437,
 13: 1277684,
 14: 1320153,
 15: 1319171,
 16: 1315192,
 17: 1324953,
 18: 1326855,
 19: 1318727,
 20: 1324821,
 21: 1322897,
 22: 1317381,
 23: 1293290,
 24: 1288083,
 25: 1272116,
 26: 1284796,
 27: 1294395,
 28: 1307685,
 29: 1223161,
 30: 1202095,
 31: 734322}

In [41]:
# Total births per day-of-week code.
cdc_dow_births

{1: 5789166,
 2: 6446196,
 3: 6310481,
 4: 6288429,
 5: 6233657,
 6: 4562111,
 7: 4079723}

# Calculate the Min and Max Values of any Dictionary

In [42]:
def min_max_dict(d):
    """Return the entries of ``d`` holding its smallest and largest values.

    Args:
        d: A dict whose values can be compared with one another.

    Returns:
        A dict with the minimum entry first and the maximum entry second.
        When several keys share the extreme value, the first one in
        iteration order is used. If the same key is both the minimum and the
        maximum, the result has a single entry. An empty ``d`` yields an
        empty dict rather than a bogus ``{None: None}`` entry.
    """
    if not d:
        return {}

    # min() and max() both return the first extreme item they encounter,
    # which matches a strict-comparison scan over the items.
    min_key, min_value = min(d.items(), key=lambda pair: pair[1])
    max_key, max_value = max(d.items(), key=lambda pair: pair[1])

    result = {min_key: min_value}
    result[max_key] = max_value
    return result

In [43]:
# NOTE: despite its name, min_num holds both the smallest and the largest
# monthly totals, keyed by month.
min_num = min_max_dict(cdc_month_births)
min_num

{2: 3018140, 8: 3525858}