## The Dataset
This dataset contains U.S. birth data for the years 1994–2003, with one row per calendar day. The columns are, in order: Year, Month, Day of Month, Day of Week (coded 1 = Monday through 7 = Sunday), and Number of Births. The dataset was compiled by FiveThirtyEight and can be found here: https://github.com/fivethirtyeight/data/tree/master/births

## Converting the CSV file into a list of lists

In [52]:
def read_csv(file_name):
    """Read a headered CSV of integers into a list of rows.

    The first line is treated as a header and skipped. Every remaining
    non-blank line is split on commas and each field is converted with
    ``int``.

    Args:
        file_name: Path of the CSV file to read.

    Returns:
        A list with one entry per data line; each entry is a list of ints.

    Raises:
        ValueError: If a field on a data line is not a valid integer.
    """
    final_list = []
    # The context manager closes the file even if a conversion fails.
    with open(file_name, 'r') as f:
        # Skip the header; the default keeps an empty file from raising.
        next(f, None)
        for line in f:
            line = line.strip()
            # A trailing newline (or stray blank line) would otherwise give
            # a single empty field and make int('') raise ValueError.
            if not line:
                continue
            final_list.append([int(field) for field in line.split(',')])
    return final_list

In [53]:
# Parse the births CSV into a list of integer rows; read_csv drops the header line.
cdc_list = read_csv("US_births_1994-2003_CDC_NCHS.csv")
# View the first 10 rows.
cdc_list[:10]

[[1994, 1, 1, 6, 8096],
 [1994, 1, 2, 7, 7772],
 [1994, 1, 3, 1, 10142],
 [1994, 1, 4, 2, 11248],
 [1994, 1, 5, 3, 11053],
 [1994, 1, 6, 4, 11406],
 [1994, 1, 7, 5, 11251],
 [1994, 1, 8, 6, 8653],
 [1994, 1, 9, 7, 7910],
 [1994, 1, 10, 1, 10498]]

## Calculating the number of births per month

In [54]:
def month_births(list_of_lists_data):
    """Sum births by month.

    Each row is indexed positionally: element 1 is the month and element 4
    is the number of births. Returns a dict mapping every month seen to the
    total of its births, with keys in first-seen order.
    """
    totals = {}
    for record in list_of_lists_data:
        month_key = record[1]
        count = record[4]
        try:
            totals[month_key] += count
        except KeyError:
            # First time this month appears: start its running total.
            totals[month_key] = count
    return totals

In [55]:
# Total births per month, accumulated over every row of cdc_list.
cdc_month_births = month_births(cdc_list)
cdc_month_births

{1: 3232517,
 2: 3018140,
 3: 3322069,
 4: 3185314,
 5: 3350907,
 6: 3296530,
 7: 3498783,
 8: 3525858,
 9: 3439698,
 10: 3378814,
 11: 3171647,
 12: 3301860}

## Calculating the number of births per day of the week

In [56]:
def dow_births(list_of_lists_data):
    """Sum births by day of the week.

    Each row is indexed positionally: element 3 is the day of the week and
    element 4 is the number of births. Returns a dict mapping every day
    seen to the total of its births, with keys in first-seen order.
    """
    totals = {}
    for record in list_of_lists_data:
        weekday, count = record[3], record[4]
        # Days not seen yet start from zero.
        totals[weekday] = totals.get(weekday, 0) + count
    return totals

In [57]:
# Total births per day of the week, accumulated over every row of cdc_list.
cdc_day_births = dow_births(cdc_list)
cdc_day_births

{1: 5789166,
 2: 6446196,
 3: 6322855,
 4: 6288429,
 5: 6233657,
 6: 4562111,
 7: 4079723}

## Creating a general function to calculate birth frequency for a column

In [58]:
def calc_counts(list_of_lists_data, column, value_column=4):
    """Total a numeric column, grouped by the unique entries of another column.

    Args:
        list_of_lists_data: Iterable of rows, each indexable by position.
        column: Index of the column whose unique entries become dict keys.
        value_column: Index of the column that is summed for each key.
            Defaults to 4, the births column of the CDC rows, so existing
            two-argument calls behave as before.

    Returns:
        A dict mapping each unique entry of ``column`` to the sum of
        ``value_column`` over the rows holding that entry. Keys appear in
        first-seen order.

    Raises:
        IndexError: If a row is too short for either column index.
    """
    birth_freq = {}
    for row in list_of_lists_data:
        key = row[column]
        # Keys not seen yet start from zero.
        birth_freq[key] = birth_freq.get(key, 0) + row[value_column]
    return birth_freq

## Creating a function to show some basic statistics for a column

In [59]:
def basic_stats(frequency_dict):
    """Print the maximum pair, minimum pair and average of a frequency dict.

    Args:
        frequency_dict: Non-empty dict mapping a key to a numeric total,
            such as the dict returned by calc_counts.

    Returns:
        None. The summary is only printed, so assigning the result of this
        function to a name stores None.

    Raises:
        ValueError: If ``frequency_dict`` is empty.
    """
    if not frequency_dict:
        # Fail with a clear message instead of the opaque error from max().
        raise ValueError("basic_stats() requires a non-empty frequency dict")
    # Rank keys by their totals; on ties the first key in dict order wins.
    max_key = max(frequency_dict, key=frequency_dict.get)
    min_key = min(frequency_dict, key=frequency_dict.get)
    max_pair = (max_key, frequency_dict[max_key])
    min_pair = (min_key, frequency_dict[min_key])
    avg = sum(frequency_dict.values()) / len(frequency_dict)
    # Spelled out line by line so the exact leading whitespace of the
    # printed report is explicit.
    message = (
        "\n"
        "          Maximum: {max_pair}\n"
        "          Minimum: {min_pair}\n"
        "          Average: {average}\n"
        "          "
    )
    print(message.format(max_pair=max_pair, min_pair=min_pair, average=avg))

In [62]:
# Year birth statistics: column 0 of each row is the year.
cdc_year_births = calc_counts(cdc_list, 0)
# NOTE: basic_stats only prints its summary and has no return statement,
# so year_birth_statistics is None.
year_birth_statistics = basic_stats(cdc_year_births)
cdc_year_births, year_birth_statistics


          Maximum: (2003, 4089950)
          Minimum: (1997, 3880894)
          Average: 3972213.7
          


({1994: 3952767,
  1995: 3899589,
  1996: 3891494,
  1997: 3880894,
  1998: 3941553,
  1999: 3959417,
  2000: 4058814,
  2001: 4025933,
  2002: 4021726,
  2003: 4089950},
 None)

In [63]:
# Month birth statistics: column 1 of each row is the month.
cdc_month_births = calc_counts(cdc_list, 1)
# NOTE: basic_stats only prints its summary and has no return statement,
# so month_birth_statistics is None.
month_birth_statistics = basic_stats(cdc_month_births)
cdc_month_births, month_birth_statistics


          Maximum: (8, 3525858)
          Minimum: (2, 3018140)
          Average: 3310178.0833333335
          


({1: 3232517,
  2: 3018140,
  3: 3322069,
  4: 3185314,
  5: 3350907,
  6: 3296530,
  7: 3498783,
  8: 3525858,
  9: 3439698,
  10: 3378814,
  11: 3171647,
  12: 3301860},
 None)

In [64]:
# Day of month statistics: column 2 of each row is the day of the month.
cdc_dom_births = calc_counts(cdc_list, 2)
# NOTE: basic_stats only prints its summary and has no return statement,
# so dom_birth_statistics is None.
dom_birth_statistics = basic_stats(cdc_dom_births)
cdc_dom_births, dom_birth_statistics


          Maximum: (18, 1326855)
          Minimum: (31, 746696)
          Average: 1281359.2580645161
          


({1: 1276557,
  2: 1288739,
  3: 1304499,
  4: 1288154,
  5: 1299953,
  6: 1304474,
  7: 1310459,
  8: 1312297,
  9: 1303292,
  10: 1320764,
  11: 1314361,
  12: 1318437,
  13: 1277684,
  14: 1320153,
  15: 1319171,
  16: 1315192,
  17: 1324953,
  18: 1326855,
  19: 1318727,
  20: 1324821,
  21: 1322897,
  22: 1317381,
  23: 1293290,
  24: 1288083,
  25: 1272116,
  26: 1284796,
  27: 1294395,
  28: 1307685,
  29: 1223161,
  30: 1202095,
  31: 746696},
 None)

In [65]:
# Day of week birth statistics: column 3 of each row is the day of the week.
cdc_dow_births = calc_counts(cdc_list, 3)
# NOTE: basic_stats only prints its summary and has no return statement,
# so dow_birth_statistics is None.
dow_birth_statistics = basic_stats(cdc_dow_births)
cdc_dow_births, dow_birth_statistics


          Maximum: (2, 6446196)
          Minimum: (7, 4079723)
          Average: 5674591.0
          


({1: 5789166,
  2: 6446196,
  3: 6322855,
  4: 6288429,
  5: 6233657,
  6: 4562111,
  7: 4079723},
 None)