# Additional Processing of Births Data

The dataset contains the following columns:

- year: Year (1994 to 2003).
- month: Month (1 to 12).
- date_of_month: Day number of the month (1 to 31).
- day_of_week: Day of week (1 to 7, where 1 is Monday and 7 is Sunday).
- births: Number of births that day.

In [18]:
def read_csv(file):
    """Read a comma-separated file of integers, dropping the header row.

    Args:
        file: Path of the CSV file to read.

    Returns:
        A list with one entry per data row; each entry is a list holding
        that row's comma-separated fields converted to ``int``.

    Raises:
        ValueError: If a field of a non-blank data row is not an integer.
    """
    final_list = []
    # The context manager closes the file even if parsing raises.
    with open(file, "r") as f:
        next(f, None)  # discard the header row (no-op on an empty file)
        for line in f:
            # A trailing newline or stray blank line would otherwise reach
            # int("") and crash; skip such lines instead.
            if not line.strip():
                continue
            final_list.append([int(num) for num in line.split(",")])
    return final_list

# Parse the CDC births CSV into a list of integer rows (header dropped).
cdc_list = read_csv("US_births_1994-2003_CDC_NCHS.csv")
# Preview the first ten parsed rows.
cdc_list[:10]

[[1994, 1, 1, 6, 8096],
 [1994, 1, 2, 7, 7772],
 [1994, 1, 3, 1, 10142],
 [1994, 1, 4, 2, 11248],
 [1994, 1, 5, 3, 11053],
 [1994, 1, 6, 4, 11406],
 [1994, 1, 7, 5, 11251],
 [1994, 1, 8, 6, 8653],
 [1994, 1, 9, 7, 7910],
 [1994, 1, 10, 1, 10498]]

# Calculating Number of Births Per Month

In [3]:
def month_births(lists):
    """Sum the births recorded for each month.

    Args:
        lists: Iterable of rows; index 1 of a row holds the month and the
            last element holds the number of births.

    Returns:
        A dictionary mapping each month seen to its total births, keyed in
        order of first appearance.
    """
    totals = {}
    for row in lists:
        month, births = row[1], row[-1]
        if month not in totals:
            # First time this month is seen: start its running total.
            totals[month] = births
            continue
        totals[month] += births
    return totals

# Total births per month over every row of the dataset, then display it.
cdc_month_births = month_births(cdc_list)
cdc_month_births

{1: 3232517,
 2: 3018140,
 3: 3322069,
 4: 3185314,
 5: 3350907,
 6: 3296530,
 7: 3498783,
 8: 3525858,
 9: 3439698,
 10: 3378814,
 11: 3171647,
 12: 3301860}

# Calculating Number of Births for Each Day of the Week

In [4]:
def dow_births(lists):
    """Sum the births recorded for each day of the week.

    Args:
        lists: Iterable of rows; index 3 of a row holds the day of the week
            and the last element holds the number of births.

    Returns:
        A dictionary mapping each day of the week seen to its total births,
        keyed in order of first appearance.
    """
    totals = {}
    for row in lists:
        day = row[3]
        births = row[-1]
        try:
            totals[day] += births
        except KeyError:
            # Day not seen before: start its running total.
            totals[day] = births
    return totals

# Total births per day of the week over every row, then display it.
cdc_day_births = dow_births(cdc_list)
cdc_day_births

{1: 5789166,
 2: 6446196,
 3: 6322855,
 4: 6288429,
 5: 6233657,
 6: 4562111,
 7: 4079723}

# General Function to Calculate Births By Any Column

In [5]:
def calc_counts(data, column):
    """Sum births grouped by the value found in one column of each row.

    Args:
        data: Iterable of rows whose last element is the number of births.
        column: Index of the row element to group by.

    Returns:
        A dictionary mapping each distinct value of ``column`` to the total
        births of the rows carrying it, keyed in order of first appearance.
    """
    totals = {}
    # Lazily pair each row's grouping value with its birth count.
    pairs = ((row[column], row[-1]) for row in data)
    for group, births in pairs:
        if group in totals:
            totals[group] += births
        else:
            totals[group] = births
    return totals

# Aggregate total births by each date column; the second argument is the
# column's position within a row.
cdc_year_births = calc_counts(cdc_list, 0)   # column 0: year
cdc_month_births = calc_counts(cdc_list, 1)  # column 1: month
cdc_dom_births = calc_counts(cdc_list, 2)    # column 2: day of month
cdc_dow_births = calc_counts(cdc_list, 3)    # column 3: day of week
cdc_year_births

{1994: 3952767,
 1995: 3899589,
 1996: 3891494,
 1997: 3880894,
 1998: 3941553,
 1999: 3959417,
 2000: 4058814,
 2001: 4025933,
 2002: 4021726,
 2003: 4089950}

In [11]:
# Display total births per month.
cdc_month_births

{1: 3232517,
 2: 3018140,
 3: 3322069,
 4: 3185314,
 5: 3350907,
 6: 3296530,
 7: 3498783,
 8: 3525858,
 9: 3439698,
 10: 3378814,
 11: 3171647,
 12: 3301860}

In [12]:
# Display total births per day of the month.
cdc_dom_births

{1: 1276557,
 2: 1288739,
 3: 1304499,
 4: 1288154,
 5: 1299953,
 6: 1304474,
 7: 1310459,
 8: 1312297,
 9: 1303292,
 10: 1320764,
 11: 1314361,
 12: 1318437,
 13: 1277684,
 14: 1320153,
 15: 1319171,
 16: 1315192,
 17: 1324953,
 18: 1326855,
 19: 1318727,
 20: 1324821,
 21: 1322897,
 22: 1317381,
 23: 1293290,
 24: 1288083,
 25: 1272116,
 26: 1284796,
 27: 1294395,
 28: 1307685,
 29: 1223161,
 30: 1202095,
 31: 746696}

In [13]:
# Display total births per day of the week.
cdc_dow_births

{1: 5789166,
 2: 6446196,
 3: 6322855,
 4: 6288429,
 5: 6233657,
 6: 4562111,
 7: 4079723}

# Calculate Min and Max Values for Any Dictionary

In [14]:
def min_max(data):
    """Return the smallest and largest values stored in a dictionary.

    Args:
        data: Dictionary whose values can be compared with one another.

    Returns:
        A ``(minimum, maximum)`` tuple of the dictionary's values.

    Raises:
        ValueError: If ``data`` is empty.
    """
    # Fail with a clear error rather than leaking StopIteration from an
    # exhausted iterator.
    if not data:
        raise ValueError("min_max() requires a non-empty dictionary")
    values = data.values()
    # Use the builtins directly instead of shadowing them with local names.
    return min(values), max(values)

# Lowest and highest yearly birth totals in the dataset.
year_min, year_max = min_max(cdc_year_births)

In [15]:
# Display the smallest yearly birth total.
year_min

3880894

In [16]:
# Display the largest yearly birth total.
year_max

4089950

In [None]:
# def consecutive_year_birth_diff(birth_date):
# #     date_next_year = birth_date[:]
# #     date_next_year[0] += 1
# #     if date_next_year[0] > 2003:
# #         return None
#     curr_year = birth_date[0]
#     for i in range(len(data)):
#         if next_year == data[i]:
#             return None
    
#     curr_births = birth_date[-1]
    