# U.S. Births
The dataset contains the following columns:
+ `[0] year`: Year (1994 to 2003)
+ `[1] month`: Month (1 to 12)
+ `[2] date_of_month`: Day number of the month (1 to 31)
+ `[3] day_of_week`: Day of week (1 to 7, where 1 is Monday and 7 is Sunday)
+ `[4] births`: Number of births that day

In [1]:
# Load the raw CSV text and preview the header line plus the first nine rows.
# The context manager closes the file handle as soon as the read finishes,
# instead of leaving it open for the lifetime of the session.
with open("US_births_1994-2003_CDC_NCHS.csv", "r") as f:
    text = f.read()
lines = text.split("\n")
lines[0:10]

['year,month,date_of_month,day_of_week,births',
 '1994,1,1,6,8096',
 '1994,1,2,7,7772',
 '1994,1,3,1,10142',
 '1994,1,4,2,11248',
 '1994,1,5,3,11053',
 '1994,1,6,4,11406',
 '1994,1,7,5,11251',
 '1994,1,8,6,8653',
 '1994,1,9,7,7910']

In [2]:
def read_csv(file_name):
    """Read a headered CSV of integers into a list of integer rows.

    Args:
        file_name: Path of the CSV file. The first line is treated as a
            header and skipped; every field on the remaining lines must
            parse as an int.

    Returns:
        A list with one inner list of ints per data line, in file order.

    Raises:
        ValueError: If a field on a non-blank data line is not an integer.
    """
    # The context manager closes the handle even if reading fails.
    with open(file_name, "r") as f:
        lines = f.read().split("\n")

    final_list = []
    for line in lines[1:]:
        # A trailing newline leaves an empty final chunk after the split;
        # int("") would raise, so blank lines are skipped.
        if not line.strip():
            continue
        final_list.append([int(field) for field in line.split(",")])
    return final_list

In [3]:
# Parse the births CSV into a list of integer rows (header skipped by
# read_csv) and preview the first ten rows.
cdc_list = read_csv("US_births_1994-2003_CDC_NCHS.csv")
cdc_list[0:10]

[[1994, 1, 1, 6, 8096],
 [1994, 1, 2, 7, 7772],
 [1994, 1, 3, 1, 10142],
 [1994, 1, 4, 2, 11248],
 [1994, 1, 5, 3, 11053],
 [1994, 1, 6, 4, 11406],
 [1994, 1, 7, 5, 11251],
 [1994, 1, 8, 6, 8653],
 [1994, 1, 9, 7, 7910],
 [1994, 1, 10, 1, 10498]]

In [4]:
def month_births(input_lst):
    """Total the births recorded for each month.

    Args:
        input_lst: Iterable of rows where index 1 holds the month and
            index 4 holds the number of births.

    Returns:
        A dict mapping each month found in the rows to the sum of its
        births, with keys in order of first appearance.
    """
    totals = {}
    for row in input_lst:
        month, count = row[1], row[4]
        try:
            totals[month] += count
        except KeyError:
            # First time this month is seen: start its running total.
            totals[month] = count
    return totals

In [5]:
# Total births per month across every row in cdc_list, then display the dict.
cdc_month_births = month_births(cdc_list)
cdc_month_births

{1: 3232517,
 2: 3018140,
 3: 3322069,
 4: 3185314,
 5: 3350907,
 6: 3296530,
 7: 3498783,
 8: 3525858,
 9: 3439698,
 10: 3378814,
 11: 3171647,
 12: 3301860}

In [6]:
def dow_births(input_lst):
    """Total the births recorded for each day of the week.

    Args:
        input_lst: Iterable of rows where index 3 holds the day of week
            and index 4 holds the number of births.

    Returns:
        A dict mapping each day-of-week value found in the rows to the sum
        of its births, with keys in order of first appearance.
    """
    totals = {}
    for row in input_lst:
        weekday, count = row[3], row[4]
        if weekday not in totals:
            # First occurrence seeds the running total for this weekday.
            totals[weekday] = count
            continue
        totals[weekday] += count
    return totals

In [7]:
# Total births per day-of-week value across every row in cdc_list, then
# display the dict.
cdc_day_births = dow_births(cdc_list)
cdc_day_births

{1: 5789166,
 2: 6446196,
 3: 6322855,
 4: 6288429,
 5: 6233657,
 6: 4562111,
 7: 4079723}

In [8]:
def calc_counts(data, column, value_column=4):
    """Sum one numeric column, grouped by the values of another column.

    Args:
        data: Iterable of indexable rows.
        column: Index of the column whose distinct values become the keys.
        value_column: Index of the numeric column to add up. Defaults to 4,
            the births column, so existing two-argument calls behave as
            before.

    Returns:
        A dict mapping each distinct value of ``column`` to the sum of
        ``value_column`` over the rows carrying that value, with keys in
        order of first appearance.
    """
    calc_dict = {}
    for row in data:
        key = row[column]
        # get() with a 0 default folds the "first time seen" branch into
        # the same accumulation step.
        calc_dict[key] = calc_dict.get(key, 0) + row[value_column]
    return calc_dict

In [9]:
# Group by column 0 (year): total births per year, then display the dict.
cdc_year_births = calc_counts(cdc_list, 0)
cdc_year_births

{1994: 3952767,
 1995: 3899589,
 1996: 3891494,
 1997: 3880894,
 1998: 3941553,
 1999: 3959417,
 2000: 4058814,
 2001: 4025933,
 2002: 4021726,
 2003: 4089950}

In [10]:
# Group by column 1 (month): total births per month. This rebinds
# cdc_month_births using the generic calc_counts helper.
cdc_month_births = calc_counts(cdc_list, 1)
cdc_month_births

{1: 3232517,
 2: 3018140,
 3: 3322069,
 4: 3185314,
 5: 3350907,
 6: 3296530,
 7: 3498783,
 8: 3525858,
 9: 3439698,
 10: 3378814,
 11: 3171647,
 12: 3301860}

In [11]:
# Group by column 2 (date_of_month): total births per day number of the
# month, then display the dict.
cdc_dom_births = calc_counts(cdc_list, 2)
cdc_dom_births

{1: 1276557,
 2: 1288739,
 3: 1304499,
 4: 1288154,
 5: 1299953,
 6: 1304474,
 7: 1310459,
 8: 1312297,
 9: 1303292,
 10: 1320764,
 11: 1314361,
 12: 1318437,
 13: 1277684,
 14: 1320153,
 15: 1319171,
 16: 1315192,
 17: 1324953,
 18: 1326855,
 19: 1318727,
 20: 1324821,
 21: 1322897,
 22: 1317381,
 23: 1293290,
 24: 1288083,
 25: 1272116,
 26: 1284796,
 27: 1294395,
 28: 1307685,
 29: 1223161,
 30: 1202095,
 31: 746696}

In [12]:
# Group by column 3 (day_of_week): total births per day of the week, then
# display the dict.
cdc_dow_births = calc_counts(cdc_list, 3)
cdc_dow_births

{1: 5789166,
 2: 6446196,
 3: 6322855,
 4: 6288429,
 5: 6233657,
 6: 4562111,
 7: 4079723}

In [28]:
def min_max(input_dict):
    """Find the entries holding the smallest and largest values of a dict.

    Args:
        input_dict: Mapping whose values can be compared with ``<`` and ``>``.

    Returns:
        A dict of the form ``{"min": {key: value}, "max": {key: value}}``.
        When several keys share the extreme value, the first one in
        iteration order is reported. For an empty mapping both inner dicts
        are empty.
    """
    # An empty mapping has no extremes; return the same shape with empty
    # entries rather than failing on a missing first key.
    if not input_dict:
        return {"min": {}, "max": {}}

    # The builtins keep the first extreme they encounter, matching a
    # strict </> scan over the items.
    min_key = min(input_dict, key=input_dict.get)
    max_key = max(input_dict, key=input_dict.get)
    return {
        "min": {min_key: input_dict[min_key]},
        "max": {max_key: input_dict[max_key]},
    }

In [29]:
# Day-of-week values with the fewest and the most total births.
print(min_max(cdc_dow_births))

{'max': {2: 6446196}, 'min': {7: 4079723}}


In [30]:
# Day-of-month values with the fewest and the most total births.
print(min_max(cdc_dom_births))

{'max': {18: 1326855}, 'min': {31: 746696}}


In [31]:
# Months with the fewest and the most total births.
print(min_max(cdc_month_births))

{'max': {8: 3525858}, 'min': {2: 3018140}}


In [32]:
# Years with the fewest and the most total births.
print(min_max(cdc_year_births))

{'max': {2003: 4089950}, 'min': {1997: 3880894}}


In [40]:
def trend_over_years(data, column, value):
    """Total births per year for the rows matching one column value.

    Args:
        data: Iterable of rows laid out as
            [year, month, date_of_month, day_of_week, births].
        column: Index of the column to filter on. Passing 0 (the year
            column) prints a hint and returns None.
        value: Value the chosen column must equal for a row to be counted.

    Returns:
        None when ``column`` is 0. Otherwise a dict with two entries: the
        filtered column's name mapped to ``value``, and ``"trend"`` mapped
        to a dict of year -> summed births over the matching rows.
    """
    # Filtering on the year column itself cannot show a trend over years.
    if column == 0:
        print('Choose another column to see birth trends over the years')
        return None

    column_names = ('year', 'month', 'date_of_month', 'day_of_week', 'births')
    # Resolve the label before scanning so a bad index fails up front.
    label = column_names[column]

    yearly_totals = {}
    for row in data:
        if row[column] == value:
            year, count = row[0], row[4]
            if year not in yearly_totals:
                yearly_totals[year] = count
            else:
                yearly_totals[year] += count

    return {label: value, "trend": yearly_totals}

In [42]:
# Yearly birth totals for rows whose day_of_week (column 3) equals 7.
# NOTE(review): the first data row (1994-01-01, a Saturday) is coded as
# day_of_week 6, so 7 presumably denotes Sunday and the variable name may be
# misleading - confirm against the dataset's documentation.
saturday_births = trend_over_years(cdc_list, 3, 7)
saturday_births

{'day_of_week': 7,
 'trend': {1994: 428752,
  1995: 425790,
  1996: 413336,
  1997: 404478,
  1998: 407129,
  1999: 401991,
  2000: 416454,
  2001: 397119,
  2002: 391375,
  2003: 393299}}

In [45]:
# Yearly birth totals for rows whose month (column 1) equals 3.
march_births = trend_over_years(cdc_list, 1, 3)
march_births

{'month': 3,
 'trend': {1994: 339736,
  1995: 328503,
  1996: 322581,
  1997: 321212,
  1998: 329436,
  1999: 332939,
  2000: 340553,
  2001: 338684,
  2002: 331505,
  2003: 336920}}

In [46]:
# Yearly birth totals for rows whose date_of_month (column 2) equals 27.
twenty_seventh_births = trend_over_years(cdc_list, 2, 27)
twenty_seventh_births

{'date_of_month': 27,
 'trend': {1994: 127901,
  1995: 133076,
  1996: 126046,
  1997: 121094,
  1998: 128560,
  1999: 130052,
  2000: 135697,
  2001: 137393,
  2002: 128334,
  2003: 126242}}