## Working with US birth dataset
The dataset contains the following columns:
- `year` : Year (1994 to 2003).
- `month`: Month (1 to 12).
- `date_of_month`: Day number of the month (1 to 31).
- `day_of_week`: Day of week (1 to 7).
- `births`: Number of births that day.

In [8]:
US_births = open("US_births_1994-2003_CDC_NCHS.csv", 'r').read()
US_births = US_births.split("\n")
US_births[0:10]

['year,month,date_of_month,day_of_week,births',
 '1994,1,1,6,8096',
 '1994,1,2,7,7772',
 '1994,1,3,1,10142',
 '1994,1,4,2,11248',
 '1994,1,5,3,11053',
 '1994,1,6,4,11406',
 '1994,1,7,5,11251',
 '1994,1,8,6,8653',
 '1994,1,9,7,7910']

In [5]:
def read_csv(file):
    string = open(file).read()
    string_list = string.split("\n")
    string_list = string_list[1:len(string_list)]
    final_list = []
    for it in string_list:
        int_fields = []
        string_fields = it.split(",")
        for num in string_fields:
            int_fields.append(int(num))
        final_list.append(int_fields)
    return final_list

cdc_list = read_csv("US_births_1994-2003_CDC_NCHS.csv")
cdc_list[0:10]

[[1994, 1, 1, 6, 8096],
 [1994, 1, 2, 7, 7772],
 [1994, 1, 3, 1, 10142],
 [1994, 1, 4, 2, 11248],
 [1994, 1, 5, 3, 11053],
 [1994, 1, 6, 4, 11406],
 [1994, 1, 7, 5, 11251],
 [1994, 1, 8, 6, 8653],
 [1994, 1, 9, 7, 7910],
 [1994, 1, 10, 1, 10498]]

In [7]:
def month_births(data_list):
    births_per_month = {}
    for it in data_list:
        month = it[1]
        birth = it[4]
        if month in births_per_month:
            births_per_month[month] = births_per_month[month] + birth
        else:
            births_per_month[month] = birth
    return births_per_month

cdc_month_births = month_births(cdc_list)
cdc_month_births

{1: 3232517,
 2: 3018140,
 3: 3322069,
 4: 3185314,
 5: 3350907,
 6: 3296530,
 7: 3498783,
 8: 3525858,
 9: 3439698,
 10: 3378814,
 11: 3171647,
 12: 3301860}

In [9]:
def dow_births(data_list):
    day_of_week = {}
    for it in data_list:
        day = it[3]
        birth = it[4]
        if day in day_of_week:
            day_of_week[day] = day_of_week[day] + birth
        else:
            day_of_week[day] = birth
    return day_of_week

cdc_day_births = dow_births(cdc_list)
cdc_day_births

{1: 5789166,
 2: 6446196,
 3: 6322855,
 4: 6288429,
 5: 6233657,
 6: 4562111,
 7: 4079723}

In [14]:
def calc_counts(data, column):
    lib = {}
    for it in data:
        position = it[int(column)]
        birth = it[4]
        if position in lib:
            lib[position] = lib[position] + birth
        else:
            lib[position] = birth
    return lib

cdc_year_births = calc_counts(cdc_list, 0)
cdc_month_births = calc_counts(cdc_list, 1)
cdc_dom_births = calc_counts(cdc_list, 2)
cdc_dow_births = calc_counts(cdc_list, 3)

In [15]:
def min_max(lib):
    minv = 0
    maxv = 0
    for it in lib:
        if lib[it] > maxv:
            maxv = lib[it]
        if lib[it] < minv:
            minv = lib[it]
    return minv, maxv

In [78]:
def trend(data, column, val):
    lib = {}
    
    for row in data:
        year = row[0]
        test = row[int(column)]
        birth = row[4]
        if test == int(val):
            if year in lib:
                lib[year] = lib[year] + birth
            else:
                lib[year] = birth
    
    val_births = 0
    for key in lib:
        val = lib[key]
        if val_births == 0:
            print("Growth of birth in " + str(key) + " is not available.")
        else:
            if val > val_births:
                print("Growth of birth in " + str(key) + " is increasing")
            elif val < val_births: 
                print("Growth of birth in " + str(key) + " is decreasing")
            elif val == val_births:
                print("Growth of birth in " + str(key) + " is the same")
        val_births = val
        
    return lib

The number of births on Saturday change each year between 1994 and 2003.

In [80]:
#the number of births on Saturday change each year between 1994 and 2003
a = trend(cdc_list, 3, 6)
a

Growth of birth in 2000 is not available.
Growth of birth in 2001 is decreasing
Growth of birth in 2002 is decreasing
Growth of birth in 2003 is increasing
Growth of birth in 1994 is increasing
Growth of birth in 1995 is decreasing
Growth of birth in 1996 is decreasing
Growth of birth in 1997 is decreasing
Growth of birth in 1998 is increasing
Growth of birth in 1999 is decreasing


{1994: 474732,
 1995: 459580,
 1996: 456261,
 1997: 450840,
 1998: 453776,
 1999: 449985,
 2000: 469794,
 2001: 453928,
 2002: 445770,
 2003: 447445}