# Reading Data

In [2]:
import csv
import collections
import dateutil.parser
import urllib
import contextlib
import matplotlib.pyplot as plt
import numpy as np



In [3]:
first_key = ("A002","R051","02-00-00","59 ST")
other_key = ('N134', 'R385', '00-00-00', 'ROCKAWAY BLVD')

In [4]:
def read_data(data):
    dict_list = []
    with open(data, 'rb') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            dict_list.append(row)
    return dict_list

In [5]:
dict_list = read_data('turnstile_160402.txt')
print dict_list[1:3]

[{'DIVISION': 'BMT', 'LINENAME': 'NQR456', 'EXITS                                                               ': '0001893282                                             ', 'ENTRIES': '0005595746', 'C/A': 'A002', 'STATION': '59 ST', 'TIME': '04:00:00', 'DATE': '03/26/2016', 'SCP': '02-00-00', 'UNIT': 'R051', 'DESC': 'REGULAR'}, {'DIVISION': 'BMT', 'LINENAME': 'NQR456', 'EXITS                                                               ': '0001893282                                             ', 'ENTRIES': '0005595746', 'C/A': 'A002', 'STATION': '59 ST', 'TIME': '08:00:00', 'DATE': '03/26/2016', 'SCP': '02-00-00', 'UNIT': 'R051', 'DESC': 'REGULAR'}]


In [3]:
def read_url(url):
    dict_list = []
    with contextlib.closing(urllib.urlopen(url)) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            dict_list.append(row)
    return dict_list

In [4]:
url = 'http://web.mta.info/developers/data/nyct/turnstile/turnstile_160402.txt'
dict_list = read_url(url)
print dict_list[1:3]

[{'DIVISION': 'BMT', 'LINENAME': 'NQR456', 'EXITS                                                               ': '0001893282                                             ', 'ENTRIES': '0005595746', 'C/A': 'A002', 'STATION': '59 ST', 'TIME': '04:00:00', 'DATE': '03/26/2016', 'SCP': '02-00-00', 'UNIT': 'R051', 'DESC': 'REGULAR'}, {'DIVISION': 'BMT', 'LINENAME': 'NQR456', 'EXITS                                                               ': '0001893282                                             ', 'ENTRIES': '0005595746', 'C/A': 'A002', 'STATION': '59 ST', 'TIME': '08:00:00', 'DATE': '03/26/2016', 'SCP': '02-00-00', 'UNIT': 'R051', 'DESC': 'REGULAR'}]


In [6]:
import json

if json.dumps(dict_listdata) == json.dumps(dict_list):
    print("Equal")

Equal


# Challenge 1

In [5]:
def create_dict1(dict_list):
    new_dict = collections.defaultdict(list)
    for d in dict_list:
        key = ( d['C/A'], d['UNIT'], d['SCP'], d['STATION'] )
        value = [ d['LINENAME'], d['DIVISION'], d['DATE'], d['TIME'], d['DESC'], d['ENTRIES'], 
         d['EXITS                                                               '].strip()]
        new_dict[key].append(value) 
    return new_dict


In [6]:
my_dict = create_dict1(dict_list)
print(my_dict.keys()[0])
#print(my_dict[my_dict.keys()[0]])
print my_dict[first_key]
print(len(my_dict[("A002","R051","02-00-00","59 ST")]))

('N134', 'R385', '00-00-00', 'ROCKAWAY BLVD')
[['NQR456', 'BMT', '03/26/2016', '00:00:00', 'REGULAR', '0005595746', '0001893277'], ['NQR456', 'BMT', '03/26/2016', '04:00:00', 'REGULAR', '0005595746', '0001893282'], ['NQR456', 'BMT', '03/26/2016', '08:00:00', 'REGULAR', '0005595746', '0001893282'], ['NQR456', 'BMT', '03/26/2016', '12:00:00', 'REGULAR', '0005595746', '0001893282'], ['NQR456', 'BMT', '03/26/2016', '16:00:00', 'REGULAR', '0005595746', '0001893282'], ['NQR456', 'BMT', '03/26/2016', '20:00:00', 'REGULAR', '0005595746', '0001893282'], ['NQR456', 'BMT', '03/27/2016', '00:00:00', 'REGULAR', '0005595746', '0001893282'], ['NQR456', 'BMT', '03/27/2016', '04:00:00', 'REGULAR', '0005595746', '0001893282'], ['NQR456', 'BMT', '03/27/2016', '08:00:00', 'REGULAR', '0005595746', '0001893282'], ['NQR456', 'BMT', '03/27/2016', '12:00:00', 'REGULAR', '0005595746', '0001893282'], ['NQR456', 'BMT', '03/27/2016', '16:00:00', 'REGULAR', '0005595746', '0001893282'], ['NQR456', 'BMT', '03/27/2016

# Challenge 2

In [48]:
def create_dict2(dict_list):
    new_dict = collections.defaultdict(list)
    for d in dict_list:
        key = ( d['C/A'], d['UNIT'], d['SCP'], d['STATION'] )
        dt = d['DATE'] + " " + d['TIME']
        dt = dateutil.parser.parse(dt)
        value = [ dt, int(d['ENTRIES']) ]
        new_dict[key].append(value) 
    for key in new_dict:
        new_dict[key].sort(key=lambda ls: ls[0])
        for i in range(len(new_dict[key])-1,0,-1):
            new_dict[key][i][1] -= new_dict[key][i-1][1]
        new_dict[key][0][1] = 0
    return new_dict

In [49]:
my_dict2 = create_dict2(dict_list)
print(my_dict2.keys()[0])
print(my_dict2[my_dict2.keys()[0]])

('N134', 'R385', '00-00-00', 'ROCKAWAY BLVD')
[[datetime.datetime(2016, 3, 26, 1, 0), 0], [datetime.datetime(2016, 3, 26, 5, 0), 13], [datetime.datetime(2016, 3, 26, 9, 0), 140], [datetime.datetime(2016, 3, 26, 13, 0), 189], [datetime.datetime(2016, 3, 26, 17, 0), 208], [datetime.datetime(2016, 3, 26, 21, 0), 174], [datetime.datetime(2016, 3, 27, 1, 0), 98], [datetime.datetime(2016, 3, 27, 5, 0), 9], [datetime.datetime(2016, 3, 27, 9, 0), 95], [datetime.datetime(2016, 3, 27, 13, 0), 125], [datetime.datetime(2016, 3, 27, 17, 0), 133], [datetime.datetime(2016, 3, 27, 21, 0), 114], [datetime.datetime(2016, 3, 28, 1, 0), 65], [datetime.datetime(2016, 3, 28, 5, 0), 19], [datetime.datetime(2016, 3, 28, 9, 0), 595], [datetime.datetime(2016, 3, 28, 13, 0), 234], [datetime.datetime(2016, 3, 28, 17, 0), 270], [datetime.datetime(2016, 3, 28, 21, 0), 202], [datetime.datetime(2016, 3, 29, 1, 0), 80], [datetime.datetime(2016, 3, 29, 5, 0), 26], [datetime.datetime(2016, 3, 29, 9, 0), 670], [datetime.

In [24]:
#print my_dict2[first_key]

# Challenge 3

In [56]:
def create_dict3(dict2):
    dict3 = collections.defaultdict(list)
    for key in dict2:
        #key = ('N134', 'R385', '00-00-00', 'ROCKAWAY BLVD')
        l = []
        hist = collections.defaultdict(int)
        for item in dict2[key]:
            d = item[0]
            hist[str(d.date())] += item[1]
        for k in hist:
            l.append([dateutil.parser.parse(k),hist[k]])
        dict3[key]=l
        del(l)
        del(hist)
    return dict3        

In [57]:
my_dict3 = create_dict3(my_dict2)
print(my_dict3.keys()[0])
print(my_dict3[my_dict3.keys()[0]])

('N134', 'R385', '00-00-00', 'ROCKAWAY BLVD')
[[datetime.datetime(2016, 3, 26, 0, 0), 724], [datetime.datetime(2016, 3, 27, 0, 0), 574], [datetime.datetime(2016, 4, 1, 0, 0), 1481], [datetime.datetime(2016, 3, 31, 0, 0), 1532], [datetime.datetime(2016, 3, 30, 0, 0), 1553], [datetime.datetime(2016, 3, 28, 0, 0), 1385], [datetime.datetime(2016, 3, 29, 0, 0), 1568]]
