Session 15
========================
Goal 1

Create a lazy iterator that will return a named tuple of the data in each row. The data types should be appropriate - i.e. if the column is a date, you should be storing dates in the named tuple, if the field is an integer, then it should be stored as an integer, etc.

Goal 2

Calculate the number of violations by car make.

Note:
Try to use lazy evaluation as much as possible. It may not always be possible, and that's OK, as long as eager evaluation is kept to a minimum.

In [None]:
from pathlib import Path

In [None]:
# Relative path pointing at the current working directory.
path = Path('./')

In [None]:
from collections import namedtuple
from datetime import datetime

In [None]:
def cast(data_type, value):
    """Convert a raw field into the Python type named by ``data_type``.

    ``'INT'`` produces an ``int`` and ``'DATE'`` parses ``value`` as
    ``month/day/year`` into a ``datetime``. Any other type name falls
    back to ``str(value)``.
    """
    if data_type == 'INT':
        return int(value)
    if data_type == 'DATE':
        return datetime.strptime(value, '%m/%d/%Y')
    # Everything that is not explicitly typed is kept as text.
    return str(value)

In [None]:
def cast_row(data_types, data_row):
    """Cast each field of a row to its declared type.

    Type names and raw values are paired positionally (pairing stops at
    the shorter of the two sequences) and the converted values are
    returned as a list.
    """
    converted = []
    for type_name, raw_value in zip(data_types, data_row):
        converted.append(cast(type_name, raw_value))
    return converted

In [None]:
#Goal1
file = open('nyc_parking_tickets_extract-1.csv') 
file_iter = iter(file)
headers = next(file_iter).strip('\n').replace(' ','').split(',')
print(headers)
CarParking = namedtuple('CarParking',headers)
data_types = ['INT','STRING','STRING','STRING','DATE','INT','STRING','STRING','STRING']
cars = (CarParking(*cast_row(data_types, 
                          line.strip('\n').split(',')))
            for line in file_iter)

['SummonsNumber', 'PlateID', 'RegistrationState', 'PlateType', 'IssueDate', 'ViolationCode', 'VehicleBodyType', 'VehicleMake', 'ViolationDescription']


# Test Case-1 : Let us check if we created a Generator 

In [None]:
# Displaying the object should show a generator: no data rows have been parsed yet.
cars

<generator object <genexpr> at 0x000001FEFE612A48>

In [None]:
def gen_car():
    """Yield every record still left in the shared ``cars`` stream.

    ``cars`` is consumed as this generator advances, so records read
    through this wrapper are gone for every other consumer of ``cars``.
    """
    yield from cars


car_gen = gen_car()
car_gen

<generator object gen_car at 0x000001FEFE612BC8>

# Test Case-2 : Let's read iteratively

In [None]:
# Pull the records through the generator one at a time and print each.
for element in car_gen:
    print(element)

CarParking(SummonsNumber=4006478550, PlateID='VAD7274', RegistrationState='VA', PlateType='PAS', IssueDate=datetime.datetime(2016, 10, 5, 0, 0), ViolationCode=5, VehicleBodyType='4D', VehicleMake='BMW', ViolationDescription='BUS LANE VIOLATION')
CarParking(SummonsNumber=4006462396, PlateID='22834JK', RegistrationState='NY', PlateType='COM', IssueDate=datetime.datetime(2016, 9, 30, 0, 0), ViolationCode=5, VehicleBodyType='VAN', VehicleMake='CHEVR', ViolationDescription='BUS LANE VIOLATION')
CarParking(SummonsNumber=4007117810, PlateID='21791MG', RegistrationState='NY', PlateType='COM', IssueDate=datetime.datetime(2017, 4, 10, 0, 0), ViolationCode=5, VehicleBodyType='VAN', VehicleMake='DODGE', ViolationDescription='BUS LANE VIOLATION')
CarParking(SummonsNumber=4006265037, PlateID='FZX9232', RegistrationState='NY', PlateType='PAS', IssueDate=datetime.datetime(2016, 8, 23, 0, 0), ViolationCode=5, VehicleBodyType='SUBN', VehicleMake='FORD', ViolationDescription='BUS LANE VIOLATION')
CarPark

# Test Case-3 : Generator should be exhausted

In [None]:
# The loop above consumed car_gen completely, so this raises StopIteration.
next(car_gen)

StopIteration: 

In [None]:
import collections, functools, operator

In [None]:
from collections import Counter

In [None]:
#goal2
def gen_car():
   for each_car in cars:
      yield each_car
car_gen = gen_car()
result_dict = collections.defaultdict(list)
for element in car_gen:
   result_dict[element.VehicleMake].append(element.ViolationCode)

violations_dict = {key:Counter(value_list) for key, value_list in result_dict.items()}


# Test Case-4 : Count of each violation code per car make

In [None]:
# Print the per-make counters of violation codes built in the Goal 2 cell.
print(violations_dict)

{'BMW': Counter({36: 10, 21: 8, 37: 4, 38: 3, 40: 3, 14: 2, 5: 1, 20: 1, 70: 1, 71: 1}), 'CHEVR': Counter({21: 14, 36: 12, 38: 12, 14: 5, 71: 5, 46: 3, 82: 3, 13: 2, 19: 2, 20: 2, 37: 2, 40: 2, 74: 2, 5: 1, 7: 1, 16: 1, 17: 1, 24: 1, 31: 1, 42: 1, 69: 1, 80: 1, 98: 1}), 'DODGE': Counter({14: 7, 36: 7, 21: 4, 46: 4, 7: 3, 20: 3, 38: 3, 16: 2, 19: 2, 31: 2, 37: 2, 5: 1, 40: 1, 47: 1, 69: 1, 71: 1, 75: 1}), 'FORD': Counter({14: 15, 38: 15, 36: 13, 21: 12, 37: 7, 20: 5, 40: 5, 16: 4, 71: 4, 48: 3, 5: 2, 7: 2, 19: 2, 42: 2, 46: 2, 47: 2, 69: 2, 70: 2, 10: 1, 50: 1, 51: 1, 52: 1, 74: 1}), 'FRUEH': Counter({14: 8, 46: 8, 40: 4, 84: 4, 19: 3, 47: 3, 69: 3, 21: 2, 38: 2, 5: 1, 10: 1, 37: 1, 42: 1, 50: 1, 78: 1, 85: 1}), 'HONDA': Counter({21: 25, 36: 22, 38: 9, 20: 8, 7: 6, 14: 6, 37: 5, 40: 5, 46: 4, 71: 4, 19: 3, 48: 2, 50: 2, 74: 2, 5: 1, 17: 1, 91: 1}), 'LINCO': Counter({21: 3, 36: 3, 37: 3, 71: 2, 5: 1}), 'TOYOT': Counter({21: 27, 36: 15, 38: 15, 7: 6, 19: 6, 37: 6, 14: 5, 20: 5, 40: 5, 46:

# Test Case-5 : Generator should be exhausted

In [None]:
# The Goal 2 loop consumed car_gen completely, so this raises StopIteration.
next(car_gen)

StopIteration: 

# Count the violations for a given car make

In [None]:
def violation_car(car_make, records=None):
    """Lazily yield the records whose ``VehicleMake`` equals ``car_make``.

    Args:
        car_make: Make to match exactly (e.g. ``'BMW'``).
        records: Iterable of records exposing a ``VehicleMake`` attribute.
            Defaults to the module-level ``cars`` stream. ``cars`` is a
            one-shot generator, so once another cell has consumed it the
            default yields nothing; pass a fresh iterable in that case.

    Yields:
        Each matching record, in source order.
    """
    # `is None` rather than truthiness, so an explicitly passed empty
    # iterable is respected instead of falling back to `cars`.
    if records is None:
        records = cars
    for each_car in records:
        if each_car.VehicleMake == car_make:
            yield each_car


# Test Case-6 : Count of Violation for BMW

In [None]:
# Count matches as they stream by instead of materialising them in a list.
# NOTE: `cars` must not have been consumed by an earlier cell, otherwise
# this prints 0.
print("The number of violation for the given car is", sum(1 for _ in violation_car('BMW')))

The number of violation for the given car is 0
