# GOAL1
Create a lazy iterator that will return a named tuple of the data in each row. The data types should be appropriate - i.e. if the column is a date, you should be storing dates in the named tuple, if the field is an integer, then it should be stored as an integer, etc.

In [3]:
# import packages
from collections import namedtuple
import csv
from datetime import datetime

In [52]:
# upload "nyc_parking_tickets.csv" into colab
from google.colab import files
uploaded = files.upload()

Saving nyc_parking_tickets_extract2.csv to nyc_parking_tickets_extract2.csv


In [74]:
file_name = "nyc_parking_tickets_extract2.csv"

In [80]:
# Print Column Names
with open(file_name) as f:
    header = next(f)

column_header = [col.strip(" ") for col in header.strip("\n").split(",")]
column_header

['SummonsNumber',
 'PlateID',
 'RegistrationState',
 'PlateType',
 'IssueDate',
 'ViolationCode',
 'VehicleBodyType',
 'VehicleMake',
 'ViolationDescription']

### Reading file using lazy iterator

In [102]:
def read_csv_file(file_name):
    with open(file_name) as f:
        next(f)
        yield from f

In [81]:
print(column_header)
raw_data = read_csv_file(file_name)
for _ in range(5):
    print(next(raw_data))

['SummonsNumber', 'PlateID', 'RegistrationState', 'PlateType', 'IssueDate', 'ViolationCode', 'VehicleBodyType', 'VehicleMake', 'ViolationDescription']
4006478550,VAD7274,VA,PAS,10/5/2016,5,4D,BMW,BUS LANE VIOLATION

4006462396,22834JK,NY,COM,9/30/2016,5,VAN,CHEVR,BUS LANE VIOLATION

4007117810,21791MG,NY,COM,4/10/2017,5,VAN,DODGE,BUS LANE VIOLATION

4006265037,FZX9232,NY,PAS,8/23/2016,5,SUBN,FORD,BUS LANE VIOLATION

4006535600,N203399C,NY,OMT,10/19/2016,5,SUBN,FORD,BUS LANE VIOLATION



In [85]:
def parse_int_val(value,default=None):
    try:
        return int(value)
    except ValueError:
        return default

In [91]:
from datetime import datetime

def parse_date_val(value,default=None):
    date_format='%m/%d/%Y'
    try:
        return datetime.strptime(value, date_format).date()
    except ValueError:
        return default

In [127]:
def parse_str_val(value,default=None):
  try:
    val = str(value).strip()
  except ValueError:
    val = default
  return val

In [128]:
column_values_parsers = (parse_int_val,  
                  parse_str_val, 
                  parse_str_val,  
                  parse_str_val, 
                  parse_date_val,  
                  parse_int_val,  
                  parse_str_val, 
                  parse_str_val, 
                  parse_str_val 
                 )

In [129]:
# row parser
def parse_row(row):
    
    fields = row.strip('\n').split(',')
    parsed_data = (func(field) 
                   for func, field in zip(column_parsers, fields))
    return parsed_data

In [130]:
rows = read_csv_file(file_name)

# create generator
def parse_rows_gen():
    for row in read_csv_file(file_name):
        yield zip(column_header,parse_row(row))

In [131]:
parsed_rows = parse_rows_gen()
for i in range(5):
    print(list(next(parsed_rows)))

[('SummonsNumber', 4006478550), ('PlateID', 'VAD7274'), ('RegistrationState', 'VA'), ('PlateType', 'PAS'), ('IssueDate', datetime.date(2016, 10, 5)), ('ViolationCode', 5), ('VehicleBodyType', '4D'), ('VehicleMake', 'BMW'), ('ViolationDescription', 'BUS LANE VIOLATION')]
[('SummonsNumber', 4006462396), ('PlateID', '22834JK'), ('RegistrationState', 'NY'), ('PlateType', 'COM'), ('IssueDate', datetime.date(2016, 9, 30)), ('ViolationCode', 5), ('VehicleBodyType', 'VAN'), ('VehicleMake', 'CHEVR'), ('ViolationDescription', 'BUS LANE VIOLATION')]
[('SummonsNumber', 4007117810), ('PlateID', '21791MG'), ('RegistrationState', 'NY'), ('PlateType', 'COM'), ('IssueDate', datetime.date(2017, 4, 10)), ('ViolationCode', 5), ('VehicleBodyType', 'VAN'), ('VehicleMake', 'DODGE'), ('ViolationDescription', 'BUS LANE VIOLATION')]
[('SummonsNumber', 4006265037), ('PlateID', 'FZX9232'), ('RegistrationState', 'NY'), ('PlateType', 'PAS'), ('IssueDate', datetime.date(2016, 8, 23)), ('ViolationCode', 5), ('Vehicle


#GOAL 2 <br>
Calculate the number of violations by car make.

In [124]:
# index=7 is to get Vehicle Make
parsed_rows = parse_rows_gen()
for i in range(10):
    print(list(next(parsed_rows))[7])

('VehicleMake', 'BMW')
('VehicleMake', 'CHEVR')
('VehicleMake', 'DODGE')
('VehicleMake', 'FORD')
('VehicleMake', 'FORD')
('VehicleMake', 'FRUEH')
('VehicleMake', 'HONDA')
('VehicleMake', 'LINCO')
('VehicleMake', 'TOYOT')
('VehicleMake', 'TOYOT')


In [125]:
# holder for storing voilation make and number of violation as dict.
violations_counts= {}
for rows in parsed_rows:
    data = list(next(parsed_rows))[7]
    if data[1] in violations_counts:        
        violations_counts[data[1]] += 1
    else:
        violations_counts[data[1]] = 1

In [126]:
violations_counts

{'ACURA': 7,
 'AM/T': 1,
 'AUDI': 7,
 'BMW': 15,
 'BUICK': 3,
 'CADIL': 4,
 'CHEVR': 40,
 'CHRYS': 7,
 'DODGE': 21,
 'FIR': 1,
 'FORD': 51,
 'FRUEH': 21,
 'GMC': 18,
 'HIN': 5,
 'HINO': 1,
 'HONDA': 51,
 'HYUND': 18,
 'INFIN': 6,
 'INTER': 11,
 'ISUZU': 4,
 'JAGUA': 3,
 'JEEP': 11,
 'KENWO': 3,
 'KIA': 3,
 'LEXUS': 13,
 'LINCO': 6,
 'MAZDA': 2,
 'ME/BE': 18,
 'MERCU': 3,
 'MI/F': 1,
 'MINI': 1,
 'MITSU': 4,
 'NISSA': 34,
 'NS/OT': 9,
 None: 1,
 'OLDSM': 1,
 'PETER': 1,
 'PLYMO': 1,
 'PORSC': 1,
 'ROVER': 2,
 'SAAB': 1,
 'SATUR': 2,
 'SCION': 1,
 'SMART': 2,
 'SPRI': 1,
 'STAR': 1,
 'SUBAR': 10,
 'TOYOT': 53,
 'UD': 1,
 'UPS': 1,
 'VOLKS': 4,
 'VOLVO': 6,
 'WORKH': 1,
 'YAMAH': 1}