In [1]:
import csv
from collections import namedtuple, Counter
from datetime import datetime

In [2]:
class ParkingTickets:
    '''
    Re-iterable, lazy view over the lines of a text file such as
    'nyc_parking_tickets_extract-1.csv'.

    Creating an instance only remembers the file name. Every call to iter()
    produces an independent generator that opens the file and hands out its
    lines one at a time.
    '''
    def __init__(self, file_name):
        '''Remember which file to read; nothing is opened here.'''
        self.file_name = file_name

    def __iter__(self):
        '''Return a fresh line generator for the stored file name.'''
        return ParkingTickets.read_file(self.file_name)

    @staticmethod
    def read_file(file_name):
        '''
        Generator over the lines of ``file_name`` (line endings included).

        The file is opened when the first line is requested and is closed by
        the ``with`` block once the generator is exhausted or closed.
        '''
        with open(file_name) as handle:
            # Hand out one line per request instead of loading the whole file
            for line in handle:
                yield line

In [3]:
# Create the lazy reader. Only the file name is stored at this point; the file
# itself is opened once iteration starts.
park_tkts = ParkingTickets('nyc_parking_tickets_extract-1.csv')
park_tkts

<__main__.ParkingTickets at 0x7f9ef7b170d0>

In [4]:
# iter() goes through ParkingTickets.__iter__, which hands back a generator
# over the file's lines; no line is read until next() is called on it.
park_tkts_iter = iter(park_tkts)
park_tkts_iter

<generator object ParkingTickets.read_file at 0x7f9ef9b4add0>

In [5]:
# Retrieve the first line of the file: the CSV header row, returned as a raw
# string that still carries its trailing newline.
next(park_tkts_iter)

'Summons Number,Plate ID,Registration State,Plate Type,Issue Date,Violation Code,Vehicle Body Type,Vehicle Make,Violation Description\n'

In [6]:
# Retrieve the following line: the first ticket record, still an unparsed
# comma-separated string.
next(park_tkts_iter)

'4006478550,VAD7274,VA,PAS,10/5/2016,5,4D,BMW,BUS LANE VIOLATION\n'

In [7]:
def cast(data_type, value):
    '''
    Convert a raw field value to the Python type named by ``data_type``.

    Supported type names:
        'INT'        -> int(value)
        'DATE&TIME'  -> datetime parsed from a 'month/day/year' string
        'BOOL'       -> bool (see below for how text is interpreted)
        anything else -> str(value)

    For 'BOOL', a string is False when, ignoring case and surrounding
    whitespace, it is empty or one of 'false', '0', 'no', 'off'; any other
    string is True. Non-string values use Python's normal truthiness.

    Raises:
        ValueError: if an 'INT' or 'DATE&TIME' value cannot be parsed.
    '''
    if data_type == 'INT':
        return int(value)
    if data_type == 'DATE&TIME':
        return datetime.strptime(value, '%m/%d/%Y')
    if data_type == 'BOOL':
        if isinstance(value, str):
            # bool('False') is True because every non-empty string is truthy,
            # so textual flags have to be interpreted explicitly.
            return value.strip().lower() not in {'', 'false', '0', 'no', 'off'}
        return bool(value)
    return str(value)

def cast_row(data_types, data_row):
    '''
    Cast every value of ``data_row`` to the type named at the same position
    in ``data_types`` and return the converted values as a list.

    Names and values are paired with zip, so the result is as long as the
    shorter of the two inputs.
    '''
    converted = []
    for type_name, raw_value in zip(data_types, data_row):
        converted.append(cast(type_name, raw_value))
    return converted

In [8]:
class Tickets:
    '''
    Lazily parse parking-ticket CSV lines into typed ``Car`` namedtuples.

    Wraps any iterable of CSV text lines whose first line is the header row
    (for example a ParkingTickets instance). Every call to iter() starts a
    new pass over the wrapped iterable.
    '''
    def __init__(self, ParkingTickets):
        # The parameter name shadows the ParkingTickets class inside this
        # method only; it is kept unchanged so keyword callers keep working.
        self.tickets = ParkingTickets

    def __iter__(self):
        '''Return a fresh generator of ``Car`` namedtuples.'''
        return Tickets.fetch_ticket(self.tickets)

    @staticmethod
    def fetch_ticket(park_tkts):
        '''
        Generate one ``Car`` namedtuple per data row of ``park_tkts``.

        The first row supplies the field names (spaces replaced by
        underscores). An extra ``Is_Violated`` field is appended to every
        record: True when the last column of the row is non-empty, False
        otherwise. Blank lines are skipped, and an empty input yields
        nothing.

        Raises:
            ValueError: if a numeric or date column cannot be converted.
            TypeError: if a row does not have as many columns as the header.
        '''
        # Type name for each column of the file, in column order (see cast()).
        data_types = ['INT', 'STRING', 'STRING', 'STRING', 'DATE&TIME',
                      'INT', 'STRING', 'STRING', 'STRING']
        # csv.reader understands quoted fields, so a comma inside a quoted
        # value does not shift the columns the way a plain split(',') would.
        rows = csv.reader(park_tkts)
        header = next(rows, None)
        if header is None:
            # Empty input: there is no header and therefore nothing to yield.
            return
        # namedtuple field names must be identifiers, so spaces become '_'.
        field_names = [name.replace(' ', '_') for name in header]
        # Added new column to easily count the violations for each car.
        field_names.append('Is_Violated')
        Car = namedtuple('Car', field_names)
        for row in rows:
            if not row:
                # Blank line in the input: there is no ticket to build.
                continue
            # Computed as a real bool. Round-tripping the text 'False'
            # through bool() would always give True, since any non-empty
            # string is truthy.
            is_violated = row[-1] != ''
            # Casting keeps the type of each namedtuple field consistent.
            yield Car(*cast_row(data_types, row), is_violated)

In [9]:
# Wrap the line reader in Tickets, which turns each line into a typed Car
# namedtuple when iterated. Construction only stores the reference.
tickets = Tickets(park_tkts)
tickets

<__main__.Tickets at 0x7f9ef7b27e90>

In [10]:
# Tickets.__iter__ returns a generator; parsing only starts on the first
# next() call.
tickets_iter = iter(tickets)
tickets_iter

<generator object Tickets.fetch_ticket at 0x7f9ef7b153d0>

In [11]:
# Retrieve the first parsed ticket. The header line is consumed internally to
# name the namedtuple fields, so the first item is already a data row.
next(tickets_iter)

Car(Summons_Number=4006478550, Plate_ID='VAD7274', Registration_State='VA', Plate_Type='PAS', Issue_Date=datetime.datetime(2016, 10, 5, 0, 0), Violation_Code=5, Vehicle_Body_Type='4D', Vehicle_Make='BMW', Violation_Description='BUS LANE VIOLATION', Is_Violated=True)

In [12]:
# Retrieve the second parsed ticket; its fields carry the types assigned
# during parsing (int, str, datetime, bool).
next(tickets_iter)

Car(Summons_Number=4006462396, Plate_ID='22834JK', Registration_State='NY', Plate_Type='COM', Issue_Date=datetime.datetime(2016, 9, 30, 0, 0), Violation_Code=5, Vehicle_Body_Type='VAN', Vehicle_Make='CHEVR', Violation_Description='BUS LANE VIOLATION', Is_Violated=True)

In [13]:
# Count tickets per (Vehicle_Make, Is_Violated) pair. Iterating `tickets` here
# starts a fresh pass over the whole file, independent of `tickets_iter` above.
count_violations = Counter( (ticket.Vehicle_Make, ticket.Is_Violated) for ticket in tickets)
# Each key is a (make, flag) tuple; the value is the number of tickets with that key.
count_violations

Counter({('', True): 5,
         ('ACURA', True): 12,
         ('AM/T', True): 1,
         ('AUDI', True): 12,
         ('BMW', True): 34,
         ('BSA', True): 1,
         ('BUICK', True): 5,
         ('CADIL', True): 9,
         ('CHEVR', True): 76,
         ('CHRYS', True): 12,
         ('CITRO', True): 1,
         ('DODGE', True): 45,
         ('FIAT', True): 1,
         ('FIR', True): 1,
         ('FORD', True): 104,
         ('FRUEH', True): 44,
         ('GEO', True): 1,
         ('GMC', True): 35,
         ('GMCQ', True): 1,
         ('HIN', True): 6,
         ('HINO', True): 2,
         ('HONDA', True): 106,
         ('HYUND', True): 35,
         ('INFIN', True): 13,
         ('INTER', True): 25,
         ('ISUZU', True): 10,
         ('JAGUA', True): 3,
         ('JEEP', True): 22,
         ('KENWO', True): 5,
         ('KIA', True): 8,
         ('LEXUS', True): 26,
         ('LINCO', True): 12,
         ('MAZDA', True): 5,
         ('ME/BE', True): 38,
         ('MERCU', T