In [1]:
from collections import namedtuple
from datetime import datetime
import csv
import re

In [2]:
# Function to sanitize column names to make them valid namedtuple fields
def sanitize_column_name(name):
    """
    Turns a raw column name into a string usable as a namedtuple field name.

    Every character that is not a word character (letter, digit or underscore)
    is replaced with an underscore. If the result does not start with a letter
    (it is empty, or begins with a digit or an underscore), it is prefixed with
    ``col_`` and any leading underscores are dropped, because ``namedtuple``
    rejects field names that start with an underscore or a digit.

    Names that already start with a letter are only affected by the
    underscore replacement (e.g. ``'Vehicle Make'`` -> ``'Vehicle_Make'``).

    Python keywords are not handled here and are returned unchanged.

    Args:
        name (str): The original column name.

    Returns:
        str: The sanitized column name.
    """
    cleaned = re.sub(r'\W', '_', name)

    # ''.isalpha() is False, so the empty string is covered by the same check.
    if not cleaned[:1].isalpha():
        cleaned = 'col_' + cleaned.lstrip('_')

    return cleaned

In [3]:
def type_cast_and_clean_data(data: list, cast_type: list):
    """
    Lazily converts each raw value using the converter at the same position.

    Values and converters are paired positionally with ``zip``, so iteration
    stops as soon as the shorter of the two lists is exhausted. An empty
    string is treated as missing data and produces the placeholder ``'N/A'``
    instead of being passed to its converter.

    Parameters:
    -----------
    data : list
        The raw values to convert.
    cast_type : list
        Types or callables (such as `int`, `str`, etc.); the callable at
        index ``i`` is applied to ``data[i]``.

    Yields:
    -------
    element
        The converted value for each position, or ``'N/A'`` when the raw
        value is the empty string.
    """
    converted = (
        convert(raw) if raw != '' else 'N/A'
        for raw, convert in zip(data, cast_type)
    )
    yield from converted

In [4]:
# Lazy function to process car data line by line
def lazy_car_data_iterator(file_path):
    """
    Lazily reads the CSV file containing parking ticket data, processes each row, and yields a namedtuple.

    The first row is taken as the header; its column names are passed through
    ``sanitize_column_name`` and used as the namedtuple field names. Every
    following row is parsed with ``csv.reader`` (so quoted fields that contain
    commas stay in one piece), converted with ``type_cast_and_clean_data`` and
    yielded as one namedtuple instance.

    The converter list below has nine entries: the first and sixth columns are
    cast to ``int``, the rest to ``str``. Empty fields become ``'N/A'``.
    Completely blank lines are skipped, and an empty file yields nothing.

    Args:
        file_path (str): The path to the CSV file.

    Yields:
        ind_car_data: A namedtuple instance containing the data for each car, cast to the appropriate types.

    Raises:
        ValueError: If a non-empty value in an ``int`` column is not a valid integer.
        TypeError: If a row does not supply exactly as many values as the header has fields.
    """
    # newline='' leaves newline handling to the csv module, as its documentation recommends.
    with open(file_path, newline='') as file:
        reader = csv.reader(file)

        # A default for next() avoids StopIteration escaping from the generator
        # (which would surface as RuntimeError) when the file is empty.
        header_info = next(reader, None)
        if header_info is None:
            return

        sanitized_header = [sanitize_column_name(col) for col in header_info]

        # Create the namedtuple class with sanitized column names
        ind_car_data = namedtuple('ind_car_data', sanitized_header)

        # Define the data types for casting, one converter per column position
        cast_type = [int, str, str, str, str, int, str, str, str]

        # Process each row lazily
        for row in reader:
            if not row:
                # csv.reader returns [] for a blank line; there is nothing to build a record from.
                continue
            yield ind_car_data(*type_cast_and_clean_data(row, cast_type))

In [5]:
# Lazy function to count vehicle manufacturers
def lazy_car_make_violations_count(file_path):
    """
    Lazily tallies how often each vehicle manufacturer appears in the CSV file.

    One pair is yielded per record read, not one per manufacturer: each pair
    carries the manufacturer of the current record and the running total for
    that manufacturer so far. The last pair yielded for a given manufacturer
    therefore holds its final count.

    Args:
        file_path (str): The path to the CSV file.

    Yields:
        tuple: The vehicle manufacturer of the current record and its count up to that record.
    """
    running_totals = {}

    for record in lazy_car_data_iterator(file_path):
        make = record.Vehicle_Make
        # A manufacturer seen for the first time starts from zero.
        running_totals[make] = running_totals.get(make, 0) + 1
        yield make, running_totals[make]

In [6]:
# CSV file handed to lazy_car_make_violations_count further down.
# The path is relative, so it is resolved against the current working directory.
file_path = 'nyc_parking_tickets_extract-1.csv'

In [7]:
# Final ticket count per vehicle make.
# The generator yields a running total for every record; feeding the pairs to
# dict.update lets each later total overwrite the earlier one for the same make.
car_make_violations = {}
car_make_violations.update(lazy_car_make_violations_count(file_path))

# Rank the makes from most to fewest tickets (ties keep first-seen order)
sorted_vehicle_mfgr = dict(
    sorted(
        car_make_violations.items(),
        key=lambda item: item[1],
        reverse=True,
    )
)

# Report one "make: count" line per manufacturer
for vehicle_make, count in sorted_vehicle_mfgr.items():
    print(f"{vehicle_make}: {count}")

TOYOT: 112
HONDA: 106
FORD: 104
CHEVR: 76
NISSA: 70
DODGE: 45
FRUEH: 44
ME/BE: 38
GMC: 35
HYUND: 35
BMW: 34
LEXUS: 26
INTER: 25
JEEP: 22
NS/OT: 18
SUBAR: 18
INFIN: 13
LINCO: 12
CHRYS: 12
ACURA: 12
AUDI: 12
VOLVO: 12
MITSU: 11
ISUZU: 10
CADIL: 9
KIA: 8
VOLKS: 8
HIN: 6
KENWO: 5
N/A: 5
ROVER: 5
BUICK: 5
MAZDA: 5
MERCU: 4
JAGUA: 3
SMART: 3
PORSC: 3
WORKH: 2
SATUR: 2
SCION: 2
SAAB: 2
HINO: 2
FIR: 1
OLDSM: 1
PETER: 1
CITRO: 1
GEO: 1
YAMAH: 1
BSA: 1
MINI: 1
PONTI: 1
SPRI: 1
PLYMO: 1
UPS: 1
FIAT: 1
UD: 1
UTILI: 1
GMCQ: 1
STAR: 1
AM/T: 1
MI/F: 1
