In [20]:
from collections import Counter, namedtuple
from datetime import datetime

import pandas as pd

In [33]:
# Column names, in positional order, for one violation record.
field_names = [
    'Summons_Number', 'Plate_ID', 'Registration_State',
    'Plate_Type', 'Issue_Date', 'Violation_Code',
    'Vehicle_Body_Type', 'Vehicle_Make', 'Violation_Description',
]

# Record type built from those names; the generated class is called "ViolationEntry".
car_entry_tuple = namedtuple(typename="ViolationEntry", field_names=field_names)

In [34]:
# Display the record type's field names to confirm their order.
car_entry_tuple._fields

('Summons_Number',
 'Plate_ID',
 'Registration_State',
 'Plate_Type',
 'Issue_Date',
 'Violation_Code',
 'Vehicle_Body_Type',
 'Vehicle_Make',
 'Violation_Description')

In [30]:
from datetime import datetime

def lazy_file_reader(file_name, skip_header=True):
    '''
    Lazily read a comma-separated violations file, yielding one record per line.

    Parameters
    ----------
    file_name : path of the UTF-8 text file to read.
    skip_header : when true (the default), the first line of the file is
        discarded before parsing starts. An empty file simply yields nothing.

    Yields
    ------
    car_entry_tuple
        One entry per successfully parsed line. Summons_Number and
        Violation_Code are converted to int, Issue_Date to datetime
        (parsed as month/day/year); every other field stays a string.

    Lines that cannot be parsed are reported with print() and skipped.
    '''
    def convert_datatypes(splitted_list):
        '''
        Return the nine fields of one line as a tuple with typed values.

        Raises ValueError when the line does not have exactly nine fields or
        when a numeric or date field is malformed.
        '''
        (summon_num, plate_id, state, plate_type, issue_date,
         violation_code, body_type, make, description) = splitted_list
        return (
            int(summon_num),
            plate_id,
            state,
            plate_type,
            datetime.strptime(issue_date, "%m/%d/%Y"),
            int(violation_code),
            body_type,
            make,
            description,
        )

    with open(file_name, "r", encoding='utf8') as f:
        if skip_header:
            # A bare next(f) raises StopIteration on an empty file, which a
            # generator converts into RuntimeError; the default avoids that.
            next(f, None)
        for line in f:
            # maxsplit=8 caps the split at nine fields, so any commas in the
            # last column stay inside that field.
            fields = line.strip("\n").split(',', maxsplit=8)
            try:
                typed_fields = convert_datatypes(fields)
                entry = car_entry_tuple(*typed_fields)
            except (ValueError, TypeError) as e:
                # Only data problems are skipped; anything else (for example a
                # missing car_entry_tuple definition) is allowed to surface.
                print("Parsing Exception at:",line,e)
                continue
            # Yield outside the try so errors raised by the consumer are not
            # mistaken for parsing failures.
            yield entry

def group_violations_by_car_make(full_list: "List"):
    '''
    Count violations per vehicle make.

    full_list: iterable of entries exposing a Vehicle_Make attribute.

    Returns a list of (make, count) tuples sorted by count, highest first.
    Makes with equal counts keep the order in which they first appear in
    full_list, so the result is the same on every run. An empty input
    gives an empty list.
    '''
    # Counter tallies every make in a single pass; most_common() with no
    # argument returns all (make, count) pairs in descending count order.
    make_counts = Counter(entry.Vehicle_Make for entry in full_list)
    return make_counts.most_common()

In [9]:
cd "/mnt/c/Users/rajy/OneDrive - Nokia/EVA4P2/theseer"


/mnt/c/Users/rajy/OneDrive - Nokia/EVA4P2/theseer


In [16]:
# Path of the parking-ticket CSV extract, relative to the current working directory.
base_file_name = './EPAI3/TSAI-EPAi-30/nyc_parking_tickets_extract.csv'

In [25]:
# Materialise the generator into a list so the entries can be reused by the cells below.
my_list = list(lazy_file_reader(base_file_name))

In [27]:
# Show the parsed entries as a table; append .info() for a per-column summary instead.
pd.DataFrame(my_list)

Unnamed: 0,Summons_Number,Plate_ID,Registration_State,Plate_Type,Issue_Date,Violation_Code,Vehicle_Body_Type,Vehicle_Make,Violation_Description
0,4006478550,VAD7274,VA,PAS,2016-10-05,5,4D,BMW,BUS LANE VIOLATION
1,4006462396,22834JK,NY,COM,2016-09-30,5,VAN,CHEVR,BUS LANE VIOLATION
2,4007117810,21791MG,NY,COM,2017-04-10,5,VAN,DODGE,BUS LANE VIOLATION
3,4006265037,FZX9232,NY,PAS,2016-08-23,5,SUBN,FORD,BUS LANE VIOLATION
4,4006535600,N203399C,NY,OMT,2016-10-19,5,SUBN,FORD,BUS LANE VIOLATION
...,...,...,...,...,...,...,...,...,...
995,8466533837,XANK50,NJ,PAS,2017-04-29,85,DELV,HIN,85-Storage-3 hour Commercial
996,8525168063,HBN8474,NY,PAS,2017-05-30,91,SUBN,HONDA,91-Veh for Sale (Dealer Only)
997,1420730010,GFW3620,NY,PAS,2017-06-10,98,SUBN,CHEVR,
998,1414049870,EJS4309,NY,PAS,2016-08-05,98,SDN,NISSA,


In [31]:
# Rank vehicle makes by their number of violations, most frequent first.
group_violations_by_car_make(my_list)

[('TOYOT', 112),
 ('HONDA', 106),
 ('FORD', 104),
 ('CHEVR', 76),
 ('NISSA', 70),
 ('DODGE', 45),
 ('FRUEH', 44),
 ('ME/BE', 38),
 ('GMC', 35),
 ('HYUND', 35),
 ('BMW', 34),
 ('LEXUS', 26),
 ('INTER', 25),
 ('JEEP', 22),
 ('SUBAR', 18),
 ('NS/OT', 18),
 ('INFIN', 13),
 ('CHRYS', 12),
 ('VOLVO', 12),
 ('LINCO', 12),
 ('ACURA', 12),
 ('AUDI', 12),
 ('MITSU', 11),
 ('ISUZU', 10),
 ('CADIL', 9),
 ('VOLKS', 8),
 ('KIA', 8),
 ('HIN', 6),
 ('', 5),
 ('KENWO', 5),
 ('ROVER', 5),
 ('MAZDA', 5),
 ('BUICK', 5),
 ('MERCU', 4),
 ('SMART', 3),
 ('PORSC', 3),
 ('JAGUA', 3),
 ('HINO', 2),
 ('SATUR', 2),
 ('SAAB', 2),
 ('SCION', 2),
 ('WORKH', 2),
 ('OLDSM', 1),
 ('FIAT', 1),
 ('SPRI', 1),
 ('GMCQ', 1),
 ('CITRO', 1),
 ('PONTI', 1),
 ('UD', 1),
 ('STAR', 1),
 ('UTILI', 1),
 ('MI/F', 1),
 ('UPS', 1),
 ('PETER', 1),
 ('YAMAH', 1),
 ('BSA', 1),
 ('MINI', 1),
 ('PLYMO', 1),
 ('AM/T', 1),
 ('FIR', 1),
 ('GEO', 1)]