### Project Solution: Goal 2

Here's what we wrote in Goal 1:

In [4]:
from collections import namedtuple
from datetime import datetime
from functools import partial

# Relative path of the CSV extract that every reader below opens.
file_name = 'nyc_parking_tickets_extract.csv'

# Only the first line (the header row) is needed to derive the field names.
with open(file_name) as f:
    header_line = next(f)

column_headers = header_line.strip('\n').split(',')

# Spaces become underscores and everything is lower-cased so the headers
# can be used as namedtuple field names.
column_names = [name.replace(' ', '_').lower() for name in column_headers]

# Record type holding one parsed row of the file.
Ticket = namedtuple('Ticket', column_names)

def read_data():
    """Lazily yield the raw data lines of the CSV file, skipping the header.

    Yields:
        Each line after the first, exactly as read from the file (the
        trailing newline is kept).  Nothing is yielded for an empty file.
    """
    with open(file_name) as f:
        # Skip the header row.  The default argument matters: a bare
        # next(f) on an empty file raises StopIteration, which inside a
        # generator is converted to RuntimeError (PEP 479).
        next(f, None)
        yield from f

def parse_int(value, *, default=None):
    """Convert ``value`` to an ``int``, falling back to ``default``.

    Args:
        value: Text (or any object accepted by ``int``) to convert.
        default: Keyword-only value returned when the conversion fails.

    Returns:
        The converted integer, or ``default`` when ``value`` cannot be
        interpreted as an integer.
    """
    try:
        return int(value)
    except (ValueError, TypeError):
        # ValueError: malformed text such as '' or 'abc'.
        # TypeError: an object int() cannot accept at all, such as None.
        return default

def parse_date(value, *, default=None):
    """Parse a ``month/day/year`` string into a ``datetime.date``.

    Args:
        value: Text in ``%m/%d/%Y`` form, e.g. ``'10/05/2016'``.
        default: Keyword-only value returned when parsing fails.

    Returns:
        The parsed ``date``, or ``default`` when ``value`` does not match
        the expected format or is not a string.
    """
    date_format = '%m/%d/%Y'
    try:
        return datetime.strptime(value, date_format).date()
    except (ValueError, TypeError):
        # ValueError: text that does not match the format.
        # TypeError: a non-string argument such as None.
        return default

def parse_string(value, *, default=None):
    """Return ``value`` as a whitespace-stripped string, or ``default``.

    Args:
        value: Object to convert with ``str`` and strip.
        default: Keyword-only value returned when the stripped text is
            empty or the conversion fails.

    Returns:
        The stripped text, or ``default`` if nothing is left after
        stripping or ``str(value)`` raises.
    """
    try:
        cleaned = str(value).strip()
    except Exception:
        # Only ordinary errors from a misbehaving __str__ are swallowed;
        # KeyboardInterrupt and SystemExit are allowed to propagate.
        return default
    # An empty (or all-whitespace) field counts as missing.
    return cleaned if cleaned else default

# One parser per CSV column, in column order.  A parser left with its
# default of None makes that column mandatory, because parse_row rejects
# any row whose parsed fields contain None.
column_parsers = (
    parse_int,                          # summons_number, default is None
    parse_string,                       # plate_id, default is None
    partial(parse_string, default=''),  # state
    partial(parse_string, default=''),  # plate_type
    parse_date,                         # issue_date, default is None
    parse_int,                          # violation_code
    partial(parse_string, default=''),  # body type
    parse_string,                       # make, default is None
    partial(parse_string, default=''),  # description
)

def parse_row(row, *, default=None):
    """Turn one raw CSV line into a ``Ticket``.

    Args:
        row: A single line of the file, optionally ending in a newline.
        default: Keyword-only value returned for a rejected row.

    Returns:
        A ``Ticket`` built from the parsed fields, or ``default`` when any
        field parses to ``None``.
    """
    raw_values = row.strip('\n').split(',')
    # Built as a list rather than a generator because the values are
    # traversed twice: once for the None check, once to build the Ticket.
    converted = [parser(raw)
                 for parser, raw in zip(column_parsers, raw_values)]
    if any(value is None for value in converted):
        return default
    return Ticket(*converted)

def parsed_data():
    """Lazily yield a parsed record for every row that parses successfully.

    Rows for which ``parse_row`` returns a falsy result (its default of
    ``None``) are skipped.
    """
    # filter(None, ...) keeps only truthy results, matching an `if parsed`
    # check; map and filter are both lazy, so rows are still streamed.
    yield from filter(None, map(parse_row, read_data()))

In [6]:
# Tally the number of tickets per vehicle make using a plain dict.
makes_counts = {}
for data in parsed_data():
    # A make not seen before starts from 0.
    makes_counts[data.vehicle_make] = makes_counts.get(data.vehicle_make, 0) + 1

# Most frequently ticketed makes first.
ranked_makes = sorted(makes_counts.items(),
                      key=lambda t: t[1],
                      reverse=True)
for make, cnt in ranked_makes:
    print(make, cnt)

TOYOT 112
HONDA 106
FORD 104
CHEVR 76
NISSA 70
DODGE 45
FRUEH 44
ME/BE 38
GMC 35
HYUND 35
BMW 34
LEXUS 26
INTER 25
JEEP 22
NS/OT 18
SUBAR 18
INFIN 13
LINCO 12
CHRYS 12
ACURA 12
AUDI 12
VOLVO 12
MITSU 11
ISUZU 10
CADIL 9
KIA 8
VOLKS 8
HIN 6
KENWO 5
ROVER 5
BUICK 5
MAZDA 5
MERCU 4
JAGUA 3
SMART 3
PORSC 3
WORKH 2
SATUR 2
SCION 2
SAAB 2
HINO 2
FIR 1
OLDSM 1
PETER 1
CITRO 1
GEO 1
YAMAH 1
BSA 1
MINI 1
PONTI 1
SPRI 1
PLYMO 1
UPS 1
FIAT 1
UD 1
UTILI 1
GMCQ 1
STAR 1
AM/T 1
MI/F 1


In [7]:
from collections import defaultdict

In [8]:
# With str as the default factory, looking up a missing key yields
# str(), i.e. ''.  Values assigned explicitly may be of any type.
d = defaultdict(str)

In [9]:
d['a'] = 1

In [10]:
d['a']

1

In [11]:
d['b']

''

In [12]:
# With int as the default factory, looking up a missing key yields
# int(), i.e. 0.  Values assigned explicitly may be of any type.
d = defaultdict(int)

In [13]:
d['a'] = 'hello'

In [14]:
d['a']

'hello'

In [15]:
d['b']

0

In [16]:
from collections import defaultdict

# Same tally as the plain-dict version, but defaultdict(int) supplies the
# initial 0 for a make the first time it is seen.
makes_counts = defaultdict(int)

for data in parsed_data():
    makes_counts[data.vehicle_make] += 1

# Most frequently ticketed makes first.
ranked_makes = sorted(makes_counts.items(),
                      key=lambda t: t[1],
                      reverse=True)
for make, cnt in ranked_makes:
    print(make, cnt)

TOYOT 112
HONDA 106
FORD 104
CHEVR 76
NISSA 70
DODGE 45
FRUEH 44
ME/BE 38
GMC 35
HYUND 35
BMW 34
LEXUS 26
INTER 25
JEEP 22
NS/OT 18
SUBAR 18
INFIN 13
LINCO 12
CHRYS 12
ACURA 12
AUDI 12
VOLVO 12
MITSU 11
ISUZU 10
CADIL 9
KIA 8
VOLKS 8
HIN 6
KENWO 5
ROVER 5
BUICK 5
MAZDA 5
MERCU 4
JAGUA 3
SMART 3
PORSC 3
WORKH 2
SATUR 2
SCION 2
SAAB 2
HINO 2
FIR 1
OLDSM 1
PETER 1
CITRO 1
GEO 1
YAMAH 1
BSA 1
MINI 1
PONTI 1
SPRI 1
PLYMO 1
UPS 1
FIAT 1
UD 1
UTILI 1
GMCQ 1
STAR 1
AM/T 1
MI/F 1
