# Imports

In [1]:
from collections import namedtuple, Counter
from itertools import islice
from dateutil.parser import parse
import datetime

# Helper Function to Parse Dates

In [2]:
def date(value):
    '''
    This function is used to format the column with date in it.
    It will take a string in the form 10/5/2016 and return datetime.date(2016, 10, 5)
    '''
    return parse(value).date()   

# Goal 1

### Read Person's Info

In [3]:
def gen_info(file):
    '''
    this generator reads values from the 'personal_info.csv' file
    '''
    with open(file, encoding='utf8', errors='ignore') as f:
        # the first line is the header; this is used to
        # create the names for elements in the namedtuple
        header = next(f).strip().split(',')
        header = [x.replace(" ", "") for x in header]
        # data type for each column in the file
        data_type = [str, str, str, str, str]
        Info = namedtuple('Info', header )
        for line in f:
            line = line.strip().split(',')
            data = (type(field) for type, field in zip(data_type, line))
            yield Info(*data)

In [4]:
info = gen_info('personal_info.csv')

for i in islice(info, 0,2):
    print(f'{i}\n')

Info(ssn='100-53-9824', first_name='Sebastiano', last_name='Tester', gender='Male', language='Icelandic')

Info(ssn='101-71-4702', first_name='Cayla', last_name='MacDonagh', gender='Female', language='Lao')



### Read Employer's Info

In [5]:
def gen_employ(file):
    '''
    this generator reads values from the 'employment.csv' file
    '''
    with open(file, encoding='utf8', errors='ignore') as f:
        # the first line is the header; this is used to
        # create the names for elements in the namedtuple
        header = next(f).strip().split(',')
        header = [x.replace(" ", "") for x in header]
        # data type for each column in the file
        data_type = [str, str, str, str]
        Employ = namedtuple('Employ', header )
        for line in f:
            line = line.strip().split(',')
            data = (type(field) for type, field in zip(data_type, line))
            yield Employ(*data)

In [6]:
employ = gen_employ('employment.csv')

for i in islice(employ, 0,2):
    print(f'{i}\n')

Employ(employer='Stiedemann-Bailey', department='Research and Development', employee_id='29-0890771', ssn='100-53-9824')

Employ(employer='Nicolas and Sons', department='Sales', employee_id='41-6841359', ssn='101-71-4702')



### Read Record Update Status

In [7]:
def gen_update(file):
    '''
    this generator reads values from the 'update_status.csv' file
    '''
    with open(file, encoding='utf8', errors='ignore') as f:
        # the first line is the header; this is used to
        # create the names for elements in the namedtuple
        header = next(f).strip().split(',')
        header = [x.replace(" ", "") for x in header]
        # data type for each column in the file
        data_type = [str, date, date]
        Update = namedtuple('Update', header )
        for line in f:
            line = line.strip().split(',')
            data = (type(field) for type, field in zip(data_type, line))
            yield Update(*data)

In [8]:
update = gen_update('update_status.csv')

for i in islice(update, 0,2):
    print(f'{i}\n')

Update(ssn='100-53-9824', last_updated=datetime.date(2017, 10, 7), created=datetime.date(2016, 1, 24))

Update(ssn='101-71-4702', last_updated=datetime.date(2017, 1, 23), created=datetime.date(2016, 1, 27))



### Read Vehicle Info

In [9]:
def gen_vehicle(file):
    '''
    this generator reads values from the 'vehicles.csv' file
    '''
    with open(file, encoding='utf8', errors='ignore') as f:
        # the first line is the header; this is used to
        # create the names for elements in the namedtuple
        header = next(f).strip().split(',')
        header = [x.replace(" ", "") for x in header]
        # data type for each column in the file
        data_type = [str, str, str, int]
        Vehicle = namedtuple('Vehicle', header )
        for line in f:
            line = line.strip().split(',')
            data = (type(field) for type, field in zip(data_type, line))
            yield Vehicle(*data)

In [10]:
vehicle = gen_vehicle('vehicles.csv')

for i in islice(vehicle, 0,2):
    print(f'{i}\n')

Vehicle(ssn='100-53-9824', vehicle_make='Oldsmobile', vehicle_model='Bravada', model_year=1993)

Vehicle(ssn='101-71-4702', vehicle_make='Ford', vehicle_model='Mustang', model_year=1997)



# Goal 2

In [11]:
def gen_record():
    '''
    This generator attempts to combine all data from the four files.
    First, the generators are initialized
    '''
    info = gen_info('personal_info.csv')
    employ = gen_employ('employment.csv')
    update = gen_update('update_status.csv')
    vehicle = gen_vehicle('vehicles.csv')

    # The four generators are zipped together
    for i, e, u, v in zip(info, employ, update, vehicle):
        Record = namedtuple('Record', 'ssn first_name last_name \
            gender language employer department employee_id last_updated created \
                vehicle_make vehicle_model model_year' )
        data = (*i, e.employer, e.department, e.employee_id, \
                    u.last_updated, u.created, \
                        v.vehicle_make, v.vehicle_model, v.model_year)
        yield Record(*data)

In [12]:
record = gen_record()

for i in islice(record, 0,2):
    print(f'{i}\n')

Record(ssn='100-53-9824', first_name='Sebastiano', last_name='Tester', gender='Male', language='Icelandic', employer='Stiedemann-Bailey', department='Research and Development', employee_id='29-0890771', last_updated=datetime.date(2017, 10, 7), created=datetime.date(2016, 1, 24), vehicle_make='Oldsmobile', vehicle_model='Bravada', model_year=1993)

Record(ssn='101-71-4702', first_name='Cayla', last_name='MacDonagh', gender='Female', language='Lao', employer='Nicolas and Sons', department='Sales', employee_id='41-6841359', last_updated=datetime.date(2017, 1, 23), created=datetime.date(2016, 1, 27), vehicle_make='Ford', vehicle_model='Mustang', model_year=1997)



# Goal 3

In [13]:
def stale_record():
    '''
    This generator yields only data if the record has been updated
    after 3/1/2018.
    '''
    record = gen_record()
    for r in record:
        if r.last_updated > datetime.date(2018, 1, 3):
            yield r

In [14]:
st_rec = stale_record()

for i in islice(st_rec, 0,2):
    print(f'{i}\n')

Record(ssn='104-84-7144', first_name='Claudianus', last_name='Brixey', gender='Male', language='Afrikaans', employer='Zemlak-Olson', department='Business Development', employee_id='46-2886707', last_updated=datetime.date(2018, 2, 19), created=datetime.date(2016, 3, 15), vehicle_make='Ford', vehicle_model='Crown Victoria', model_year=2008)

Record(ssn='105-85-7486', first_name='Angelina', last_name='McAvey', gender='Female', language='Punjabi', employer='"Roberts', department=' Torphy and Dach"', employee_id='Human Resources', last_updated=datetime.date(2018, 2, 14), created=datetime.date(2016, 12, 15), vehicle_make='Chrysler', vehicle_model='300', model_year=2008)



# Goal 4

In [15]:
def car_gender():
    '''
    This function prints the most common car model used by males and females.
    after 3/1/2018.
    '''
    record = gen_record()
    male_cnt = Counter() # this counter keeps track of car models for males
    female_cnt = Counter() # this counter keeps track of car models for females
    for r in record:
        if r.gender == 'Male':
           male_cnt.update([r.vehicle_model])
        elif r.gender == 'Female':
           female_cnt.update([r.vehicle_model])
    print(f'Most Common Car for Male is {male_cnt.most_common(1)[0][0]} with a count of {male_cnt.most_common(1)[0][1]}')
    print(f'Most Common Car for Female is {female_cnt.most_common(1)[0][0]} with a count of {female_cnt.most_common(1)[0][1]}')

In [16]:
car_gender()

Most Common Car for Male is Savana 1500 with a count of 7
Most Common Car for Female is Mustang with a count of 12
