# Imports

In [1]:
from collections import namedtuple, Counter
from datetime import datetime
import csv

# Helper function for type casting, and CSV file readers written as generator functions

In [2]:
def cast(data_type, value):
    """Convert a raw CSV field to the Python type named by ``data_type``.

    Supported tags:
      * 'DATE'     -> datetime parsed with the format ``%m/%d/%Y``
      * 'DATETIME' -> datetime parsed with the format ``%Y-%m-%dT%H:%M:%SZ``
      * 'INT'      -> int(value)
      * 'SSN'      -> int after removing every '-' (so leading zeros are lost)
    Any other tag falls back to ``str(value)``.

    Raises whatever ``int`` / ``datetime.strptime`` raise on malformed input.
    """
    if data_type == 'SSN':
        digits_only = value.replace('-','')
        return int(digits_only)
    if data_type == 'INT':
        return int(value)
    if data_type == 'DATETIME':
        return datetime.strptime(value, '%Y-%m-%dT%H:%M:%SZ')
    if data_type == 'DATE':
        return datetime.strptime(value, '%m/%d/%Y')
    # Unknown tag (e.g. 'STR'): hand the value back as text.
    return str(value)

In [3]:
def personal_info_iterator(filepath):
    """Lazily yield one ``PInfo`` namedtuple per data row of the CSV at ``filepath``.

    The header row supplies the field names (spaces inside a name are removed).
    Each data row is cast positionally with ``cast`` as SSN, STR, STR, STR, STR.

    Parsing goes through ``csv.reader`` (same settings as the other readers in
    this notebook) so quoted fields that contain commas are split correctly.
    An empty file yields nothing and blank rows are skipped.
    """
    data_types = ['SSN', 'STR', 'STR', 'STR', 'STR']
    # newline='' leaves newline handling to the csv module, as its docs require.
    with open(filepath, newline='') as f:
        rows = csv.reader(f, delimiter=',', quotechar='"')
        header = next(rows, None)
        if header is None:
            # Empty file: nothing to describe, nothing to yield.
            return
        PInfo = namedtuple('PInfo', [name.replace(' ', '') for name in header])
        for row in rows:
            if not row:
                # csv.reader returns [] for a blank line; there is no record in it.
                continue
            proc_values = [cast(data_type, value) for data_type, value in zip(data_types, row)]
            yield PInfo(*proc_values)

In [4]:
def update_status_iterator(filepath):
    """Lazily yield one ``UpdateStatus`` namedtuple per data row of the CSV at ``filepath``.

    The header row supplies the field names (spaces inside a name are removed).
    Each data row is cast positionally with ``cast`` as SSN, DATETIME, DATETIME.

    Parsing goes through ``csv.reader`` (same settings as the other readers in
    this notebook) instead of a manual ``split(',')``.
    An empty file yields nothing and blank rows are skipped.
    """
    data_types = ['SSN', 'DATETIME', 'DATETIME']
    # newline='' leaves newline handling to the csv module, as its docs require.
    with open(filepath, newline='') as f:
        rows = csv.reader(f, delimiter=',', quotechar='"')
        header = next(rows, None)
        if header is None:
            # Empty file: nothing to describe, nothing to yield.
            return
        UpdateStatus = namedtuple('UpdateStatus', [name.replace(' ', '') for name in header])
        for row in rows:
            if not row:
                # csv.reader returns [] for a blank line; there is no record in it.
                continue
            proc_values = [cast(data_type, value) for data_type, value in zip(data_types, row)]
            yield UpdateStatus(*proc_values)

In [5]:
def emp_status_iterator(filepath):
    """Lazily yield one ``EmpStatus`` namedtuple per data row of the CSV at ``filepath``.

    The header row supplies the field names verbatim. Each data row is cast
    positionally with ``cast`` as STR, STR, SSN, SSN.

    An empty file yields nothing and blank rows are skipped.
    """
    data_types = ['STR', 'STR', 'SSN', 'SSN']
    # newline='' leaves newline handling to the csv module, as its docs require;
    # without it, newlines embedded in quoted fields may be misread.
    with open(filepath, newline='') as f:
        rows = csv.reader(f, delimiter=',', quotechar='"')
        header = next(rows, None)
        if header is None:
            # A bare next() on an empty file would raise StopIteration inside
            # this generator, which Python turns into RuntimeError (PEP 479).
            return
        EmpStatus = namedtuple('EmpStatus', header)
        for row in rows:
            if not row:
                # csv.reader returns [] for a blank line; there is no record in it.
                continue
            proc_values = [cast(data_type, value) for data_type, value in zip(data_types, row)]
            yield EmpStatus(*proc_values)

In [6]:
def vehicles_iterator(filepath):
    """Lazily yield one ``Vehicle`` namedtuple per data row of the CSV at ``filepath``.

    The header row supplies the field names verbatim. Each data row is cast
    positionally with ``cast`` as SSN, STR, STR, INT.

    An empty file yields nothing and blank rows are skipped.
    """
    data_types = ['SSN', 'STR', 'STR', 'INT']
    # newline='' leaves newline handling to the csv module, as its docs require;
    # without it, newlines embedded in quoted fields may be misread.
    with open(filepath, newline='') as f:
        rows = csv.reader(f, delimiter=',', quotechar='"')
        header = next(rows, None)
        if header is None:
            # A bare next() on an empty file would raise StopIteration inside
            # this generator, which Python turns into RuntimeError (PEP 479).
            return
        Vehicle = namedtuple('Vehicle', header)
        for row in rows:
            if not row:
                # csv.reader returns [] for a blank line; there is no record in it.
                continue
            proc_values = [cast(data_type, value) for data_type, value in zip(data_types, row)]
            yield Vehicle(*proc_values)

# SSNDetails iterable: aggregates the four named tuples for each row into a single named tuple

In [7]:
class SSNDetails:
    """Iterable that combines four CSV sources into one ``Details`` record per row.

    Each call to ``iter()`` opens fresh readers over the four files, so the
    object can be iterated more than once.

    Rows are paired by position (first row with first row, and so on), not by
    matching SSN values. Iteration ends as soon as any one source runs out.
    """

    def __init__(self, personal_info_file, update_status_file, emp_status_file, vehicles_file):
        """Remember the four file paths; nothing is opened until iteration starts."""
        self.personal_info_file = personal_info_file
        self.update_status_file = update_status_file
        self.emp_status_file = emp_status_file
        self.vehicles_file = vehicles_file

    def __iter__(self):
        """Return a new ``SSNIterator`` over the four files."""
        return self.SSNIterator(self.personal_info_file, self.update_status_file, self.emp_status_file,
                                self.vehicles_file)

    class SSNIterator:
        """Iterator that advances the four readers in lock-step and merges their rows."""

        # Cached ``Details`` class; building a namedtuple class is costly, and
        # the field names do not change from row to row.
        _details_cls = None

        def __init__(self, personal_info_file, update_status_file, emp_status_file, vehicles_file):
            """Create one generator per source file."""
            self.personal_info_iterator = personal_info_iterator(personal_info_file)
            self.update_status_iterator = update_status_iterator(update_status_file)
            self.emp_status_iterator = emp_status_iterator(emp_status_file)
            self.vehicles_iterator = vehicles_iterator(vehicles_file)

        def __iter__(self):
            return self

        def merge(self, ntuples):
            """Fold several namedtuples into a single ``Details`` namedtuple.

            Fields keep the position of their first appearance; when a field
            name occurs in more than one tuple, the value from the later tuple
            replaces the earlier one.
            """
            m = {}
            for i in ntuples:
                m.update(i._asdict())
            fields = tuple(m)
            # Rebuild the class only when the field layout actually changes.
            if self._details_cls is None or self._details_cls._fields != fields:
                self._details_cls = namedtuple("Details", fields)
            return self._details_cls(*m.values())

        def __next__(self):
            """Return the next merged record; StopIteration from any source ends iteration."""
            personal_info_tuple = next(self.personal_info_iterator)
            update_status_tuple = next(self.update_status_iterator)
            emp_status_tuple = next(self.emp_status_iterator)
            vehicles_tuple = next(self.vehicles_iterator)
            return self.merge([personal_info_tuple, update_status_tuple, emp_status_tuple, vehicles_tuple])

In [8]:
# Iterable over every merged record built from the four source files.
itr = SSNDetails(
    personal_info_file='personal_info.csv',
    update_status_file='update_status.csv',
    emp_status_file='employment.csv',
    vehicles_file='vehicles.csv',
)

In [9]:
# Walk the whole iterable and print each merged record on its own line.
for entry in itr:
    print(entry)

Details(ssn=100539824, first_name='Sebastiano', last_name='Tester', gender='Male', language='Icelandic', last_updated=datetime.datetime(2017, 10, 7, 0, 14, 42), created=datetime.datetime(2016, 1, 24, 21, 19, 30), employer='Stiedemann-Bailey', department='Research and Development', employee_id=290890771, vehicle_make='Oldsmobile', vehicle_model='Bravada', model_year=1993)
Details(ssn=101714702, first_name='Cayla', last_name='MacDonagh', gender='Female', language='Lao', last_updated=datetime.datetime(2017, 1, 23, 11, 23, 17), created=datetime.datetime(2016, 1, 27, 4, 32, 57), employer='Nicolas and Sons', department='Sales', employee_id=416841359, vehicle_make='Ford', vehicle_model='Mustang', model_year=1997)
Details(ssn=101840356, first_name='Nomi', last_name='Lipprose', gender='Female', language='Yiddish', last_updated=datetime.datetime(2017, 10, 4, 11, 21, 30), created=datetime.datetime(2016, 9, 21, 23, 4, 7), employer='Connelly Group', department='Research and Development', employee_i

Details(ssn=755084000, first_name='Domenic', last_name='Symes', gender='Male', language='Italian', last_updated=datetime.datetime(2017, 2, 9, 9, 44), created=datetime.datetime(2016, 4, 22, 18, 6, 55), employer='Padberg, Lowe and Beer', department='Accounting', employee_id=447633504, vehicle_make='Acura', vehicle_model='CL', model_year=1999)
Details(ssn=756402283, first_name='Paten', last_name='Senior', gender='Male', language='Malay', last_updated=datetime.datetime(2017, 12, 11, 3, 0, 57), created=datetime.datetime(2016, 7, 13, 9, 13, 41), employer='Simonis-Donnelly', department='Product Management', employee_id=666961911, vehicle_make='Ford', vehicle_model='Crown Victoria', model_year=1995)
Details(ssn=758205576, first_name='Kathrine', last_name='Amar', gender='Female', language='Croatian', last_updated=datetime.datetime(2017, 5, 31, 12, 9, 25), created=datetime.datetime(2016, 4, 23, 0, 47, 19), employer='Walker, Hayes and Jenkins', department='Support', employee_id=714104023, vehicle

# SSNLatestDetails iterable: keeps only entries last updated on or after 1 March 2017, and aggregates the four named tuples for each row into a single named tuple

In [10]:
class SSNLatestDetails:
    """Iterable over merged records whose ``last_updated`` is at or after a cutoff.

    Each call to ``iter()`` opens fresh readers over the four files. Rows are
    paired by position (first row with first row, and so on), not by matching
    SSN values. Rows whose ``last_updated`` is earlier than the cutoff are
    consumed and dropped.

    Parameters
    ----------
    personal_info_file, update_status_file, emp_status_file, vehicles_file :
        Paths handed to the corresponding reader functions.
    cutoff : datetime, optional
        Oldest ``last_updated`` value to keep (inclusive). When omitted, the
        iterator's default of 1 March 2017 00:00:00 is used.
    """

    def __init__(self, personal_info_file, update_status_file, emp_status_file, vehicles_file,
                 cutoff=None):
        """Remember the file paths and the optional cutoff; nothing is opened yet."""
        self.personal_info_file = personal_info_file
        self.update_status_file = update_status_file
        self.emp_status_file = emp_status_file
        self.vehicles_file = vehicles_file
        self.cutoff = cutoff

    def __iter__(self):
        """Return a new ``SSNIterator`` over the four files with this object's cutoff."""
        return self.SSNIterator(self.personal_info_file, self.update_status_file, self.emp_status_file,
                                self.vehicles_file, cutoff=self.cutoff)

    class SSNIterator:
        """Iterator that advances the four readers in lock-step and filters by ``last_updated``."""

        # Default inclusive threshold, built once instead of on every loop pass.
        cutoff = datetime(2017, 3, 1, 0, 0, 0)
        # Cached merged-record class; the field names do not change between rows.
        _merged_cls = None

        def __init__(self, personal_info_file, update_status_file, emp_status_file, vehicles_file,
                     cutoff=None):
            """Create one generator per source file; ``cutoff=None`` keeps the class default."""
            self.personal_info_iterator = personal_info_iterator(personal_info_file)
            self.update_status_iterator = update_status_iterator(update_status_file)
            self.emp_status_iterator = emp_status_iterator(emp_status_file)
            self.vehicles_iterator = vehicles_iterator(vehicles_file)
            if cutoff is not None:
                self.cutoff = cutoff

        def __iter__(self):
            return self

        def merge(self, ntuples):
            """Fold several namedtuples into a single ``M`` namedtuple.

            Fields keep the position of their first appearance; when a field
            name occurs in more than one tuple, the value from the later tuple
            replaces the earlier one.
            """
            m = {}
            for i in ntuples:
                m.update(i._asdict())
            fields = tuple(m)
            # Rebuild the class only when the field layout actually changes.
            if self._merged_cls is None or self._merged_cls._fields != fields:
                self._merged_cls = namedtuple("M", fields)
            return self._merged_cls(*m.values())

        def __next__(self):
            """Return the next merged record that passes the cutoff.

            All four sources are advanced together even for rejected rows, so
            they stay aligned. StopIteration from any source ends iteration.
            """
            while True:
                personal_info_tuple = next(self.personal_info_iterator)
                update_status_tuple = next(self.update_status_iterator)
                emp_status_tuple = next(self.emp_status_iterator)
                vehicles_tuple = next(self.vehicles_iterator)
                if update_status_tuple.last_updated >= self.cutoff:
                    return self.merge([personal_info_tuple, update_status_tuple,
                                       emp_status_tuple, vehicles_tuple])

In [11]:
# Rebind `itr` to the filtered iterable; later cells that use `itr` see only these records.
itr = SSNLatestDetails(
    personal_info_file='personal_info.csv',
    update_status_file='update_status.csv',
    emp_status_file='employment.csv',
    vehicles_file='vehicles.csv',
)

In [12]:
# `itr` is the SSNLatestDetails iterable here, so only records that pass its
# last_updated filter are printed, one per line.
for entry in itr:
    print(entry)

M(ssn=100539824, first_name='Sebastiano', last_name='Tester', gender='Male', language='Icelandic', last_updated=datetime.datetime(2017, 10, 7, 0, 14, 42), created=datetime.datetime(2016, 1, 24, 21, 19, 30), employer='Stiedemann-Bailey', department='Research and Development', employee_id=290890771, vehicle_make='Oldsmobile', vehicle_model='Bravada', model_year=1993)
M(ssn=101840356, first_name='Nomi', last_name='Lipprose', gender='Female', language='Yiddish', last_updated=datetime.datetime(2017, 10, 4, 11, 21, 30), created=datetime.datetime(2016, 9, 21, 23, 4, 7), employer='Connelly Group', department='Research and Development', employee_id=987952860, vehicle_make='GMC', vehicle_model='Yukon', model_year=2005)
M(ssn=104220928, first_name='Justinian', last_name='Kunzelmann', gender='Male', language='Dhivehi', last_updated=datetime.datetime(2017, 3, 28, 12, 38, 29), created=datetime.datetime(2016, 4, 15, 11, 37, 17), employer='Upton LLC', department='Marketing', employee_id=569817552, veh

M(ssn=597456867, first_name='Boris', last_name='Selwood', gender='Male', language='Tajik', last_updated=datetime.datetime(2018, 2, 7, 17, 30, 11), created=datetime.datetime(2016, 2, 12, 3, 8, 30), employer='Kozey LLC', department='Marketing', employee_id=813779691, vehicle_make='Ford', vehicle_model='Tempo', model_year=1988)
M(ssn=597531918, first_name='Tilda', last_name='Cheales', gender='Female', language='Azeri', last_updated=datetime.datetime(2017, 11, 14, 21, 14, 1), created=datetime.datetime(2016, 4, 3, 2, 12, 30), employer='Ritchie, Murphy and Bahringer', department='Research and Development', employee_id=211766662, vehicle_make='Mercedes-Benz', vehicle_model='E-Class', model_year=2010)
M(ssn=597634107, first_name='Fairfax', last_name='De Blase', gender='Male', language='Gujarati', last_updated=datetime.datetime(2017, 3, 27, 1, 22, 14), created=datetime.datetime(2016, 10, 26, 22, 52, 17), employer='Farrell, Schumm and Ullrich', department='Support', employee_id=815994216, vehicl

M(ssn=817257437, first_name='Randall', last_name='Fibbitts', gender='Male', language='Yiddish', last_updated=datetime.datetime(2017, 11, 27, 5, 44, 31), created=datetime.datetime(2016, 7, 6, 6, 49, 2), employer='Kilback Group', department='Training', employee_id=438990, vehicle_make='Dodge', vehicle_model='Sprinter', model_year=2009)
M(ssn=817808930, first_name='Lib', last_name='Skein', gender='Female', language='Arabic', last_updated=datetime.datetime(2017, 10, 15, 5, 52, 17), created=datetime.datetime(2016, 1, 25, 15, 9, 1), employer='Rath-Ryan', department='Marketing', employee_id=311949756, vehicle_make='Subaru', vehicle_model='Legacy', model_year=2002)
M(ssn=818610770, first_name='Stanly', last_name='Casseldine', gender='Male', language='Belarusian', last_updated=datetime.datetime(2017, 11, 24, 15, 25, 51), created=datetime.datetime(2016, 8, 12, 7, 54, 17), employer='Kovacek, Effertz and Kihn', department='Business Development', employee_id=490909555, vehicle_make='Jaguar', vehicl

# Finding the vehicle make owned most often by males (among the records yielded by `itr`, i.e. those last updated on or after 1 March 2017)

In [13]:
# Tally vehicle makes over the male owners yielded by `itr` and show the full tally.
print(Counter(entry.vehicle_make for entry in itr if entry.gender == 'Male'))

Counter({'Ford': 40, 'Chevrolet': 30, 'GMC': 28, 'Mitsubishi': 28, 'Dodge': 22, 'Toyota': 21, 'Mercedes-Benz': 19, 'Volkswagen': 16, 'Audi': 14, 'Buick': 13, 'Mazda': 13, 'BMW': 12, 'Pontiac': 11, 'Mercury': 11, 'Volvo': 10, 'Cadillac': 9, 'Honda': 9, 'Subaru': 8, 'Hyundai': 8, 'Saab': 8, 'Acura': 7, 'Infiniti': 7, 'Jeep': 7, 'Lexus': 6, 'Nissan': 6, 'Oldsmobile': 5, 'Lincoln': 5, 'Kia': 5, 'Lotus': 5, 'Jaguar': 4, 'Plymouth': 4, 'Porsche': 4, 'Lamborghini': 4, 'Aston Martin': 3, 'Isuzu': 3, 'Maserati': 3, 'Chrysler': 3, 'Saturn': 3, 'Bentley': 3, 'Land Rover': 3, 'Maybach': 2, 'Panoz': 2, 'Geo': 2, 'Suzuki': 2, 'Scion': 1, 'Jensen': 1, 'Smart': 1, 'Rolls-Royce': 1, 'Corbin': 1, 'Daewoo': 1, 'Aptera': 1, 'Eagle': 1, 'Austin': 1})


In [14]:
# Same tally as printed above, kept in a variable for the max lookup below.
male_vmaker_counts = Counter(entry.vehicle_make for entry in itr if entry.gender == 'Male')

In [15]:
# Print every make tied for the highest count among male owners.
max_male_vmaker_counts = max(male_vmaker_counts.values())
for entry, owner_count in male_vmaker_counts.items():
    if owner_count == max_male_vmaker_counts:
        print(entry, owner_count)

Ford 40


# Finding the vehicle make owned most often by females (among the records yielded by `itr`, i.e. those last updated on or after 1 March 2017)

In [16]:
# Tally vehicle makes over the female owners yielded by `itr` and show the full tally.
print(Counter(entry.vehicle_make for entry in itr if entry.gender == 'Female'))

Counter({'Chevrolet': 42, 'Ford': 42, 'GMC': 22, 'Mitsubishi': 22, 'Toyota': 20, 'Mercedes-Benz': 17, 'Dodge': 17, 'Lexus': 15, 'Pontiac': 14, 'Volvo': 13, 'Audi': 13, 'Mazda': 13, 'BMW': 12, 'Nissan': 12, 'Suzuki': 12, 'Buick': 11, 'Volkswagen': 10, 'Acura': 9, 'Kia': 9, 'Infiniti': 9, 'Land Rover': 8, 'Honda': 8, 'Oldsmobile': 8, 'Chrysler': 6, 'Cadillac': 6, 'Subaru': 6, 'Jeep': 5, 'Mercury': 5, 'Lotus': 5, 'Bentley': 4, 'Hyundai': 4, 'Lincoln': 4, 'Isuzu': 3, 'Plymouth': 3, 'Saturn': 3, 'Porsche': 3, 'Saab': 3, 'Jaguar': 3, 'Scion': 2, 'Aston Martin': 2, 'Lamborghini': 2, 'Bugatti': 1, 'Rolls-Royce': 1, 'Eagle': 1, 'Geo': 1, 'Morgan': 1, 'Austin': 1, 'Panoz': 1})


In [17]:
# Same tally as printed above, kept in a variable for the max lookup below.
female_vmaker_counts = Counter(entry.vehicle_make for entry in itr if entry.gender == 'Female')

In [18]:
# Print every make tied for the highest count among female owners.
max_female_vmaker_counts = max(female_vmaker_counts.values())
for entry, owner_count in female_vmaker_counts.items():
    if owner_count == max_female_vmaker_counts:
        print(entry, owner_count)

Chevrolet 42
Ford 42
