# Goal 1
Create iterators for each of the four files that contained cleaned up data, of the correct type (e.g. string, int, date, etc), and represented by a named tuple.
For now these four iterators are just separate, independent iterators.

In [1]:
from datetime import datetime
from collections import namedtuple, Counter

def cast(zipped_obj):
    item_list = []
    for i in list(zipped_obj):
        data_type = i[0]
        value = i[1]
        if data_type == 'INT':
            item_list.append(int(value))
        elif data_type == 'DATE':
            item_list.append(datetime.strptime(value, '%m/%d/%Y'))
        else:
            item_list.append(str(value))
    return item_list

def get_formatted_data(file_path:str, list_coltypes:list, tuple_name:str)->int:
    '''Returns a namedtuple after casting the content to their appropriate types'''

    with open(file_path) as csvfile:
        csvreader = iter(csvfile)
        # extracting field names through first row 
        column_names = next(csvreader).strip('\n').split(',')
        car_details = namedtuple(tuple_name, column_names)
        
        for data in csvreader:
            new_details = next(csvreader)
            yield(car_details(*cast(zip(list_coltypes,new_details.strip('\n').split(',')))))

In [2]:
# Getting Employment data - Iterators
class iter_employee:
    def __init__(self):
        file_path = r"assets\a-1\employment.csv"
        column_types = ['STR','STR', 'STR', 'STR']
        self.employees_data = get_formatted_data(file_path, column_types, "abc")
        
    def __iter__(self):
        return self
    
    def __next__(self):
        return next(self.employees_data)

In [3]:
employee_details = iter_employee()

In [4]:
for i in employee_details:
    print(i) 

Human Resources', employee_id='05-8069298', ssn='123-88-3381')
abc(employer='Tromp-Leffler', department='Support', employee_id='02-6267170', ssn='127-49-2473')
abc(employer='Kulas-Corwin', department='Sales', employee_id='78-0632027', ssn='127-68-4232')
abc(employer="Kertzmann-O'Hara", department='Sales', employee_id='44-8015165', ssn='128-42-4599')
abc(employer='King-Donnelly', department='Training', employee_id='82-0573004', ssn='128-53-7952')
abc(employer='"Heidenreich', department=' Bergnaum and Smith"', employee_id='Training', ssn='04-7204163')
abc(employer='Gibson and Sons', department='Sales', employee_id='57-8173633', ssn='131-81-4310')
abc(employer='Donnelly-Crooks', department='Human Resources', employee_id='22-2943669', ssn='134-05-0120')
abc(employer='Schuster Group', department='Research and Development', employee_id='72-4728604', ssn='137-13-1889')
abc(employer='VonRueden-Torphy', department='Business Development', employee_id='10-6636122', ssn='138-67-1198')
abc(employer

In [10]:
print(next(employee_details))

StopIteration: 

In [51]:
for i in employee_details:
    print(i)

TypeError: iter() returned non-iterator of type 'type'

In [60]:
# Personal Info iterator

class iter_pinfo:
    def __init__(self):
        file_path = r"assets\a-1\personal_info.csv"
        column_types = ['STR','STR', 'STR', 'STR', 'STR']
        self.personal_info = get_formatted_data(file_path, column_types, tuple_name= "PersonalInfo")

        
    def __iter__(self):
        return personal_info
    
    def __next__(self):
        return next(self.personal_info)

In [61]:
employee_details = iter_pinfo()

In [72]:
next(employee_details)

PersonalInfo(ssn='118-21-0357', first_name='Laina', last_name='Bayly', gender='Female', language='Bulgarian')

In [83]:
class iter_update_status:
    def __init__(self, file_path):
        self.file_path = file_path
        file_path = r"assets\a-1\update_status.csv"
        column_types = ['STR','kATE','DkTE']
        self.formatted_data = get_formatted_data(file_path, column_types, tuple_name= "LastUpdate")

    # def __getitem__():
    #     pass
        
    def __iter__(self):
        return personal_info
    
    def __next__(self):
        return next(self.formatted_data)

In [84]:
updated_status = iter_update_status()

In [85]:
next(updated_status)

LastUpdate(ssn='101-71-4702', last_updated='2017-01-23T11:23:17Z', created='2016-01-27T04:32:57Z')

In [105]:
class iter_vehicles:
    def __init__(self):
        file_path = r"assets\a-1\vehicles.csv"
        column_types = ['STR','STR', 'STR', 'Date']
        self.personal_info = get_formatted_data(file_path, column_types, tuple_name= "VehicleDetails")
        
    def __iter__(self):
        return self.__class__()
    
    def __next__(self):
        pass
    class combined_iterable(self):
        self.iter_vehicles

In [106]:
vehicle_details = iter_vehicles()

In [111]:
type(iter_vehicles)

type

In [107]:
for i in range(1000):
    next(vehicle_details)

StopIteration: 

Create a single iterable that combines all the columns from all the iterators. The iterable should yield named tuples containing all the columns. Make sure that the SSN's across the files match! All the files are guaranteed to be in SSN sort order, and every SSN is unique, and every SSN appears in every file. Make sure the SSN is not repeated 4 times - one time per row is enough!

In [109]:
    file_path = r"assets\a-1\vehicles.csv"
    column_types = ['STR','STR', 'STR', 'Date']
    personal_info = get_formatted_data(file_path, column_types, tuple_name= "VehicleDetails")

In [110]:
type(personal_info)

generator

In [None]:
# Iterable that combines columns of all the iterators based on the SSN Numbers

class info_iterable:
    def __init__(self):
        

    def __iter__(self):
        pass

    def __next__(self):
        pass

    def __getitem__(self):
        pass

## Call open files in the function outside and then send that to iterator, it will combine and throw output

# This is the begining

In [1]:
from datetime import datetime
from collections import namedtuple, Counter

def cast(zipped_obj, date_format=None):
    item_list = []
    for i in list(zipped_obj):
        data_type = i[0]
        value = i[1]
        if data_type == 'INT':
            item_list.append(int(value))
        elif data_type == 'DATE':
            item_list.append(datetime.strptime(value, date_format)) #'%m/%d/%Y'
        else:
            item_list.append(str(value))
    return item_list

def get_formatted_data(file_path:str, list_coltypes:list, tuple_name:str)->int:
    '''Returns a namedtuple after casting the content to their appropriate types'''

    with open(file_path) as csvfile:
        csvreader = iter(csvfile)
        # extracting field names through first row 
        column_names = next(csvreader).strip('\n').split(',')
        car_details = namedtuple(tuple_name, column_names)
        
        for data in csvreader:
            new_details = next(csvreader)
            yield(car_details(*cast(zip(list_coltypes,new_details.strip('\n').split(',')))))

In [2]:
class iter_employee:
    def __init__(self):
        file_path = r"assets\a-1\employment.csv"
        column_types = ['STR','STR', 'STR', 'STR']
        self.employees_data = get_formatted_data(file_path, column_types, "EmployeeDetails")
        print(self.employees_data)
        
    def __iter__(self):
        return self
    
    def __next__(self):
        return next(self.employees_data)
        

class iter_pinfo:
    def __init__(self):
        file_path = r"assets\a-1\personal_info.csv"
        column_types = ['STR','STR', 'STR', 'STR', 'STR']
        self.personal_info = get_formatted_data(file_path, column_types, tuple_name= "PersonalInfo")
        
    def __iter__(self):
        return self
    
    def __next__(self):
        return next(self.personal_info)

class iter_vehicles:
    def __init__(self):
        file_path = r"assets\a-1\vehicles.csv"
        column_types = ['STR','STR', 'STR', 'Date']
        self.vehicle_details = get_formatted_data(file_path, column_types, tuple_name= "VehicleDetails")
        
    def __iter__(self):
        return self
    
    def __next__(self):
        return next(self.vehicle_details)

class iter_update_status:
    def __init__(self):
        file_path = r"assets\a-1\update_status.csv"
        column_types = ['STR','DATE','DATE']
        self.formatted_data = get_formatted_data(file_path, column_types, tuple_name= "LastUpdate")
        
    def __iter__(self):
        return self
    
    def __next__(self):
        return next(self.formatted_data)
    

In [59]:
class MergedIterable:
    def __init__(self, iter_employee, iter_pinfo, iter_vehicles, iter_update_status):
        self.iter_employee = iter_employee
        self.iter_pinfo = iter_pinfo
        self.iter_vehicles = iter_vehicles
        self.iter_update_status = iter_update_status


        self.FinalDetails = namedtuple('FinalDetails', ['ssn','employer', 'department', 'employee_id', 
        'first_name', 'last_name', 'gender', 'language', 
        'vehicle_make', 'vehicle_model', 'model_year',
        'created', 'last_updated'])

        # self.a = self.getdata()

    def __iter__(self):
        return self.a

    def __next__(self):
        return next(self.a)
        

    def getdata(self):

        def getmatcheddata(iterator, ssn):
            iterator = iterator()
            for i in iterator:
                if i.ssn == ssn:
                    return i
            
        pemployee = self.iter_employee()
        for i in pemployee:
            ssn = i.ssn

            match_pinfo = getmatcheddata(self.iter_pinfo, ssn)
            match_vehicle = getmatcheddata(self.iter_vehicles, ssn)
            match_status = getmatcheddata(self.iter_update_status, ssn)

            print(i,match_pinfo, match_vehicle, match_status)


            yield(self.FinalDetails(i.ssn, i.employer, i.department,i.employee_id,
            match_pinfo.first_name, match_pinfo.last_name, match_pinfo.gender, match_pinfo.language,
            match_vehicle.vehicle_make, match_vehicle.vehicle_model, match_vehicle.model_year,
            match_status.created, match_status.last_updated))

In [60]:
a = MergedIterable(iter_employee, iter_pinfo, iter_vehicles ,iter_update_status)

In [61]:
b = a.getdata()

In [64]:
next(b)  

EmployeeDetails(employer='"Kohler', department=' Bradtke and Davis"', employee_id='Support', ssn='80-0975518') None None None


AttributeError: 'NoneType' object has no attribute 'first_name'

In [53]:
pemployee = iter_employee()
for i in pemployee:
    print(i)

tment=' Wehner and Mohr"', employee_id='Legal', ssn='57-0199872')
<generator object get_formatted_data at 0x0000021FFEF07248>
EmployeeDetails(employer='"Treutel', department=' Jerde and Bayer"', employee_id='Training', ssn='93-8500147')
<generator object get_formatted_data at 0x0000021FFEF07248>
EmployeeDetails(employer="Rempel-O'Connell", department='Sales', employee_id='56-9638561', ssn='415-42-0898')
<generator object get_formatted_data at 0x0000021FFEF07248>
EmployeeDetails(employer='Metz-Wolf', department='Business Development', employee_id='72-8305549', ssn='416-37-7763')
<generator object get_formatted_data at 0x0000021FFEF07248>
EmployeeDetails(employer='Ryan-Schulist', department='Business Development', employee_id='38-2848171', ssn='416-80-6367')
<generator object get_formatted_data at 0x0000021FFEF07248>
EmployeeDetails(employer='Heidenreich-Schmitt', department='Training', employee_id='91-4798424', ssn='418-72-1888')
<generator object get_formatted_data at 0x0000021FFEF0724