# Goal 1
Create iterators for each of the four files that contained cleaned up data, of the correct type (e.g. string, int, date, etc), and represented by a named tuple.
For now these four iterators are just separate, independent iterators.

In [59]:
from datetime import datetime
from collections import namedtuple, Counter

def cast(zipped_obj):
    item_list = []
    for i in list(zipped_obj):
        data_type = i[0]
        value = i[1]
        if data_type == 'INT':
            item_list.append(int(value))
        elif data_type == 'DATE':
            item_list.append(datetime.strptime(value, '%m/%d/%Y'))
        else:
            item_list.append(str(value))
    return item_list

def get_formatted_data(file_path:str, list_coltypes:list, tuple_name:str)->int:
    '''Returns a namedtuple after casting the content to their appropriate types'''

    with open(file_path) as csvfile:
        csvreader = iter(csvfile)
        # extracting field names through first row 
        column_names = next(csvreader).strip('\n').split(',')
        car_details = namedtuple(tuple_name, column_names)
        
        for data in csvreader:
            new_details = next(csvreader)
            yield(car_details(*cast(zip(list_coltypes,new_details.strip('\n').split(',')))))

In [48]:
# Getting Employment data - Iterators
class iter_employee:
    def __init__(self):
        file_path = r"assets\a-1\employment.csv"
        column_types = ['STR','STR', 'STR', 'STR']
        self.employees_data = get_formatted_data(file_path, column_types)

    # def __getitem__():
    #     pass
        
    def __iter__(self):
        return iter_employee
    
    def __next__(self):
        return next(self.employees_data)
        


In [49]:
employee_details = iter_employee()

In [50]:
print(next(employee_details))

car_details(employer='Nicolas and Sons', department='Sales', employee_id='41-6841359', ssn='101-71-4702')


In [51]:
for i in employee_details:
    print(i)

TypeError: iter() returned non-iterator of type 'type'

In [60]:
# Personal Info iterator

class iter_pinfo:
    def __init__(self):
        file_path = r"assets\a-1\personal_info.csv"
        column_types = ['STR','STR', 'STR', 'STR', 'STR']
        self.personal_info = get_formatted_data(file_path, column_types, tuple_name= "PersonalInfo")

    # def __getitem__():
    #     pass
        
    def __iter__(self):
        return personal_info
    
    def __next__(self):
        return next(self.personal_info)

In [61]:
employee_details = iter_pinfo()

In [72]:
next(employee_details)

PersonalInfo(ssn='118-21-0357', first_name='Laina', last_name='Bayly', gender='Female', language='Bulgarian')

In [83]:
class iter_update_status:
    def __init__(self):
        file_path = r"assets\a-1\update_status.csv"
        column_types = ['STR','kATE','DkTE']
        self.formatted_data = get_formatted_data(file_path, column_types, tuple_name= "LastUpdate")

    # def __getitem__():
    #     pass
        
    def __iter__(self):
        return personal_info
    
    def __next__(self):
        return next(self.formatted_data)

In [84]:
updated_status = iter_update_status()

In [85]:
next(updated_status)

LastUpdate(ssn='101-71-4702', last_updated='2017-01-23T11:23:17Z', created='2016-01-27T04:32:57Z')

In [105]:
class iter_vehicles:
    def __init__(self):
        file_path = r"assets\a-1\vehicles.csv"
        column_types = ['STR','STR', 'STR', 'Date']
        self.personal_info = get_formatted_data(file_path, column_types, tuple_name= "VehicleDetails")

        

    # def __getitem__():
    #     pass
        
    def __iter__(self):
        return self.__class__()
    
    def __next__(self):
        pass
    class combined_iterable(self):
        self.iter_vehicles

In [106]:
vehicle_details = iter_vehicles()

In [111]:
type(iter_vehicles)

type

In [107]:
for i in range(1000):
    next(vehicle_details)

StopIteration: 

Create a single iterable that combines all the columns from all the iterators. The iterable should yield named tuples containing all the columns. Make sure that the SSN's across the files match! All the files are guaranteed to be in SSN sort order, and every SSN is unique, and every SSN appears in every file. Make sure the SSN is not repeated 4 times - one time per row is enough!

In [109]:
    file_path = r"assets\a-1\vehicles.csv"
    column_types = ['STR','STR', 'STR', 'Date']
    personal_info = get_formatted_data(file_path, column_types, tuple_name= "VehicleDetails")

In [110]:
type(personal_info)

generator

In [None]:
# Iterable that combines columns of all the iterators based on the SSN Numbers

class info_iterable:
    def __init__(self):
        

    def __iter__(self):
        pass

    def __next__(self):
        pass

    def __getitem__(self):
        pass