In [1]:
import pandas as pd
import requests
import sys
import csv


In [60]:
input_filepath = "fail_tests.csv"
output_filepath = "out.txt"

In [61]:
with open(input_filepath, newline="") as input_csv:
    csvreader = csv.reader(input_csv, delimiter=",",)
    
    needed_input_columns = ["Account ID","First Name", "Created On"]
    needed_output_columns = ["Account ID","First Name", "Created On", "Status", "Status Set On"]
    headers = next(csvreader) #grab first row as headers
    if not set(needed_input_columns).issubset(headers):
        print('ERROR - input csv must contain columns: "Account ID","First Name", "Created On"')
    
    with open(output_filepath, mode = "w", newline = "") as output_csv:
        csvwriter = csv.DictWriter(output_csv, fieldnames = needed_output_columns)
        csvwriter.writeheader()
        
        index_of = {}
        for index,header in enumerate(headers):
            index_of[header] = index
        write_dict = {}
        
        for row in csvreader:
            if len(row) != len(headers):
                print("incomplete data")
                #TODO: return
            for column in needed_input_columns:
                write_dict[column] = row[index_of[column]]
            valid, write_dict, message = verify_and_clean_input(write_dict)
            if not valid:
                print(message)
            else:
                write_dict, message = query_and_extend(write_dict)
                clean_and_write(write_dict, csvwriter)
                

            
            

not a valid account id
not a valid account id
not a valid account id
271
21


In [45]:
def verify_and_clean_input(input_dict):
    output_dict = {}
    try:
        output_dict["Account ID"] = int(input_dict["Account ID"])
        print(output_dict["Account ID"])
        if output_dict["Account ID"] < 0:
            return False, {}, "not a valid account id"
    except ValueError:
        return False, {}, "not a valid account id"
    output_dict["Created On"] = pd.to_datetime(input_dict["Created On"], errors='coerce')
    
    output_dict["First Name"] = input_dict["First Name"]
        
    return True, output_dict, ""

In [56]:
def query_and_extend(input_dict):
    output_dict = input_dict
    account_id = input_dict["Account ID"]
    is_valid, returned_dict, message = query(account_id)
        
    # go ahead and try again once after backoff in case of connection problem, 
    # having problem with first query being denied bc of network problems
    if not is_valid: 
        time.sleep(1)
        is_valid, returned_dict, message = query(account_id)

    if is_valid: #set new df_out values
        try:
            # set Status Set On to response's created_on value for account_id
            # could introduce check for status in set ("good", "bad", "")
            output_dict["Status"] = returned_dict["status"]
            status_returned = True
            message = f"Processed Account ID - {account_id} - Status: {returned_dict['status']} "
        except KeyError:
            status_returned = False
            output_dict["Status"] = ""
            output_dict["Status Set On"] = ""
            message = f"No status data included for Account ID - {account_id} "
            
        if status_returned: # only try and set status date if status successfully set
            try:
                # set Status Set On to response's created_on value for account_id
                # here choosing to write nan in event of bad date
                output_dict["Status Set On"] = pd.to_datetime(returned_dict["created_on"], errors='raise') 
            except KeyError:
                output_dict["Status Set On"] = ""
                message = f"No created_on date included for Account ID - {account_id}"
            except ValueError:
                output_dict["Status Set On"] = ""
                message = f"Invalid date included for Account ID - {account_id}"
    else:
        output_dict["Status"] = ""
        output_dict["Status Set On"] = ""
    return output_dict, message

In [47]:
# query accounts/account_id and determine if non-error json is returned
# returns validity of response and the response
def query(account_id):
    is_valid = True
    returned_dict = {}
    message = ""
    request = requests.get(f"http://interview.wpengine.io/v1/accounts/{account_id}")
    try: # check parseable response
        returned_dict = request.json()
    except ValueError:
        message = f"response not json parseable for account id {account_id}"
        is_valid = False
    try: # check not error message
        message = f"Account ID - {account_id} - {returned_dict['details']}"
        is_valid = False
    except KeyError:
        pass
    return is_valid, returned_dict, message

In [58]:
def clean_and_write(input_dict, csvwriter):
    write_dict = input_dict
    try:
        write_dict["Created On"] = write_dict["Created On"].strftime("%x")
    except ValueError:
        write_dict["Created On"] = ""
    try:
        write_dict["Status Set On"] = write_dict["Status Set On"].strftime("%x")
    except AttributeError:
        write_dict["Status Set On"] = ""
        
    csvwriter.writerow(write_dict)
    